[Mesa-dev] [PATCH v2 2/5] i965: perf: snapshot RPSTAT register

Kenneth Graunke kenneth at whitecape.org
Tue Apr 17 06:03:53 UTC 2018


On Tuesday, April 3, 2018 7:48:10 AM PDT Lionel Landwerlin wrote:
> This register contains the current/previous frequency of the GT; it's
> one of the values GPA would like to have as part of their queries.
> 
> v2: Don't use this register on baytrail/cherryview (Ken)
>     Use GET_FIELD() macro (Ken)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h           | 12 ++++++
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 50 +++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_performance_query.h |  5 +++
>  3 files changed, 67 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8bf6f68b67c..855f1c7d744 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1656,6 +1656,18 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
>  # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
>  
> +#define GEN7_RPSTAT1                       0xA01C
> +#define  GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT   7
> +#define  GEN7_RPSTAT1_CURR_GT_FREQ_MASK    INTEL_MASK(13, 7)
> +#define  GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT   0
> +#define  GEN7_RPSTAT1_PREV_GT_FREQ_MASK    INTEL_MASK(6, 0)
> +
> +#define GEN9_RPSTAT0                       0xA01C
> +#define  GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT   23
> +#define  GEN9_RPSTAT0_CURR_GT_FREQ_MASK    INTEL_MASK(31, 23)
> +#define  GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT   0
> +#define  GEN9_RPSTAT0_PREV_GT_FREQ_MASK    INTEL_MASK(8, 0)
> +
>  #define SLICE_COMMON_ECO_CHICKEN1          0x731c /* Gen9+ */
>  # define GLK_SCEC_BARRIER_MODE_GPGPU       (0 << 7)
>  # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 44cac85c6e6..32cf96a333d 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -216,6 +216,8 @@ brw_perf_query(struct gl_perf_query_object *o)
>  
>  #define MI_RPC_BO_SIZE              4096
>  #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
> +#define MI_FREQ_START_OFFSET_BYTES  (3072)
> +#define MI_FREQ_END_OFFSET_BYTES    (3076)
>  
>  /******************************************************************************/
>  
> @@ -946,6 +948,21 @@ close_perf(struct brw_context *brw)
>     }
>  }
>  
> +static void
> +capture_frequency_stat_register(struct brw_context *brw,
> +                                struct brw_bo *bo,
> +                                uint32_t bo_offset)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +
> +   if (devinfo->gen >= 7 && devinfo->gen <= 8 &&
> +       !devinfo->is_baytrail && !devinfo->is_cherryview) {
> +      brw_store_register_mem32(brw, bo, GEN7_RPSTAT1, bo_offset);
> +   } else if (devinfo->gen >= 9) {
> +      brw_store_register_mem32(brw, bo, GEN9_RPSTAT0, bo_offset);
> +   }
> +}
> +
>  /**
>   * Driver hook for glBeginPerfQueryINTEL().
>   */
> @@ -1138,6 +1155,8 @@ brw_begin_perf_query(struct gl_context *ctx,
>        /* Take a starting OA counter snapshot. */
>        brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
>                                            obj->oa.begin_report_id);
> +      capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_START_OFFSET_BYTES);
> +
>        ++brw->perfquery.n_active_oa_queries;
>  
>        /* No already-buffered samples can possibly be associated with this query
> @@ -1221,6 +1240,7 @@ brw_end_perf_query(struct gl_context *ctx,
>         */
>        if (!obj->oa.results_accumulated) {
>           /* Take an ending OA counter snapshot. */
> +         capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_END_OFFSET_BYTES);
>           brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
>                                               MI_RPC_BO_END_OFFSET_BYTES,
>                                               obj->oa.begin_report_id + 1);
> @@ -1321,6 +1341,35 @@ brw_is_perf_query_ready(struct gl_context *ctx,
>     return false;
>  }
>  
> +static void
> +read_gt_frequency(struct brw_context *brw,
> +                  struct brw_perf_query_object *obj)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)),
> +      end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
> +
> +   switch (devinfo->gen) {
> +   case 7:
> +   case 8:
> +      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
> +      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
> +      break;
> +   case 9:
> +   case 10:
> +   case 11:
> +      obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
> +      obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;

Thanks for the pointers to the other kernel code in your reply to v1.
This looks right.  One thing I noticed is that the kernel rounds to the
closest value, while this will truncate, but I don't think that's too crucial.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180416/fbef2693/attachment-0001.sig>


More information about the mesa-dev mailing list