[Mesa-dev] [PATCH v2 37/45] swr/rast: Add shader stats infrastructure (WIP)

George Kyriazis george.kyriazis at intel.com
Wed Apr 18 01:32:17 UTC 2018


---
 .../drivers/swr/rasterizer/archrast/archrast.cpp   | 64 +++++++++++++++++++--
 .../drivers/swr/rasterizer/archrast/events.proto   | 65 ++++++++++++----------
 .../swr/rasterizer/archrast/events_private.proto   | 30 ++++++++++
 .../drivers/swr/rasterizer/jitter/builder.h        | 23 ++++++++
 4 files changed, 148 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 2184673..871db79 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -61,7 +61,7 @@ namespace ArchRast
         //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
     };
 
-    struct GSInfo
+    struct GSStateInfo
     {
         uint32_t inputPrimCount;
         uint32_t primGeneratedCount;
@@ -155,7 +155,7 @@ namespace ArchRast
             mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
 
             //earlyZ test single and multi sample
-            mDSCombined.earlyZTestPassCount  += _mm_popcnt_u32(event.data.depthPassMask);
+            mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
             mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
 
             //earlyStencil test single and multi sample
@@ -257,11 +257,51 @@ namespace ArchRast
             mClipper.trivialAcceptCount += _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
         }
 
+        struct ShaderStats // Per-shader-stage counters accumulated from the *Stats events.
+        {
+            uint32_t numInstExecuted; // Running sum of event.data.numInstExecuted for one stage.
+        };
+
+        virtual void Handle(const VSStats& event) // Add this event's instruction count to the vertex-shader total.
+        {
+            mShaderStats[SHADER_VERTEX].numInstExecuted += event.data.numInstExecuted; // Reported via VSInfo and zeroed afterwards.
+        }
+
+        virtual void Handle(const GSStats& event) // Add this event's instruction count to the geometry-shader total.
+        {
+            mShaderStats[SHADER_GEOMETRY].numInstExecuted += event.data.numInstExecuted; // Reported via GSInfo and zeroed afterwards.
+        }
+
+        virtual void Handle(const DSStats& event) // Add this event's instruction count to the domain-shader total.
+        {
+            mShaderStats[SHADER_DOMAIN].numInstExecuted += event.data.numInstExecuted; // Reported via DSInfo and zeroed afterwards.
+        }
+
+        virtual void Handle(const HSStats& event) // Add this event's instruction count to the hull-shader total.
+        {
+            mShaderStats[SHADER_HULL].numInstExecuted += event.data.numInstExecuted; // Reported via HSInfo and zeroed afterwards.
+        }
+
+        virtual void Handle(const PSStats& event) // Add this event's instruction count to the pixel-shader total.
+        {
+            mShaderStats[SHADER_PIXEL].numInstExecuted += event.data.numInstExecuted;
+            mNeedFlush = true; // FlushDraw returns early unless this is set; it emits PSInfo and clears the total.
+        }
+
+        virtual void Handle(const CSStats& event) // Add this event's instruction count to the compute-shader total.
+        {
+            mShaderStats[SHADER_COMPUTE].numInstExecuted += event.data.numInstExecuted;
+            mNeedFlush = true; // FlushDraw returns early unless this is set; it emits CSInfo and clears the total.
+        }
+
         // Flush cached events for this draw
         virtual void FlushDraw(uint32_t drawId)
         {
             if (mNeedFlush == false) return;
 
+            EventHandlerFile::Handle(PSInfo(drawId, mShaderStats[SHADER_PIXEL].numInstExecuted));
+            EventHandlerFile::Handle(CSInfo(drawId, mShaderStats[SHADER_COMPUTE].numInstExecuted));
+
             //singleSample
             EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
             EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
@@ -297,7 +337,7 @@ namespace ArchRast
 
             // Primitive Culling
             EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
-            
+
             mDSSingleSample = {};
             mDSSampleRate = {};
             mDSCombined = {};
@@ -307,6 +347,10 @@ namespace ArchRast
             rastStats = {};
             mCullStats = {};
             mAlphaStats = {};
+
+            mShaderStats[SHADER_PIXEL] = {};
+            mShaderStats[SHADER_COMPUTE] = {};
+
             mNeedFlush = false;
         }
 
@@ -323,6 +367,16 @@ namespace ArchRast
             EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
             EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
 
+            EventHandlerFile::Handle(VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
+            EventHandlerFile::Handle(HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
+            EventHandlerFile::Handle(DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
+            EventHandlerFile::Handle(GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
+
+            mShaderStats[SHADER_VERTEX] = {};
+            mShaderStats[SHADER_HULL] = {};
+            mShaderStats[SHADER_DOMAIN] = {};
+            mShaderStats[SHADER_GEOMETRY] = {};
+
             //Reset Internal Counters
             mClipper = {};
             mTS = {};
@@ -369,11 +423,13 @@ namespace ArchRast
         DepthStencilStats mDSOmZ = {};
         CStats mClipper = {};
         TEStats mTS = {};
-        GSInfo mGS = {};
+        GSStateInfo mGS = {};
         RastStats rastStats = {};
         CullStats mCullStats = {};
         AlphaStats mAlphaStats = {};
 
+        ShaderStats mShaderStats[NUM_SHADER_TYPES] = {}; // Zero-initialized like the sibling counters: the Handle() overloads += into it before any reset.
+
     };
 
     static EventManager* FromHandle(HANDLE hThreadContext)
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index f924b57..32bd81f 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -115,36 +115,6 @@ event FrontendStatsEvent
     uint64_t SoNumPrimsWritten3;
 };
 
-event VSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event HSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event DSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event GSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event PSStats
-{
-    uint32_t numInstExecuted;
-};
-
-event CSStats
-{
-    uint32_t numInstExecuted;
-};
-
 event BackendStatsEvent
 {
     uint32_t drawId;
@@ -351,3 +321,38 @@ event AlphaEvent
     uint32_t alphaBlendCount;
 };
 
+event VSInfo
+{
+    uint32_t drawId; // Draw this total belongs to.
+    uint32_t numInstExecuted; // Vertex-shader instructions accumulated from VSStats events.
+};
+
+event HSInfo
+{
+    uint32_t drawId; // Draw this total belongs to.
+    uint32_t numInstExecuted; // Hull-shader instructions accumulated from HSStats events.
+};
+
+event DSInfo
+{
+    uint32_t drawId; // Draw this total belongs to.
+    uint32_t numInstExecuted; // Domain-shader instructions accumulated from DSStats events.
+};
+
+event GSInfo
+{
+    uint32_t drawId; // Draw this total belongs to.
+    uint32_t numInstExecuted; // Geometry-shader instructions accumulated from GSStats events.
+};
+
+event PSInfo
+{
+    uint32_t drawId; // Draw this total belongs to.
+    uint32_t numInstExecuted; // Pixel-shader instructions accumulated from PSStats events.
+};
+
+event CSInfo
+{
+    uint32_t drawId; // Draw this total belongs to.
+    uint32_t numInstExecuted; // Compute-shader instructions accumulated from CSStats events.
+};
\ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
index 37593be..f5cfb47 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
@@ -165,3 +165,33 @@ event DrawIndexedInstancedEvent
     uint32_t soTopology;
     uint32_t splitId; // Split draw count or id.
 };
+
+event VSStats
+{
+    uint32_t numInstExecuted; // Added to the vertex-shader total by the archrast stats handler.
+};
+
+event HSStats
+{
+    uint32_t numInstExecuted; // Added to the hull-shader total by the archrast stats handler.
+};
+
+event DSStats
+{
+    uint32_t numInstExecuted; // Added to the domain-shader total by the archrast stats handler.
+};
+
+event GSStats
+{
+    uint32_t numInstExecuted; // Added to the geometry-shader total by the archrast stats handler.
+};
+
+event PSStats
+{
+    uint32_t numInstExecuted; // Added to the pixel-shader total by the archrast stats handler.
+};
+
+event CSStats
+{
+    uint32_t numInstExecuted; // Added to the compute-shader total by the archrast stats handler.
+};
\ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
index 27a32bc..e2ad1e8 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
@@ -34,6 +34,29 @@
 
 namespace SwrJit
 {
+    /// Identifiers for the kinds of shader statistics counters (explicitly numbered 0-17). @todo Move this to a better place.
+    enum SHADER_STATS_COUNTER_TYPE
+    {
+        STATS_INST_EXECUTED = 0,
+        STATS_SAMPLE_EXECUTED = 1,
+        STATS_SAMPLE_L_EXECUTED = 2,
+        STATS_SAMPLE_B_EXECUTED = 3,
+        STATS_SAMPLE_C_EXECUTED = 4,
+        STATS_SAMPLE_C_LZ_EXECUTED = 5,
+        STATS_SAMPLE_C_D_EXECUTED = 6,
+        STATS_LOD_EXECUTED = 7,
+        STATS_GATHER4_EXECUTED = 8,
+        STATS_GATHER4_C_EXECUTED = 9,
+        STATS_GATHER4_C_PO_EXECUTED = 10,
+        STATS_GATHER4_C_PO_C_EXECUTED = 11,
+        STATS_LOAD_RAW_UAV = 12,
+        STATS_LOAD_RAW_RESOURCE = 13,
+        STATS_STORE_RAW_UAV = 14,
+        STATS_STORE_TGSM = 15,
+        STATS_DISCARD = 16,
+        STATS_BARRIER = 17,
+    };
+
     using namespace llvm;
     struct Builder
     {
-- 
2.7.4



More information about the mesa-dev mailing list