[Mesa-dev] [PATCH v2 06/45] swr/rast: Add some archrast stats

George Kyriazis george.kyriazis at intel.com
Wed Apr 18 01:31:46 UTC 2018


Add stats for degenerate and backfacing primitive counts

Wire up archrast stats for alpha blend and alpha test:
pass a value to the jitter and, upon return, have an archrast event increment a counter.
---
 .../drivers/swr/rasterizer/archrast/archrast.cpp   | 35 +++++++++++++++++++++-
 .../drivers/swr/rasterizer/archrast/events.proto   | 19 ++++++++++++
 .../swr/rasterizer/archrast/events_private.proto   | 15 ++++++++++
 .../drivers/swr/rasterizer/core/backend_impl.h     | 18 +++++++----
 .../drivers/swr/rasterizer/core/backend_sample.cpp |  4 +--
 .../swr/rasterizer/core/backend_singlesample.cpp   |  4 +--
 src/gallium/drivers/swr/rasterizer/core/binner.cpp |  2 ++
 src/gallium/drivers/swr/rasterizer/core/state.h    |  2 ++
 .../drivers/swr/rasterizer/jitter/blend_jit.cpp    | 17 +++++++++++
 9 files changed, 105 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 1f87dba..12dfc0e 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -73,6 +73,18 @@ namespace ArchRast
         uint32_t rasterTiles = 0;
     };
 
+    struct CullStats
+    {
+        uint32_t degeneratePrimCount = 0; // valid primitives whose degeneratePrimMask bit was set (summed from CullInfoEvent)
+        uint32_t backfacePrimCount = 0;   // valid primitives whose backfacePrimMask bit was set (summed from CullInfoEvent)
+    };
+
+    struct AlphaStats
+    {
+        uint32_t alphaTestCount = 0;  // running sum of AlphaInfoEvent::alphaTestEnable
+        uint32_t alphaBlendCount = 0; // running sum of AlphaInfoEvent::alphaBlendEnable
+    };
+
     //////////////////////////////////////////////////////////////////////////
     /// @brief Event handler that handles API thread events. This is shared
     ///        between the API and its caller (e.g. driver shim) but typically
@@ -280,7 +292,12 @@ namespace ArchRast
             // Rasterized Subspans
             EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
 
-            //Reset Internal Counters
+            // Alpha Subspans
+            EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
+
+            // Primitive Culling
+            EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
+            
             mDSSingleSample = {};
             mDSSampleRate = {};
             mDSCombined = {};
@@ -288,6 +305,8 @@ namespace ArchRast
             mDSNullPS = {};
 
             rastStats = {};
+            mCullStats = {};
+            mAlphaStats = {};
             mNeedFlush = false;
         }
 
@@ -327,6 +346,18 @@ namespace ArchRast
             rastStats.rasterTiles += event.data.rasterTiles;
         }
 
+        virtual void Handle(const CullInfoEvent& event)
+        {
+            mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask & event.data.degeneratePrimMask); // count valid prims flagged degenerate
+            mCullStats.backfacePrimCount   += _mm_popcnt_u32(event.data.validMask & event.data.backfacePrimMask);   // count valid prims flagged backfacing
+        }
+
+        virtual void Handle(const AlphaInfoEvent& event)
+        {
+            mAlphaStats.alphaTestCount  += event.data.alphaTestEnable;  // adds the raw field value carried by the event
+            mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable; // same accumulation for the blend field
+        }
+
     protected:
         bool mNeedFlush;
         // Per draw stats
@@ -340,6 +371,8 @@ namespace ArchRast
         TEStats mTS = {};
         GSStats mGS = {};
         RastStats rastStats = {};
+        CullStats mCullStats = {};
+        AlphaStats mAlphaStats = {};
 
     };
 
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 7d9a68d..deb0373 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -180,6 +180,7 @@ event LateStencilSampleRate
     uint64_t failCount;
 };
 
+// Total Early-Z counts, SingleSample and SampleRate
 event EarlyZ
 {
     uint32_t drawId;
@@ -187,6 +188,7 @@ event EarlyZ
     uint64_t failCount;
 };	
 
+// Total LateZ counts, SingleSample and SampleRate
 event LateZ
 {
     uint32_t drawId;
@@ -194,6 +196,7 @@ event LateZ
     uint64_t failCount;
 };
 
+// Total EarlyStencil counts, SingleSample and SampleRate
 event EarlyStencil
 {
     uint32_t drawId; 
@@ -201,6 +204,7 @@ event EarlyStencil
     uint64_t failCount;
 };
 
+// Total LateStencil counts, SingleSample and SampleRate
 event LateStencil
 {
     uint32_t drawId; 
@@ -302,3 +306,18 @@ event ClipperEvent
     uint32_t trivialAcceptCount;
     uint32_t mustClipCount;
 };
+
+event CullEvent
+{
+    uint32_t drawId;
+    uint64_t backfacePrimCount;
+    uint64_t degeneratePrimCount;
+};
+
+event AlphaEvent
+{
+    uint32_t drawId;
+    uint32_t alphaTestCount;
+    uint32_t alphaBlendCount;
+};
+
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
index f0a9310..37593be 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
@@ -120,6 +120,21 @@ event ClipInfoEvent
     uint32_t clipMask;
 };
 
+event CullInfoEvent
+{
+    uint32_t drawId;
+    uint64_t degeneratePrimMask;
+    uint64_t backfacePrimMask;
+    uint32_t validMask;
+};
+
+event AlphaInfoEvent
+{
+    uint32_t drawId;
+    uint32_t alphaTestEnable;
+    uint32_t alphaBlendEnable;
+};
+
 event DrawInstancedEvent
 {
     uint32_t drawId;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
index 8c539e3..dd349a1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -709,8 +709,8 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P
 }
 
 // Merge Output to 4x2 SIMD Tile Format
-INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
-    const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask)
+INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+    const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId)
 {
     // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
     const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -747,6 +747,9 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
             }
         }
 
+        // Track alpha events
+        AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
         // final write mask 
         simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
 
@@ -777,8 +780,8 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
 
 #if USE_8x2_TILE_BACKEND
 // Merge Output to 8x2 SIMD16 Tile Format
-INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
-    const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset)
+INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+    const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId)
 {
     // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
     uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -836,6 +839,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
             }
         }
 
+        // Track alpha events
+        AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
         // final write mask 
         simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
 
@@ -1003,9 +1009,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                 
                 // broadcast the results of the PS to all passing pixels
 #if USE_8x2_TILE_BACKEND
-                OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset);
+                OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
 #else // USE_8x2_TILE_BACKEND
-                OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask);
+                OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId);
 #endif // USE_8x2_TILE_BACKEND
 
                 if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
index 16418f7..4982025 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
@@ -196,9 +196,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                     // output merger
                     RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
 #if USE_8x2_TILE_BACKEND
-                    OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+                    OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
 #else
-                    OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+                    OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
 #endif
 
                     // do final depth write after all pixel kills
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
index 4cc1ed5..452fba1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
@@ -181,9 +181,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                 // output merger
                 RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
 #if USE_8x2_TILE_BACKEND
-                OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+                OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
 #else
-                OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+                OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
 #endif
 
                 // do final depth write after all pixel kills
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index c9a37cb..d31fd37 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -773,6 +773,8 @@ void SIMDCALL BinTrianglesImpl(
         RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
     }
 
+    AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask));
+
     /// Note: these variable initializations must stay above any 'goto endBenTriangles'
     // compute per tri backface
     uint32_t frontFaceMask = frontWindingTris;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 8c26ec6..22acbe0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -887,6 +887,8 @@ struct SWR_BLEND_CONTEXT
     simdvector*             result;
     simdscalari*            oMask;
     simdscalari*            pMask;
+    uint32_t                isAlphaTested;
+    uint32_t                isAlphaBlended;
 };
 
 //////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
index 6b7efbf..912a88f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -557,6 +557,8 @@ struct BlendJit : public Builder
         ppoMask->setName("ppoMask");
         Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask });
         ppMask->setName("pMask");
+        Value* AlphaTest1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+        AlphaTest1->setName("AlphaTest1"); // name the new load; ppMask keeps the "pMask" name set above
 
         static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
         Value* dst[4];
@@ -590,12 +592,22 @@ struct BlendJit : public Builder
         // alpha test
         if (state.desc.alphaTestEnable)
         {
+            // Gather for archrast stats
+            STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
             AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
         }
+        else
+        {
+            // Gather for archrast stats
+            STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+        }
 
         // color blend
         if (state.blendState.blendEnable)
         {
+            // Gather for archrast stats
+            STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+
             // clamp sources
             Clamp(state.format, src);
             Clamp(state.format, src1);
@@ -647,6 +659,11 @@ struct BlendJit : public Builder
                 STORE(result[i], pResult, { 0, i });
             }
         }
+        else
+        {
+            // Gather for archrast stats
+            STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+        }
         
         if(state.blendState.logicOpEnable)
         {
-- 
2.7.4



More information about the mesa-dev mailing list