[Mesa-dev] [PATCH 06/45] swr/rast: Add some archrast stats
George Kyriazis
george.kyriazis at intel.com
Fri Apr 13 19:01:46 UTC 2018
Add stats for degenerate and backfacing primitive counts.
Wire up archrast stats for alpha blend and alpha test:
pass a value to the jitter and, upon return, have an archrast event increment a counter.
---
.../drivers/swr/rasterizer/archrast/archrast.cpp | 35 +++++++++++++++++++++-
.../drivers/swr/rasterizer/archrast/events.proto | 19 ++++++++++++
.../swr/rasterizer/archrast/events_private.proto | 15 ++++++++++
.../drivers/swr/rasterizer/core/backend_impl.h | 18 +++++++----
.../drivers/swr/rasterizer/core/backend_sample.cpp | 4 +--
.../swr/rasterizer/core/backend_singlesample.cpp | 4 +--
src/gallium/drivers/swr/rasterizer/core/binner.cpp | 2 ++
src/gallium/drivers/swr/rasterizer/core/state.h | 2 ++
.../drivers/swr/rasterizer/jitter/blend_jit.cpp | 17 +++++++++++
9 files changed, 105 insertions(+), 11 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 1f87dba..12dfc0e 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -73,6 +73,18 @@ namespace ArchRast
uint32_t rasterTiles = 0;
};
+ struct CullStats // per-draw tallies of culled primitives
+ {
+ uint32_t degeneratePrimCount = 0; // valid primitives whose degeneratePrimMask bit was set
+ uint32_t backfacePrimCount = 0; // valid primitives whose backfacePrimMask bit was set
+ };
+
+ struct AlphaStats // per-draw tallies of alpha test / alpha blend usage
+ {
+ uint32_t alphaTestCount = 0; // sum of the alphaTestEnable values carried by AlphaInfoEvent
+ uint32_t alphaBlendCount = 0; // sum of the alphaBlendEnable values carried by AlphaInfoEvent
+ };
+
//////////////////////////////////////////////////////////////////////////
/// @brief Event handler that handles API thread events. This is shared
/// between the API and its caller (e.g. driver shim) but typically
@@ -280,7 +292,12 @@ namespace ArchRast
// Rasterized Subspans
EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
- //Reset Internal Counters
+ // Alpha test / alpha blend counts
+ EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
+
+ // Primitive Culling
+ EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
+ // Reset Internal Counters
mDSSingleSample = {};
mDSSampleRate = {};
mDSCombined = {};
@@ -288,6 +305,8 @@ namespace ArchRast
mDSNullPS = {};
rastStats = {};
+ mCullStats = {};
+ mAlphaStats = {};
mNeedFlush = false;
}
@@ -327,6 +346,18 @@ namespace ArchRast
rastStats.rasterTiles += event.data.rasterTiles;
}
+ virtual void Handle(const CullInfoEvent& event) // tally valid primitives flagged degenerate / backfacing
+ {
+ mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask & event.data.degeneratePrimMask);
+ mCullStats.backfacePrimCount += _mm_popcnt_u32(event.data.validMask & event.data.backfacePrimMask);
+ }
+
+ virtual void Handle(const AlphaInfoEvent& event) // fold one output-merger report into the alpha totals
+ {
+ mAlphaStats.alphaTestCount += event.data.alphaTestEnable; // adds the flag value exactly as the emitter supplied it
+ mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
+ }
+
protected:
bool mNeedFlush;
// Per draw stats
@@ -340,6 +371,8 @@ namespace ArchRast
TEStats mTS = {};
GSStats mGS = {};
RastStats rastStats = {};
+ CullStats mCullStats = {};
+ AlphaStats mAlphaStats = {};
};
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 7d9a68d..deb0373 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -180,6 +180,7 @@ event LateStencilSampleRate
uint64_t failCount;
};
+// Total Early-Z counts, SingleSample and SampleRate
event EarlyZ
{
uint32_t drawId;
@@ -187,6 +188,7 @@ event EarlyZ
uint64_t failCount;
};
+// Total LateZ counts, SingleSample and SampleRate
event LateZ
{
uint32_t drawId;
@@ -194,6 +196,7 @@ event LateZ
uint64_t failCount;
};
+// Total EarlyStencil counts, SingleSample and SampleRate
event EarlyStencil
{
uint32_t drawId;
@@ -201,6 +204,7 @@ event EarlyStencil
uint64_t failCount;
};
+// Total LateStencil counts, SingleSample and SampleRate
event LateStencil
{
uint32_t drawId;
@@ -302,3 +306,18 @@ event ClipperEvent
uint32_t trivialAcceptCount;
uint32_t mustClipCount;
};
+
+event CullEvent
+{
+ uint32_t drawId;
+ uint64_t backfacePrimCount;
+ uint64_t degeneratePrimCount;
+};
+
+event AlphaEvent
+{
+ uint32_t drawId;
+ uint32_t alphaTestCount;
+ uint32_t alphaBlendCount;
+};
+
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
index f0a9310..37593be 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
@@ -120,6 +120,21 @@ event ClipInfoEvent
uint32_t clipMask;
};
+event CullInfoEvent
+{
+ uint32_t drawId;
+ uint64_t degeneratePrimMask;
+ uint64_t backfacePrimMask;
+ uint32_t validMask;
+};
+
+event AlphaInfoEvent
+{
+ uint32_t drawId;
+ uint32_t alphaTestEnable;
+ uint32_t alphaBlendEnable;
+};
+
event DrawInstancedEvent
{
uint32_t drawId;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
index 8c539e3..dd349a1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -709,8 +709,8 @@ static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_P
}
// Merge Output to 4x2 SIMD Tile Format
-INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask)
+INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+ const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -747,6 +747,9 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
}
}
+ // Track alpha events
+ AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
// final write mask
simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
@@ -777,8 +780,8 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
#if USE_8x2_TILE_BACKEND
// Merge Output to 8x2 SIMD16 Tile Format
-INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset)
+INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
+ const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -836,6 +839,9 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SW
}
}
+ // Track alpha events
+ AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+
// final write mask
simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
@@ -1003,9 +1009,9 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
// broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset);
+ OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
#else // USE_8x2_TILE_BACKEND
- OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask);
+ OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId);
#endif // USE_8x2_TILE_BACKEND
if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
index 16418f7..4982025 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
@@ -196,9 +196,9 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+ OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
#else
- OutputMerger4x2(psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+ OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
#endif
// do final depth write after all pixel kills
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
index 4cc1ed5..452fba1 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
@@ -181,9 +181,9 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset);
+ OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
#else
- OutputMerger4x2(psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask);
+ OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
#endif
// do final depth write after all pixel kills
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index c9a37cb..d31fd37 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -773,6 +773,8 @@ void SIMDCALL BinTrianglesImpl(
RDTSC_EVENT(FECullZeroAreaAndBackface, _mm_popcnt_u32(origTriMask ^ triMask), 0);
}
+ AR_EVENT(CullInfoEvent(pDC->drawId, cullZeroAreaMask, cullTris, origTriMask));
+
/// Note: these variable initializations must stay above any 'goto endBenTriangles'
// compute per tri backface
uint32_t frontFaceMask = frontWindingTris;
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h
index 8c26ec6..22acbe0 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -887,6 +887,8 @@ struct SWR_BLEND_CONTEXT
simdvector* result;
simdscalari* oMask;
simdscalari* pMask;
+ uint32_t isAlphaTested;
+ uint32_t isAlphaBlended;
};
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
index 6b7efbf..912a88f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -557,6 +557,8 @@ struct BlendJit : public Builder
ppoMask->setName("ppoMask");
Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask });
ppMask->setName("pMask");
+ Value* AlphaTest1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+ AlphaTest1->setName("AlphaTest1"); // name the new load itself; ppMask keeps its "pMask" name
static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
Value* dst[4];
@@ -590,12 +592,22 @@ struct BlendJit : public Builder
// alpha test
if (state.desc.alphaTestEnable)
{
+ // Gather for archrast stats
+ STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
}
+ else
+ {
+ // Gather for archrast stats
+ STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+ }
// color blend
if (state.blendState.blendEnable)
{
+ // Gather for archrast stats
+ STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+
// clamp sources
Clamp(state.format, src);
Clamp(state.format, src1);
@@ -647,6 +659,11 @@ struct BlendJit : public Builder
STORE(result[i], pResult, { 0, i });
}
}
+ else
+ {
+ // Gather for archrast stats
+ STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+ }
if(state.blendState.logicOpEnable)
{
--
2.7.4
More information about the mesa-dev
mailing list