[Mesa-dev] [PATCH 02/45] swr/rast: Introduce JIT_MEM_CLIENT
George Kyriazis
george.kyriazis at intel.com
Fri Apr 13 19:01:42 UTC 2018
Help assist with usage tracking of memory accesses
---
.../drivers/swr/rasterizer/jitter/builder_mem.cpp | 58 ++++++++++++++--------
.../drivers/swr/rasterizer/jitter/builder_mem.h | 47 ++++++++++++------
.../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 6 +--
3 files changed, 71 insertions(+), 40 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 6fa60a1..ac01223 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -36,6 +36,9 @@
namespace SwrJit
{
+ void Builder::AssertRastyMemoryParams(Value* ptr, JIT_MEM_CLIENT usage)
+ {
+ }
Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
{
@@ -69,28 +72,33 @@ namespace SwrJit
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value *Ptr, const char *Name)
+ LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name)
+ LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name)
+ LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name)
+ LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
}
- LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name)
+ LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(basePtr, usage);
std::vector<Value*> valIndices;
for (auto i : indices)
valIndices.push_back(C(i));
@@ -158,8 +166,10 @@ namespace SwrJit
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+ Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(pBase, usage);
+
Value *vGather;
Value *pBasePtr = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
@@ -204,8 +214,10 @@ namespace SwrJit
return vGather;
}
- Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+ Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(pBase, usage);
+
Value *vGather = VUNDEF_F_16();
// use AVX512F gather instruction if available
@@ -244,8 +256,10 @@ namespace SwrJit
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
+ Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(pBase, usage);
+
Value* vGather;
// use avx2 gather instruction if available
@@ -286,8 +300,10 @@ namespace SwrJit
return vGather;
}
- Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+ Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
{
+ AssertRastyMemoryParams(pBase, usage);
+
Value *vGather = VUNDEF_I_16();
// use AVX512F gather instruction if available
@@ -380,21 +396,21 @@ namespace SwrJit
}
void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
{
const SWR_FORMAT_INFO &info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
{
- GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
+ GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
}
else
{
- GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
+ GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
}
}
void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
@@ -407,7 +423,7 @@ namespace SwrJit
// always have at least one component out of x or y to fetch
- vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -419,7 +435,7 @@ namespace SwrJit
// offset base to the next components(zw) in the vertex to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@@ -447,7 +463,7 @@ namespace SwrJit
uint32_t swizzleIndex = info.swizzle[i];
// Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
+ vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
// offset base to the next component to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
@@ -461,14 +477,14 @@ namespace SwrJit
}
void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
+ Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
case 8:
{
Value* vGatherMaskedVal = VIMMED1((int32_t)0);
- Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of an 8x32bit integer gather for 8bit components
// 256i - 0 1 2 3 4 5 6 7
// xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
@@ -485,7 +501,7 @@ namespace SwrJit
// always have at least one component out of x or y to fetch
- vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of first 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -497,7 +513,7 @@ namespace SwrJit
// offset base to the next components(zw) in the vertex to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+ vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
// zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
@@ -526,7 +542,7 @@ namespace SwrJit
uint32_t swizzleIndex = info.swizzle[i];
// Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
+ vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
// offset base to the next component to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index a50ecc0..8f79425 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -29,16 +29,32 @@
******************************************************************************/
#pragma once
+public:
+
+typedef enum _JIT_MEM_CLIENT
+{
+ MEM_CLIENT_RASTY,
+ GFX_MEM_CLIENT_FETCH,
+ GFX_MEM_CLIENT_SAMPLER
+} JIT_MEM_CLIENT;
+
+protected:
+
+virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+void AssertRastyMemoryParams(Value* ptr, JIT_MEM_CLIENT usage);
+
+public:
+
Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
-virtual LoadInst* LOAD(Value *Ptr, const char *Name);
-virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "");
-virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "");
-virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "");
-virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "");
+virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
@@ -47,22 +63,20 @@ StoreInst *STOREV(Value *Val, Value *BasePtr, const std::initializer_list<Value*
Value *MASKLOADD(Value* src, Value* mask);
void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput);
-
-virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
-virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
-Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput);
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
-virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
-Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
-virtual void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput);
+void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
+ Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
@@ -81,4 +95,5 @@ Value* pScatterStackSrc{ nullptr };
Value* pScatterStackOffsets{ nullptr };
-virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }
+
+//virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 2ffce1b..5feb5fa 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1333,7 +1333,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
// But, we know that elements must be aligned for FETCH. :)
// Right shift the offset by a bit and then scale by 2 to remove the sign extension.
Value *shiftedOffsets16 = LSHR(vOffsets16, 1);
- pVtxSrc2[currentVertexElement++] = GATHERPS_16(gatherSrc16, pStreamBaseGFX, shiftedOffsets16, vGatherMask16, 2);
+ pVtxSrc2[currentVertexElement++] = GATHERPS_16(gatherSrc16, pStreamBaseGFX, shiftedOffsets16, vGatherMask16, 2, GFX_MEM_CLIENT_FETCH);
}
else
{
@@ -1362,7 +1362,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
// But, we know that elements must be aligned for FETCH. :)
// Right shift the offset by a bit and then scale by 2 to remove the sign extension.
Value *vShiftedOffsets = LSHR(vOffsets, 1);
- vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBaseGFX, vShiftedOffsets, vGatherMask, 2);
+ vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBaseGFX, vShiftedOffsets, vGatherMask, 2, GFX_MEM_CLIENT_FETCH);
}
else
{
@@ -1838,7 +1838,7 @@ Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
// if valid, load the index. if not, load 0 from the stack
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
- Value *index = LOAD(pValid, "valid index");
+ Value *index = LOAD(pValid, "valid index", GFX_MEM_CLIENT_FETCH);
// zero extended index to 32 bits and insert into the correct simd lane
index = Z_EXT(index, mInt32Ty);
--
2.7.4
More information about the mesa-dev
mailing list