[Mesa-dev] [PATCH 02/45] swr/rast: Introduce JIT_MEM_CLIENT

George Kyriazis george.kyriazis at intel.com
Fri Apr 13 19:01:42 UTC 2018


Help assist with usage tracking of memory accesses
---
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  | 58 ++++++++++++++--------
 .../drivers/swr/rasterizer/jitter/builder_mem.h    | 47 ++++++++++++------
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp    |  6 +--
 3 files changed, 71 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 6fa60a1..ac01223 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -36,6 +36,9 @@
 
 namespace SwrJit
 {
+    void Builder::AssertRastyMemoryParams(Value* ptr, JIT_MEM_CLIENT usage)
+    {
+    }
 
     Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
     {
@@ -69,28 +72,33 @@ namespace SwrJit
         return IN_BOUNDS_GEP(ptr, indices);
     }
 
-    LoadInst* Builder::LOAD(Value *Ptr, const char *Name)
+    LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(Ptr, usage);
         return IRB()->CreateLoad(Ptr, Name);
     }
 
-    LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name)
+    LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(Ptr, usage);
         return IRB()->CreateLoad(Ptr, Name);
     }
 
-    LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name)
+    LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(Ptr, usage);
         return IRB()->CreateLoad(Ty, Ptr, Name);
     }
 
-    LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name)
+    LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(Ptr, usage);
         return IRB()->CreateLoad(Ptr, isVolatile, Name);
     }
 
-    LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name)
+    LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(basePtr, usage);
         std::vector<Value*> valIndices;
         for (auto i : indices)
             valIndices.push_back(C(i));
@@ -158,8 +166,10 @@ namespace SwrJit
     /// @param vIndices - SIMD wide value of VB byte offsets
     /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
     /// @param scale - value to scale indices by
-    Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+    Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(pBase, usage);
+
         Value *vGather;
         Value *pBasePtr = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
 
@@ -204,8 +214,10 @@ namespace SwrJit
         return vGather;
     }
 
-    Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+    Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(pBase, usage);
+
         Value *vGather = VUNDEF_F_16();
 
         // use AVX512F gather instruction if available
@@ -244,8 +256,10 @@ namespace SwrJit
     /// @param vIndices - SIMD wide value of VB byte offsets
     /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
     /// @param scale - value to scale indices by
-    Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
+    Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(pBase, usage);
+
         Value* vGather;
 
         // use avx2 gather instruction if available
@@ -286,8 +300,10 @@ namespace SwrJit
         return vGather;
     }
 
-    Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
+    Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
     {
+        AssertRastyMemoryParams(pBase, usage);
+
         Value *vGather = VUNDEF_I_16();
 
         // use AVX512F gather instruction if available
@@ -380,21 +396,21 @@ namespace SwrJit
     }
 
     void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
-        Value* mask, Value* vGatherComponents[], bool bPackedOutput)
+        Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
     {
         const SWR_FORMAT_INFO &info = GetFormatInfo(format);
         if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
         {
-            GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
+            GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
         }
         else
         {
-            GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
+            GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
         }
     }
 
     void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-        Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
+        Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
     {
         switch (info.bpp / info.numComps)
         {
@@ -407,7 +423,7 @@ namespace SwrJit
 
             // always have at least one component out of x or y to fetch
 
-            vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+            vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
             // e.g. result of first 8x32bit integer gather for 16bit components
             // 256i - 0    1    2    3    4    5    6    7
             //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -419,7 +435,7 @@ namespace SwrJit
                 // offset base to the next components(zw) in the vertex to gather
                 pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
 
-                vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+                vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
                 // e.g. result of second 8x32bit integer gather for 16bit components
                 // 256i - 0    1    2    3    4    5    6    7
                 //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw 
@@ -447,7 +463,7 @@ namespace SwrJit
                 uint32_t swizzleIndex = info.swizzle[i];
 
                 // Gather a SIMD of components
-                vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
+                vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
 
                 // offset base to the next component to gather
                 pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
@@ -461,14 +477,14 @@ namespace SwrJit
     }
 
     void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-        Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
+        Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
     {
         switch (info.bpp / info.numComps)
         {
         case 8:
         {
             Value* vGatherMaskedVal = VIMMED1((int32_t)0);
-            Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+            Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
             // e.g. result of an 8x32bit integer gather for 8bit components
             // 256i - 0    1    2    3    4    5    6    7
             //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw 
@@ -485,7 +501,7 @@ namespace SwrJit
 
             // always have at least one component out of x or y to fetch
 
-            vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+            vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
             // e.g. result of first 8x32bit integer gather for 16bit components
             // 256i - 0    1    2    3    4    5    6    7
             //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
@@ -497,7 +513,7 @@ namespace SwrJit
                 // offset base to the next components(zw) in the vertex to gather
                 pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
 
-                vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
+                vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
                 // e.g. result of second 8x32bit integer gather for 16bit components
                 // 256i - 0    1    2    3    4    5    6    7
                 //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw 
@@ -526,7 +542,7 @@ namespace SwrJit
                 uint32_t swizzleIndex = info.swizzle[i];
 
                 // Gather a SIMD of components
-                vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);
+                vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
 
                 // offset base to the next component to gather
                 pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index a50ecc0..8f79425 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -29,16 +29,32 @@
 ******************************************************************************/
 #pragma once
 
+public:
+
+typedef enum _JIT_MEM_CLIENT
+{
+    MEM_CLIENT_RASTY,
+    GFX_MEM_CLIENT_FETCH,
+    GFX_MEM_CLIENT_SAMPLER
+} JIT_MEM_CLIENT;
+
+protected:
+
+virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+void AssertRastyMemoryParams(Value* ptr, JIT_MEM_CLIENT usage);
+
+public:
+
 Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
 Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
 Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
 Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
 
-virtual LoadInst* LOAD(Value *Ptr, const char *Name);
-virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "");
-virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "");
-virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "");
-virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "");
+virtual LoadInst* LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
 LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
 StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
@@ -47,22 +63,20 @@ StoreInst *STOREV(Value *Val, Value *BasePtr, const std::initializer_list<Value*
 Value *MASKLOADD(Value* src, Value* mask);
 
 void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
-    Value* mask, Value* vGatherComponents[], bool bPackedOutput);
-
-virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+    Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
-virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
-Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
 void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-    Value* mask, Value* vGatherComponents[], bool bPackedOutput);
+    Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
-virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
-Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
+virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
+Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
-virtual void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-    Value* mask, Value* vGatherComponents[], bool bPackedOutput);
+void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
+    Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
 Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
 
@@ -81,4 +95,5 @@ Value* pScatterStackSrc{ nullptr };
 Value* pScatterStackOffsets{ nullptr };
 
 
-virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }
+
+//virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 2ffce1b..5feb5fa 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1333,7 +1333,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                                 // But, we know that elements must be aligned for FETCH. :)
                                 // Right shift the offset by a bit and then scale by 2 to remove the sign extension.
                                 Value *shiftedOffsets16 = LSHR(vOffsets16, 1);
-                                pVtxSrc2[currentVertexElement++] = GATHERPS_16(gatherSrc16, pStreamBaseGFX, shiftedOffsets16, vGatherMask16, 2);
+                                pVtxSrc2[currentVertexElement++] = GATHERPS_16(gatherSrc16, pStreamBaseGFX, shiftedOffsets16, vGatherMask16, 2, GFX_MEM_CLIENT_FETCH);
                             }
                             else
                             {
@@ -1362,7 +1362,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                                 // But, we know that elements must be aligned for FETCH. :)
                                 // Right shift the offset by a bit and then scale by 2 to remove the sign extension.
                                 Value *vShiftedOffsets = LSHR(vOffsets, 1);
-                                vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBaseGFX, vShiftedOffsets, vGatherMask, 2);
+                                vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBaseGFX, vShiftedOffsets, vGatherMask, 2, GFX_MEM_CLIENT_FETCH);
                             }
                             else
                             {
@@ -1838,7 +1838,7 @@ Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
 
         // if valid, load the index. if not, load 0 from the stack
         Value* pValid = SELECT(mask, pIndex, pZeroIndex);
-        Value *index = LOAD(pValid, "valid index");
+        Value *index = LOAD(pValid, "valid index", GFX_MEM_CLIENT_FETCH);
 
         // zero extended index to 32 bits and insert into the correct simd lane
         index = Z_EXT(index, mInt32Ty);
-- 
2.7.4



More information about the mesa-dev mailing list