[Mesa-dev] [PATCH v2 38/45] swr/rast: Fix 64bit float loads in x86 lowering pass
George Kyriazis
george.kyriazis at intel.com
Wed Apr 18 01:32:18 UTC 2018
---
.../drivers/swr/rasterizer/jitter/builder_mem.cpp | 39 +---------------------
.../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 31 +++++++++++++----
2 files changed, 25 insertions(+), 45 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index c791278..f0cd441 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -201,44 +201,7 @@ namespace SwrJit
/// @param scale - value to scale indices by
Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
{
- Value* vGather;
-
- // use avx2 gather instruction if available
- if (JM()->mArch.AVX2())
- {
- vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth / 2)), VectorType::get(mDoubleTy, mVWidth / 2));
- vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
- }
- else
- {
- Value* pStack = STACKSAVE();
-
- // store vSrc on the stack. this way we can select between a valid load address and the vSrc address
- Value* vSrcPtr = ALLOCA(vSrc->getType());
- SetTempAlloca(vSrcPtr);
- STORE(vSrc, vSrcPtr);
-
- vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
- Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale));
- Value *vOffsets = MUL(vIndices, vScaleVec);
- for (uint32_t i = 0; i < mVWidth / 2; ++i)
- {
- // single component byte index
- Value *offset = VEXTRACT(vOffsets, C(i));
- // byte pointer to component
- Value *loadAddress = GEP(pBase, offset);
- loadAddress = BITCAST(loadAddress, PointerType::get(mDoubleTy, 0));
- // pointer to the value to load if we're masking off a component
- Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
- Value *selMask = VEXTRACT(vMask, C(i));
- // switch in a safe address to load if we're trying to access a vertex
- Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
- Value *val = LOAD(validAddress);
- vGather = VINSERT(vGather, val, C(i));
- }
- STACKRESTORE(pStack);
- }
- return vGather;
+ return VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index cdfddf3..767866f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -230,7 +230,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
}
// Fetch attributes from memory and output to a simdvertex struct
- // since VGATHER has a perf penalty on HSW vs BDW, allow client to choose which fetch method to use
JitGatherVertices(fetchState, streams, vIndices, pVtxOut);
RET_VOID();
@@ -763,13 +762,31 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
// if we need to gather the component
if (compCtrl[i] == StoreSrc)
{
- Value *vMaskLo = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3}));
- Value *vMaskHi = VSHUFFLE(vGatherMask, VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7}));
+ Value* vShufLo;
+ Value* vShufHi;
+ Value* vShufAll;
- Value *vOffsetsLo = VEXTRACTI128(vOffsets, C(0));
- Value *vOffsetsHi = VEXTRACTI128(vOffsets, C(1));
+ if (mVWidth == 8)
+ {
+ vShufLo = C({ 0, 1, 2, 3 });
+ vShufHi = C({ 4, 5, 6, 7 });
+ vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
+ }
+ else
+ {
+ SWR_ASSERT(mVWidth == 16);
+ vShufLo = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
+ vShufHi = C({ 8, 9, 10, 11, 12, 13, 14, 15 });
+ vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+ }
+
+ Value *vMaskLo = VSHUFFLE(vGatherMask, vGatherMask, vShufLo);
+ Value *vMaskHi = VSHUFFLE(vGatherMask, vGatherMask, vShufHi);
+
+ Value *vOffsetsLo = VSHUFFLE(vOffsets, vOffsets, vShufLo);
+ Value *vOffsetsHi = VSHUFFLE(vOffsets, vOffsets, vShufHi);
- Value *vZeroDouble = VECTOR_SPLAT(4, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
+ Value *vZeroDouble = VECTOR_SPLAT(mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
Value* pGatherLo = GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo);
Value* pGatherHi = GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi);
@@ -777,7 +794,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
pGatherLo = VCVTPD2PS(pGatherLo);
pGatherHi = VCVTPD2PS(pGatherHi);
- Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, C({0, 1, 2, 3, 4, 5, 6, 7}));
+ Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, vShufAll);
vVertexElements[currentVertexElement++] = pGather;
}
--
2.7.4
More information about the mesa-dev mailing list