[Mesa-dev] [PATCH v3 2/3] nvc0: rewrite query buffer write macro to output 64-bit predicates
Rhys Perry
pendingchaos02 at gmail.com
Tue May 22 23:15:43 UTC 2018
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme | 91 ++++++++++++----------
src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h | 64 ++++++++-------
src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 81 ++++++++++---------
3 files changed, 133 insertions(+), 103 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
index 38c2e86843..0e5ad66f56 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme
@@ -494,62 +494,75 @@ daic_runout_check:
/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
*
- * This is a combination macro for all of our query buffer object needs.
- * It has the option to clamp results to a configurable amount, as well as
+ * This macro writes out a query's result into a resource.
+ * It has the options to clamp the result to a configurable amount and
* to write out one or two words.
*
* We use the query engine to write out the values, and expect the query
* address to point to the right place.
*
- * arg = clamp value (0 means unclamped). clamped means just 1 written value.
- * parm[0] = LSB of end value
- * parm[1] = MSB of end value
- * parm[2] = LSB of start value
- * parm[3] = MSB of start value
- * parm[4] = desired sequence
- * parm[5] = actual sequence
- * parm[6] = query high address
+ * Also note that although the result availability is determined at the start,
+ * the macro exits for an unavailable result only right before clamping.
+ *
+ * arg = write64 | (clamp<<1)
+ * parm[0] = desired sequence
+ * parm[1] = actual sequence
+ * parm[2] = LSB of end value
+ * parm[3] = MSB of end value
+ * parm[4] = LSB of start value
+ * parm[5] = MSB of start value
+ * parm[6] = clamp value
* parm[7] = query low address
+ * parm[8] = query high address
*/
.section #mme9097_query_buffer_write
+/* determine result availability */
+ parm $r2
+ parm $r3
+ mov $r6 (sub $r3 $r2)
+ mov $r6 (sbb 0x0 0x0)
+/* calculate result and write high into $r3 and low into $r2 */
parm $r2
parm $r3
parm $r4
- parm $r5 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
- parm $r6
- parm $r7
- mov $r6 (sub $r7 $r6) /* actual - desired */
- mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
- parm $r7
- exit braz $r6 #qbw_ready
- parm $r6
-qbw_ready:
+ parm $r5
mov $r2 (sub $r2 $r4)
- braz $r1 #qbw_postclamp
mov $r3 (sbb $r3 $r5)
- branz annul $r3 #qbw_clamp
- mov $r4 (sub $r1 $r2)
- mov $r4 (sbb 0x0 0x0)
- braz annul $r4 #qbw_postclamp
-qbw_clamp:
- mov $r2 $r1
-qbw_postclamp:
- send $r7
- send $r6
+ braz $r6 #qbw_available
+ parm $r4 /* clamp value */
+ exit parm $r7 /* result not available - drain remaining parameters and exit */
+ parm $r7
+qbw_available:
+ mov $r6 (extrinsrt 0x0 $r1 1 1 0)
+ braz annul $r6 #qbw_write
+ branz $r3 #qbw_doclamp /* clamp if the high word is set */
+ mov $r7 (sub $r4 $r2)
+ mov $r7 (sbb 0x0 0x0)
+ braz annul $r7 #qbw_write
+qbw_doclamp:
+ mov $r2 $r4
+ mov $r3 0x0
+qbw_write:
+ parm $r5
+ parm $r4 maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
+ send $r4
+ send $r5
send $r2
- branz $r1 #qbw_done
- mov $r4 0x1000
- send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+ mov $r6 (extrinsrt 0x0 $r1 0 1 0)
+ braz $r6 #qbw_done
+ mov $r7 0x1000
+ send (extrinsrt 0x0 $r7 0 16 16)
+ /* XXX: things seem to mess up if $r6 is replaced with 0x4 in the add */
+ mov $r6 0x4
+ mov $r5 (add $r5 $r6)
+ mov $r4 (adc $r4 0x0)
maddr 0x16c0 /* QUERY_ADDRESS_HIGH */
- mov $r5 0x4
- mov $r6 (add $r6 $r5)
- mov $r7 (adc $r7 0x0)
- send $r7
- send $r6
+ send $r4
+ send $r5
send $r3
qbw_done:
- exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
- maddrsend 0x44
+ exit send (extrinsrt 0x0 $r7 0 16 16)
+ maddrsend 0x44 /* SERIALIZE */
/* NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE:
*
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
index 49c0891114..3ebfda47ee 100644
--- a/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
+++ b/src/gallium/drivers/nouveau/nvc0/mme/com9097.mme.h
@@ -336,41 +336,47 @@ uint32_t mme9097_draw_arrays_indirect_count[] = {
uint32_t mme9097_query_buffer_write[] = {
0x00000201,
0x00000301,
-/* 0x000b: qbw_ready */
- 0x00000401,
- 0x05b00551,
-/* 0x0012: qbw_clamp */
-/* 0x0013: qbw_postclamp */
- 0x00000601,
- 0x00000701,
- 0x0005be10,
+ 0x00049e10,
+/* 0x000e: qbw_available */
0x00060610,
-/* 0x0020: qbw_done */
- 0x00000701,
- 0x0000b087,
- 0x00000601,
+ 0x00000201,
+/* 0x0014: qbw_doclamp */
+/* 0x0016: qbw_write */
+ 0x00000301,
+ 0x00000401,
+ 0x00000501,
0x00051210,
- 0x0001c807,
+/* 0x0026: qbw_done */
0x00075b10,
- 0x00011837,
- 0x00048c10,
- 0x00060410,
- 0x0000a027,
- 0x00000a11,
- 0x00003841,
- 0x00003041,
+ 0x00013007,
+ 0x00000401,
+ 0x00000781,
+ 0x00000701,
+ 0x00424612,
+ 0x0001f027,
+ 0x00011817,
+ 0x0004a710,
+ 0x00060710,
+ 0x0000f827,
+ 0x00002211,
+ 0x00000311,
+ 0x00000501,
+ 0x05b00451,
+ 0x00002041,
+ 0x00002841,
0x00001041,
- 0x00028817,
- 0x04000411,
- 0x84010042,
+ 0x00404612,
+ 0x0002b007,
+ 0x04000711,
+ 0x8401c042,
+ 0x00010611,
+ 0x0001ad10,
+ 0x00022410,
0x05b00021,
- 0x00010511,
- 0x00017610,
- 0x00023f10,
- 0x00003841,
- 0x00003041,
+ 0x00002041,
+ 0x00002841,
0x00001841,
- 0x840100c2,
+ 0x8401c0c2,
0x00110071,
};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index db5f5092ba..835742bbc6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -381,6 +381,8 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
struct nvc0_hw_query *hq = nvc0_hw_query(q);
struct nv04_resource *buf = nv04_resource(resource);
unsigned qoffset = 0, stride;
+ bool predicate = false;
+ uint32_t arg;
assert(!hq->funcs || !hq->funcs->get_query_result);
@@ -401,18 +403,27 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
return;
}
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+ predicate = true;
+ break;
+ }
+
+ arg = result_type >= PIPE_QUERY_TYPE_I64 ? 1 : 0;
+ /* Only clamp if the output is 32-bit or a predicate; we don't bother
+ * clamping 64-bit outputs. */
+ if ((result_type<PIPE_QUERY_TYPE_I64 || predicate) && index!=-1)
+ arg |= 1 << 1;
+
/* If the fence guarding this query has not been emitted, that makes a lot
* of the following logic more complicated.
*/
if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
nouveau_fence_emit(hq->fence);
- /* We either need to compute a 32- or 64-bit difference between 2 values,
- * and then store the result as either a 32- or 64-bit value. As such let's
- * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
- * ones), and have one macro that clamps result to i32, u32, or just
- * outputs the difference (no need to worry about 64-bit clamping).
- */
if (hq->state != NVC0_HW_QUERY_STATE_READY)
nvc0_hw_query_update(nvc0->screen->base.client, q);
@@ -425,22 +436,20 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
nouveau_pushbuf_space(push, 32, 2, 0);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
- BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 9);
- switch (q->type) {
- case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: /* XXX what if 64-bit? */
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
- PUSH_DATA(push, 0x00000001);
- break;
- default:
- if (result_type == PIPE_QUERY_TYPE_I32)
- PUSH_DATA(push, 0x7fffffff);
- else if (result_type == PIPE_QUERY_TYPE_U32)
- PUSH_DATA(push, 0xffffffff);
- else
- PUSH_DATA(push, 0x00000000);
- break;
+ BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 10);
+ PUSH_DATA(push, arg);
+
+ if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+ } else if (hq->is64bit) {
+ PUSH_DATA(push, hq->fence->sequence);
+ nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ } else {
+ PUSH_DATA(push, hq->sequence);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
}
switch (q->type) {
@@ -460,6 +469,11 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
break;
}
+ /* We need to compute the difference between 2 values, and then store the
+ * result as either a 32- or 64-bit value. As such let's treat all inputs
+ * as 64-bit (and just push an extra 0 for the 32-bit ones), and clamp
+ * the result to a limit if it's 32-bit or a predicate.
+ */
if (hq->is64bit || qoffset) {
nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset + 16 * index,
8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
@@ -480,20 +494,17 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
PUSH_DATA(push, 0);
}
- if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
- PUSH_DATA(push, 0);
- PUSH_DATA(push, 0);
- } else if (hq->is64bit) {
- PUSH_DATA(push, hq->fence->sequence);
- nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
- 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
- } else {
- PUSH_DATA(push, hq->sequence);
- nouveau_pushbuf_data(push, hq->bo, hq->offset,
- 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
- }
- PUSH_DATAh(push, buf->address + offset);
+ if (predicate)
+ PUSH_DATA(push, 0x00000001);
+ else if (result_type == PIPE_QUERY_TYPE_I32)
+ PUSH_DATA(push, 0x7fffffff);
+ else if (result_type == PIPE_QUERY_TYPE_U32)
+ PUSH_DATA(push, 0xffffffff);
+ else
+ PUSH_DATA(push, 0x00000000);
+
PUSH_DATA (push, buf->address + offset);
+ PUSH_DATAh(push, buf->address + offset);
util_range_add(&buf->valid_buffer_range, offset,
offset + (result_type >= PIPE_QUERY_TYPE_I64 ? 8 : 4));
--
2.14.3
More information about the mesa-dev
mailing list