xf86-video-intel: 3 commits - src/i965_render.c
Eric Anholt
anholt at kemper.freedesktop.org
Mon Apr 14 16:49:51 PDT 2008
src/i965_render.c | 243 ++++++++++++++++++++++++++++--------------------------
1 file changed, 130 insertions(+), 113 deletions(-)
New commits:
commit 092962c962fdf3e33a8d387221448fe7c8c36376
Author: Eric Anholt <eric at anholt.net>
Date: Mon Apr 14 14:02:58 2008 -0700
Make the VS unit state static for 965 render.
diff --git a/src/i965_render.c b/src/i965_render.c
index c26b904..cb2debb 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -287,13 +287,10 @@ static struct brw_surface_state *dest_surf_state, dest_surf_state_local;
static struct brw_surface_state *src_surf_state, src_surf_state_local;
static struct brw_surface_state *mask_surf_state, mask_surf_state_local;
-static struct brw_vs_unit_state *vs_state, vs_state_local;
-
static uint32_t *binding_table;
static int binding_table_entries;
static int dest_surf_offset, src_surf_offset, mask_surf_offset;
-static int vs_offset;
static int vb_offset;
static int binding_table_offset;
static int next_offset, total_state_size;
@@ -484,6 +481,9 @@ typedef struct _gen4_state {
KERNEL_DECL (ps_kernel_masknoca_affine);
KERNEL_DECL (ps_kernel_masknoca_projective);
+ struct brw_vs_unit_state vs_state;
+ PAD64 (brw_vs_unit_state, 0);
+
struct brw_sf_unit_state sf_state;
PAD64 (brw_sf_unit_state, 0);
struct brw_sf_unit_state sf_state_mask;
@@ -719,6 +719,15 @@ gen4_state_init (struct gen4_render_state *render_state)
KERNEL_COPY (ps_kernel_masknoca_projective);
#undef KERNEL_COPY
+ /* Set up the vertex shader to be disabled (passthrough) */
+ memset(&card_state->vs_state, 0, sizeof(card_state->vs_state));
+ card_state->vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES;
+ card_state->vs_state.thread4.urb_entry_allocation_size =
+ URB_VS_ENTRY_SIZE - 1;
+ card_state->vs_state.vs6.vs_enable = 0;
+ card_state->vs_state.vs6.vert_cache_disable = 1;
+
+ /* Set up the sampler default color (always transparent black) */
memset(&card_state->sampler_default_color, 0,
sizeof(card_state->sampler_default_color));
card_state->sampler_default_color.color[0] = 0.0; /* R */
@@ -900,8 +909,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
/* Set up our layout of state in framebuffer. First the general state: */
next_offset = offsetof(gen4_state_t, other_state);
- vs_offset = ALIGN(next_offset, 64);
- next_offset = vs_offset + sizeof(*vs_state);
/* Align VB to native size of elements, for safety */
vb_offset = ALIGN(next_offset, 32);
@@ -1069,18 +1076,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
mask_extend = SAMPLER_STATE_EXTEND_NONE;
}
- /* Set up the vertex shader to be disabled (passthrough) */
- vs_state = &vs_state_local;
- memset(vs_state, 0, sizeof(*vs_state));
- vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
- vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
- vs_state->vs6.vs_enable = 0;
- vs_state->vs6.vert_cache_disable = 1;
-
- vs_state = (void *)(state_base + vs_offset);
- memcpy (vs_state, &vs_state_local, sizeof (vs_state_local));
-
-
/* Begin the long sequence of commands needed to set up the 3D
* rendering pipe
*/
@@ -1159,7 +1154,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
/* Set the pointers to the 3d pipeline state */
OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
- OUT_BATCH(state_base_offset + vs_offset); /* 32 byte aligned */
+ assert((offsetof(gen4_state_t, vs_state) & 31) == 0);
+ OUT_BATCH(state_base_offset + offsetof(gen4_state_t, vs_state));
OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
commit 4bbdd7096d4927fe48673006fff7df922972f116
Author: Eric Anholt <eric at anholt.net>
Date: Mon Apr 14 13:53:36 2008 -0700
Rename some variables in i965_render.c for clarity.
The gen4_render_state is now always called "render_state" (i965_render.c
bookkeeping) and gen4_state_t is now always called "card_state" (the buffer
for state used by the chip).
diff --git a/src/i965_render.c b/src/i965_render.c
index bed49df..c26b904 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -124,12 +124,6 @@ static struct formatinfo i965_tex_formats[] = {
{PICT_a8, BRW_SURFACEFORMAT_A8_UNORM },
};
-/** Private data for gen4 render accel implementation. */
-struct gen4_render_state {
- unsigned char *state_addr;
- unsigned int state_offset;
-};
-
static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format,
uint32_t *sblend, uint32_t *dblend)
{
@@ -522,6 +516,12 @@ typedef struct _gen4_state {
uint8_t other_state[65536];
} gen4_state_t;
+/** Private data for gen4 render accel implementation. */
+struct gen4_render_state {
+ gen4_state_t *card_state;
+ uint32_t card_state_offset;
+};
+
/**
* Sets up the SF state pointing at an SF kernel.
*
@@ -697,12 +697,14 @@ wm_state_init (struct brw_wm_unit_state *wm_state,
* Called at EnterVT to fill in our state buffer with any static information.
*/
static void
-gen4_state_init (gen4_state_t *state, uint32_t state_base_offset)
+gen4_state_init (struct gen4_render_state *render_state)
{
int i, j, k, l;
+ gen4_state_t *card_state = render_state->card_state;
+ uint32_t state_base_offset = render_state->card_state_offset;
#define KERNEL_COPY(kernel) \
- memcpy(state->kernel, kernel ## _static, sizeof(kernel ## _static))
+ memcpy(card_state->kernel, kernel ## _static, sizeof(kernel ## _static))
KERNEL_COPY (sip_kernel);
KERNEL_COPY (sf_kernel);
@@ -717,20 +719,20 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset)
KERNEL_COPY (ps_kernel_masknoca_projective);
#undef KERNEL_COPY
- memset(&state->sampler_default_color, 0,
- sizeof(state->sampler_default_color));
- state->sampler_default_color.color[0] = 0.0; /* R */
- state->sampler_default_color.color[1] = 0.0; /* G */
- state->sampler_default_color.color[2] = 0.0; /* B */
- state->sampler_default_color.color[3] = 0.0; /* A */
+ memset(&card_state->sampler_default_color, 0,
+ sizeof(card_state->sampler_default_color));
+ card_state->sampler_default_color.color[0] = 0.0; /* R */
+ card_state->sampler_default_color.color[1] = 0.0; /* G */
+ card_state->sampler_default_color.color[2] = 0.0; /* B */
+ card_state->sampler_default_color.color[3] = 0.0; /* A */
- state->cc_viewport.min_depth = -1.e35;
- state->cc_viewport.max_depth = 1.e35;
+ card_state->cc_viewport.min_depth = -1.e35;
+ card_state->cc_viewport.max_depth = 1.e35;
- sf_state_init (&state->sf_state,
+ sf_state_init (&card_state->sf_state,
state_base_offset +
offsetof (gen4_state_t, sf_kernel));
- sf_state_init (&state->sf_state_mask,
+ sf_state_init (&card_state->sf_state_mask,
state_base_offset +
offsetof (gen4_state_t, sf_kernel_mask));
@@ -738,12 +740,12 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset)
for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) {
for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) {
for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) {
- sampler_state_init (&state->sampler_state[i][j][k][l][0],
+ sampler_state_init (&card_state->sampler_state[i][j][k][l][0],
i, j,
state_base_offset +
offsetof (gen4_state_t,
sampler_default_color));
- sampler_state_init (&state->sampler_state[i][j][k][l][1],
+ sampler_state_init (&card_state->sampler_state[i][j][k][l][1],
k, l,
state_base_offset +
offsetof (gen4_state_t,
@@ -756,14 +758,14 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset)
for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) {
for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) {
- cc_state_init (&state->cc_state[i][j].state, i, j,
+ cc_state_init (&card_state->cc_state[i][j].state, i, j,
state_base_offset +
offsetof (gen4_state_t, cc_viewport));
}
}
#define SETUP_WM_STATE(kernel, has_mask) \
- wm_state_init(&state->wm_state_ ## kernel [i][j][k][l], \
+ wm_state_init(&card_state->wm_state_ ## kernel [i][j][k][l], \
has_mask, \
state_base_offset + offsetof(gen4_state_t, \
wm_scratch), \
@@ -1500,17 +1502,18 @@ void
gen4_render_state_init(ScrnInfoPtr pScrn)
{
I830Ptr pI830 = I830PTR(pScrn);
- struct gen4_render_state *state;
+ struct gen4_render_state *render_state;
if (pI830->gen4_render_state == NULL)
- pI830->gen4_render_state = calloc(sizeof(*state), 1);
+ pI830->gen4_render_state = calloc(sizeof(*render_state), 1);
- state = pI830->gen4_render_state;
+ render_state = pI830->gen4_render_state;
- state->state_offset = pI830->gen4_render_state_mem->offset;
- state->state_addr = pI830->FbBase + pI830->gen4_render_state_mem->offset;
+ render_state->card_state_offset = pI830->gen4_render_state_mem->offset;
+ render_state->card_state = (gen4_state_t *)
+ (pI830->FbBase + render_state->card_state_offset);
- gen4_state_init((gen4_state_t *)state->state_addr, state->state_offset);
+ gen4_state_init(render_state);
}
/**
@@ -1521,7 +1524,7 @@ gen4_render_state_cleanup(ScrnInfoPtr pScrn)
{
I830Ptr pI830 = I830PTR(pScrn);
- pI830->gen4_render_state->state_addr = NULL;
+ pI830->gen4_render_state->card_state = NULL;
}
/**
commit 456bb529335c28cf60f3b6e5900b804efa5c185a
Author: Carl Worth <cworth at cworth.org>
Date: Fri Nov 9 17:24:21 2007 -0800
Associate one sf_state object with each sf_kernel
(cherry picked from a2b5c23184d19b386fdfd04f578a55566df60132 commit)
diff --git a/src/i965_render.c b/src/i965_render.c
index 5a112d1..bed49df 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -265,6 +265,24 @@ i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
+/* Set up a default static partitioning of the URB, which is supposed to
+ * allow anything we would want to do, at potentially lower performance.
+ */
+#define URB_CS_ENTRY_SIZE 0
+#define URB_CS_ENTRIES 0
+
+#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
+#define URB_VS_ENTRIES 8 // we needs at least 8 entries
+
+#define URB_GS_ENTRY_SIZE 0
+#define URB_GS_ENTRIES 0
+
+#define URB_CLIP_ENTRY_SIZE 0
+#define URB_CLIP_ENTRIES 0
+
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
+
static int urb_vs_start, urb_vs_size;
static int urb_gs_start, urb_gs_size;
static int urb_clip_start, urb_clip_size;
@@ -276,14 +294,13 @@ static struct brw_surface_state *src_surf_state, src_surf_state_local;
static struct brw_surface_state *mask_surf_state, mask_surf_state_local;
static struct brw_vs_unit_state *vs_state, vs_state_local;
-static struct brw_sf_unit_state *sf_state, sf_state_local;
static uint32_t *binding_table;
static int binding_table_entries;
static int dest_surf_offset, src_surf_offset, mask_surf_offset;
static int vs_offset;
-static int sf_offset, vb_offset;
+static int vb_offset;
static int binding_table_offset;
static int next_offset, total_state_size;
static char *state_base;
@@ -473,6 +490,11 @@ typedef struct _gen4_state {
KERNEL_DECL (ps_kernel_masknoca_affine);
KERNEL_DECL (ps_kernel_masknoca_projective);
+ struct brw_sf_unit_state sf_state;
+ PAD64 (brw_sf_unit_state, 0);
+ struct brw_sf_unit_state sf_state_mask;
+ PAD64 (brw_sf_unit_state, 1);
+
WM_STATE_DECL (nomask_affine);
WM_STATE_DECL (nomask_projective);
WM_STATE_DECL (maskca_affine);
@@ -500,6 +522,49 @@ typedef struct _gen4_state {
uint8_t other_state[65536];
} gen4_state_t;
+/**
+ * Sets up the SF state pointing at an SF kernel.
+ *
+ * The SF kernel does coord interp: for each attribute,
+ * calculate dA/dx and dA/dy. Hand these interpolation coefficients
+ * back to SF which then hands pixels off to WM.
+ */
+static void
+sf_state_init (struct brw_sf_unit_state *sf_state, int kernel_offset)
+{
+ memset(sf_state, 0, sizeof(*sf_state));
+ sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+ sf_state->sf1.single_program_flow = 1;
+ sf_state->sf1.binding_table_entry_count = 0;
+ sf_state->sf1.thread_priority = 0;
+ sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
+ sf_state->sf1.illegal_op_exception_enable = 1;
+ sf_state->sf1.mask_stack_exception_enable = 1;
+ sf_state->sf1.sw_exception_enable = 1;
+ sf_state->thread2.per_thread_scratch_space = 0;
+ /* scratch space is not used in our kernel */
+ sf_state->thread2.scratch_space_base_pointer = 0;
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ /* don't smash vertex header, read start from dw8 */
+ sf_state->thread3.urb_entry_read_offset = 1;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ assert((kernel_offset & 63) == 0);
+ sf_state->thread0.kernel_start_pointer = kernel_offset >> 6;
+}
+
static void
sampler_state_init (struct brw_sampler_state *sampler_state,
sampler_state_filter_t filter,
@@ -662,6 +727,13 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset)
state->cc_viewport.min_depth = -1.e35;
state->cc_viewport.max_depth = 1.e35;
+ sf_state_init (&state->sf_state,
+ state_base_offset +
+ offsetof (gen4_state_t, sf_kernel));
+ sf_state_init (&state->sf_state_mask,
+ state_base_offset +
+ offsetof (gen4_state_t, sf_kernel_mask));
+
for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) {
for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) {
for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) {
@@ -772,6 +844,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
mask_tiled = 0;
uint32_t dst_format, dst_offset, dst_pitch, dst_tile_format = 0,
dst_tiled = 0;
+ uint32_t sf_state_offset;
sampler_state_filter_t src_filter, mask_filter;
sampler_state_extend_t src_extend, mask_extend;
Bool is_affine_src, is_affine_mask, is_affine;
@@ -828,9 +901,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
vs_offset = ALIGN(next_offset, 64);
next_offset = vs_offset + sizeof(*vs_state);
- sf_offset = ALIGN(next_offset, 32);
- next_offset = sf_offset + sizeof(*sf_state);
-
/* Align VB to native size of elements, for safety */
vb_offset = ALIGN(next_offset, 32);
next_offset = vb_offset + vb_size;
@@ -862,24 +932,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
vb = (void *)(state_base + vb_offset);
- /* Set up a default static partitioning of the URB, which is supposed to
- * allow anything we would want to do, at potentially lower performance.
- */
-#define URB_CS_ENTRY_SIZE 0
-#define URB_CS_ENTRIES 0
-
-#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
-#define URB_VS_ENTRIES 8 // we needs at least 8 entries
-
-#define URB_GS_ENTRY_SIZE 0
-#define URB_GS_ENTRIES 0
-
-#define URB_CLIP_ENTRY_SIZE 0
-#define URB_CLIP_ENTRIES 0
-
-#define URB_SF_ENTRY_SIZE 2
-#define URB_SF_ENTRIES 1
-
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
urb_gs_start = urb_vs_start + urb_vs_size;
@@ -1026,49 +1078,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
vs_state = (void *)(state_base + vs_offset);
memcpy (vs_state, &vs_state_local, sizeof (vs_state_local));
- /* Set up the SF kernel to do coord interp: for each attribute,
- * calculate dA/dx and dA/dy. Hand these interpolation coefficients
- * back to SF which then hands pixels off to WM.
- */
- sf_state = &sf_state_local;
- memset(sf_state, 0, sizeof(*sf_state));
- if (pMask) {
- sf_state->thread0.kernel_start_pointer = (state_base_offset +
- offsetof(gen4_state_t, sf_kernel_mask)) >> 6;
- } else {
- sf_state->thread0.kernel_start_pointer = (state_base_offset +
- offsetof(gen4_state_t, sf_kernel)) >> 6;
- }
- sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
- sf_state->sf1.single_program_flow = 1;
- sf_state->sf1.binding_table_entry_count = 0;
- sf_state->sf1.thread_priority = 0;
- sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
- sf_state->sf1.illegal_op_exception_enable = 1;
- sf_state->sf1.mask_stack_exception_enable = 1;
- sf_state->sf1.sw_exception_enable = 1;
- sf_state->thread2.per_thread_scratch_space = 0;
- /* scratch space is not used in our kernel */
- sf_state->thread2.scratch_space_base_pointer = 0;
- sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
- sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
- sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
- /* don't smash vertex header, read start from dw8 */
- sf_state->thread3.urb_entry_read_offset = 1;
- sf_state->thread3.dispatch_grf_start_reg = 3;
- sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
- sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
- sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
- sf_state->thread4.stats_enable = 1;
- sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
- sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
- sf_state->sf6.scissor = 0;
- sf_state->sf7.trifan_pv = 2;
- sf_state->sf6.dest_org_vbias = 0x8;
- sf_state->sf6.dest_org_hbias = 0x8;
-
- sf_state = (void *)(state_base + sf_offset);
- memcpy (sf_state, &sf_state_local, sizeof (sf_state_local));
/* Begin the long sequence of commands needed to set up the 3D
* rendering pipe
@@ -1151,7 +1160,16 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
OUT_BATCH(state_base_offset + vs_offset); /* 32 byte aligned */
OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
- OUT_BATCH(state_base_offset + sf_offset); /* 32 byte aligned */
+
+ if (pMask) {
+ sf_state_offset = state_base_offset +
+ offsetof(gen4_state_t, sf_state_mask);
+ } else {
+ sf_state_offset = state_base_offset +
+ offsetof(gen4_state_t, sf_state);
+ }
+ assert((sf_state_offset & 31) == 0);
+ OUT_BATCH(sf_state_offset);
/* Shorthand for long array lookup */
#define OUT_WM_KERNEL(kernel) do { \
More information about the xorg-commit mailing list