xf86-video-intel: Branch 'modesetting' - 29 commits - src/exa_sf.g4a src/exa_sf_mask.g4a src/exa_sf_mask_prog.h src/exa_sf_prog.h src/exa_wm_masknoca.g4a src/exa_wm_masknoca_prog.h src/exa_wm_nomask.g4a src/exa_wm_nomask_prog.h src/i830_driver.c src/i830_exa.c src/i830.h src/i830_memory.c src/i830_rotate.c src/i830_tv.c src/i965_exa_render.c src/Makefile.am src/rotation_sf0.g4a src/rotation_sf90.g4a src/rotation_sf_prog0.h src/rotation_sf_prog90.h src/rotation_wm0.g4a src/rotation_wm90.g4a src/rotation_wm_prog0.h src/rotation_wm_prog90.h
Keith Packard
keithp at kemper.freedesktop.org
Sun Jan 7 01:08:30 EET 2007
src/Makefile.am | 11
src/exa_sf.g4a | 17
src/exa_sf_mask.g4a | 53 ++
src/exa_sf_mask_prog.h | 25 +
src/exa_sf_prog.h | 17
src/exa_wm_masknoca.g4a | 202 ++++++++
src/exa_wm_masknoca_prog.h | 95 +++
src/exa_wm_nomask.g4a | 143 +++++
src/exa_wm_nomask_prog.h | 70 ++
src/i830.h | 3
src/i830_driver.c | 27 -
src/i830_exa.c | 11
src/i830_memory.c | 76 ++-
src/i830_rotate.c | 753 ++++++++++++++++++++++++++++++
src/i830_tv.c | 177 ++++++-
src/i965_exa_render.c | 1108 +++++++++++++++++++++++++++++++++++++++++++++
src/rotation_sf0.g4a | 17
src/rotation_sf90.g4a | 17
src/rotation_sf_prog0.h | 17
src/rotation_sf_prog90.h | 17
src/rotation_wm0.g4a | 123 ++++
src/rotation_wm90.g4a | 127 +++++
src/rotation_wm_prog0.h | 68 ++
src/rotation_wm_prog90.h | 68 ++
24 files changed, 3201 insertions(+), 41 deletions(-)
New commits:
diff-tree 736d82a6b43f174cb95b425faacd4b0b889916fa (from parents)
Merge: 53b42f5bc7a58d02106436486e5bb56e56dbbfa1 4c790f614ecba1f6468e51779cfaf0e36b6b17ad
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Mon Dec 4 15:48:04 2006 +0800
Merge branch 'modesetting-origin' into modesetting
diff-tree 53b42f5bc7a58d02106436486e5bb56e56dbbfa1 (from 71946bcdc3c68c220996afac944698eea1974a36)
Author: Zou Nan hai <nanhai.zou at intel.com>
Date: Sat Jan 6 14:59:14 2007 -0800
support NTSC 480i M-J, PAL 576i for 640x480-1280x1024 sizes
I still have problems with non-interlaced modes and high-resolution modes.
Also, I don't know how to pick up those modes in xorg.conf.
diff --git a/src/i830_tv.c b/src/i830_tv.c
index 5cf36a5..4a79d2e 100644
--- a/src/i830_tv.c
+++ b/src/i830_tv.c
@@ -250,6 +250,53 @@ const tv_mode_t tv_modes[] = {
.ru =-0.0957, .gu =-0.1879, .bu = 0.2836, .au = 1.0000,
.rv = 0.3992, .gv =-0.3343, .bv =-0.0649, .av = 1.0000,
},
+ },
+ {
+ /* 625 Lines, 50 Fields, 15.625KHz line, Sub-Carrier 4.434MHz */
+ .name = "PAL 576i",
+ .oversample = TV_OVERSAMPLE_8X,
+
+ .hsync_end = 64, .hblank_end = 128,
+ .hblank_start = 844, .htotal = 863,
+
+ .progressive = FALSE,
+
+ .vsync_start_f1 = 6, .vsync_start_f2 = 7,
+ .vsync_len = 6,
+
+ .veq_ena = TRUE, .veq_start_f1 = 0,
+ .veq_start_f2 = 1, .veq_len = 18,
+
+ .vi_end_f1 = 24, .vi_end_f2 = 25,
+ .nbr_end = 286,
+
+ .burst_ena = TRUE,
+ .hburst_start = 73, .hburst_len = 34,
+ .vburst_start_f1 = 8, .vburst_end_f1 = 285,
+ .vburst_start_f2 = 8, .vburst_end_f2 = 286,
+ .vburst_start_f3 = 9, .vburst_end_f3 = 286,
+ .vburst_start_f4 = 9, .vburst_end_f4 = 285,
+
+ /* desired 4.4336180 actual 4.4336180 clock 107.52 */
+ .dda1_inc = 168,
+ .dda2_inc = 18557, .dda2_size = 20625,
+ .dda3_inc = 0, .dda3_size = 0,
+ .sc_reset = TV_SC_RESET_EVERY_8,
+ .pal_burst = TRUE,
+
+ .composite_levels = { .blank = 237, .black = 237, .burst = 118 },
+ .composite_color = {
+ .ry = 0.2990, .gy = 0.5870, .by = 0.1140, .ay = 0.5379,
+ .ru =-0.0793, .gu =-0.1557, .bu = 0.2350, .au = 1.0000,
+ .rv = 0.3307, .gv =-0.2769, .bv =-0.0538, .av = 1.0000,
+ },
+
+ .svideo_levels = { .blank = 280, .black = 280, .burst = 139 },
+ .svideo_color = {
+ .ry = 0.2990, .gy = 0.5870, .by = 0.1140, .ay = 0.6357,
+ .ru =-0.0937, .gu =-0.1840, .bu = 0.2777, .au = 1.0000,
+ .rv = 0.3908, .gv =-0.3273, .bv =-0.0636, .av = 1.0000,
+ },
}
#if 0
{
@@ -802,22 +849,122 @@ i830_tv_mode_set(xf86OutputPtr output, D
}
static const DisplayModeRec reported_modes[] = {
- {
- .name = "NTSC 480i",
- .Clock = TV_PLL_CLOCK,
-
- .HDisplay = 1024,
- .HSyncStart = 1048,
- .HSyncEnd = 1184,
- .HTotal = 1344,
-
- .VDisplay = 768,
- .VSyncStart = 771,
- .VSyncEnd = 777,
- .VTotal = 806,
+ {
+ .name = "NTSC 480i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 1280,
+ .HSyncStart = 1368,
+ .HSyncEnd = 1496,
+ .HTotal = 1712,
+
+ .VDisplay = 1024,
+ .VSyncStart = 1027,
+ .VSyncEnd = 1034,
+ .VTotal = 1104,
+ .type = M_T_DRIVER
+ },
+ {
+ .name = "NTSC 480i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 1024,
+ .HSyncStart = 1080,
+ .HSyncEnd = 1184,
+ .HTotal = 1344,
+
+ .VDisplay = 768,
+ .VSyncStart = 771,
+ .VSyncEnd = 777,
+ .VTotal = 806,
+ .type = M_T_DRIVER
+ },
+ {
+ .name = "NTSC 480i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 800,
+ .HSyncStart = 832,
+ .HSyncEnd = 912,
+ .HTotal = 1024,
+
+ .VDisplay = 600,
+ .VSyncStart = 603,
+ .VSyncEnd = 607,
+ .VTotal = 650,
+ .type = M_T_DRIVER
+ },
+ {
+ .name = "NTSC 480i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 640,
+ .HSyncStart = 664,
+ .HSyncEnd = 720,
+ .HTotal = 800,
+
+ .VDisplay = 480,
+ .VSyncStart = 483,
+ .VSyncEnd = 487,
+ .VTotal = 552,
+ .type = M_T_DRIVER
+ },
+ {
+ .name = "PAL 576i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 1280,
+ .HSyncStart = 1352,
+ .HSyncEnd = 1480,
+ .HTotal = 1680,
+
+ .VDisplay = 1024,
+ .VSyncStart = 1027,
+ .VSyncEnd = 1034,
+ .VTotal = 1092,
- .type = M_T_DRIVER
- }
+ .type = M_T_DRIVER
+ },
+ {
+ .name = "PAL 576i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 1024,
+ .HSyncStart = 1072,
+ .HSyncEnd = 1168,
+ .HTotal = 1312,
+ .VDisplay = 768,
+ .VSyncStart = 771,
+ .VSyncEnd = 775,
+ .VTotal = 820,
+ .VRefresh = 50.0f,
+
+ .type = M_T_DRIVER
+ },
+ {
+ .name = "PAL 576i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 800,
+ .HSyncStart = 832,
+ .HSyncEnd = 904,
+ .HTotal = 1008,
+ .VDisplay = 600,
+ .VSyncStart = 603,
+ .VSyncEnd = 607,
+ .VTotal = 642,
+ .VRefresh = 50.0f,
+
+ .type = M_T_DRIVER
+ },
+ {
+ .name = "PAL 576i",
+ .Clock = TV_PLL_CLOCK,
+ .HDisplay = 640,
+ .HSyncStart = 664,
+ .HSyncEnd = 720,
+ .HTotal = 800,
+
+ .VDisplay = 480,
+ .VSyncStart = 483,
+ .VSyncEnd = 487,
+ .VTotal = 516,
+ .VRefresh = 50.0f,
+ .type = M_T_DRIVER
+ },
};
/**
diff-tree 71946bcdc3c68c220996afac944698eea1974a36 (from 35cebed70827999812f8343ac97ad0dffda20786)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Jan 3 22:37:32 2007 -0800
[PATCH] Add rotation support for 965.
diff --git a/src/i830.h b/src/i830.h
index 3b7301e..f89d022 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -288,6 +288,7 @@ typedef struct _I830Rec {
XF86ModReqInfo shadowReq; /* to test for later libshadow */
I830MemRange RotatedMem;
I830MemRange RotatedMem2;
+ I830MemRange RotateStateMem; /* for G965 state buffer */
Rotation rotation;
int InitialRotation;
int displayWidth;
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 6b76d12..ffa391f 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -2534,6 +2534,10 @@ I830ScreenInit(int scrnIndex, ScreenPtr
/* Rotated2 Buffer */
memset(&(pI830->RotatedMem2), 0, sizeof(pI830->RotatedMem2));
pI830->RotatedMem2.Key = -1;
+ if (IS_I965G(pI830)) {
+ memset(&(pI830->RotateStateMem), 0, sizeof(pI830->RotateStateMem));
+ pI830->RotateStateMem.Key = -1;
+ }
}
#ifdef HAS_MTRR_SUPPORT
@@ -2902,11 +2906,7 @@ I830ScreenInit(int scrnIndex, ScreenPtr
shadowSetup(pScreen);
/* support all rotations */
xf86RandR12Init (pScreen);
- if (IS_I965G(pI830)) {
- xf86RandR12SetRotations (pScreen, RR_Rotate_0); /* only 0 degrees for I965G */
- } else {
- xf86RandR12SetRotations (pScreen, RR_Rotate_0 | RR_Rotate_90 | RR_Rotate_180 | RR_Rotate_270);
- }
+ xf86RandR12SetRotations (pScreen, RR_Rotate_0 | RR_Rotate_90 | RR_Rotate_180 | RR_Rotate_270);
pI830->PointerMoved = pScrn->PointerMoved;
pScrn->PointerMoved = I830PointerMoved;
pI830->CreateScreenResources = pScreen->CreateScreenResources;
@@ -3249,8 +3249,7 @@ I830SwitchMode(int scrnIndex, DisplayMod
* The extra WindowTable check detects a rotation at startup.
*/
if ( (!WindowTable[pScrn->scrnIndex] || pspix->devPrivate.ptr == NULL) &&
- !pI830->DGAactive && (pScrn->PointerMoved == I830PointerMoved) &&
- !IS_I965G(pI830)) {
+ !pI830->DGAactive && (pScrn->PointerMoved == I830PointerMoved)) {
if (!I830Rotate(pScrn, mode))
ret = FALSE;
}
diff --git a/src/i830_memory.c b/src/i830_memory.c
index 60257b9..3a3836c 100644
--- a/src/i830_memory.c
+++ b/src/i830_memory.c
@@ -531,6 +531,28 @@ I830AllocateRotatedBuffer(ScrnInfoPtr pS
xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, verbosity,
"%sAllocated %ld kB for the rotated buffer at 0x%lx.\n", s,
alloced / 1024, pI830->RotatedMem.Start);
+
+#define BRW_LINEAR_EXTRA (32*1024)
+ if (IS_I965G(pI830)) {
+ memset(&(pI830->RotateStateMem), 0, sizeof(I830MemRange));
+ pI830->RotateStateMem.Key = -1;
+ size = ROUND_TO_PAGE(BRW_LINEAR_EXTRA);
+ align = GTT_PAGE_SIZE;
+ alloced = I830AllocVidMem(pScrn, &(pI830->RotateStateMem),
+ &(pI830->StolenPool), size, align,
+ flags | FROM_ANYWHERE | ALLOCATE_AT_TOP);
+ if (alloced < size) {
+ if (!dryrun) {
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+ "G965: Failed to allocate rotate state buffer space.\n");
+ }
+ return FALSE;
+ }
+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, verbosity,
+ "%sAllocated %ld kB for the G965 rotate state buffer at 0x%lx - 0x%lx.\n", s,
+ alloced / 1024, pI830->RotateStateMem.Start, pI830->RotateStateMem.End);
+ }
+
return TRUE;
}
@@ -1743,8 +1765,13 @@ I830SetupMemoryTiling(ScrnInfoPtr pScrn)
int i;
/* Clear out */
- for (i = 0; i < 8; i++)
- pI830->ModeReg.Fence[i] = 0;
+ if (IS_I965G(pI830)) {
+ for (i = 0; i < FENCE_NEW_NR*2; i++)
+ pI830->ModeReg.Fence[i] = 0;
+ } else {
+ for (i = 0; i < 8; i++)
+ pI830->ModeReg.Fence[i] = 0;
+ }
nextTile = 0;
tileGeneration = -1;
@@ -1814,6 +1841,9 @@ I830SetupMemoryTiling(ScrnInfoPtr pScrn)
}
}
+/* XXX tiled rotate mem not ready on G965*/
+
+ if(!IS_I965G(pI830)) {
if (pI830->RotatedMem.Alignment >= KB(512)) {
if (MakeTiles(pScrn, &(pI830->RotatedMem), FENCE_XMAJOR)) {
xf86DrvMsg(pScrn->scrnIndex, X_INFO,
@@ -1824,7 +1854,7 @@ I830SetupMemoryTiling(ScrnInfoPtr pScrn)
"MakeTiles failed for the rotated buffer.\n");
}
}
-
+ }
#if 0
if (pI830->RotatedMem2.Alignment >= KB(512)) {
if (MakeTiles(pScrn, &(pI830->RotatedMem2), FENCE_XMAJOR)) {
diff --git a/src/i830_rotate.c b/src/i830_rotate.c
index 9fa3290..b2587b2 100644
--- a/src/i830_rotate.c
+++ b/src/i830_rotate.c
@@ -60,6 +60,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN
#include "i830.h"
#include "i915_reg.h"
#include "i915_3d.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
#ifdef XF86DRI
#include "dri.h"
@@ -194,6 +196,718 @@ static void draw_poly(CARD32 *vb,
}
}
+
+/* Our PS kernel uses less than 32 GRF registers (about 20) */
+#define PS_KERNEL_NUM_GRF 32
+#define PS_MAX_THREADS 32
+
+#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
+
+static const CARD32 ps_kernel_static0[][4] = {
+#include "rotation_wm_prog0.h"
+};
+
+static const CARD32 ps_kernel_static90[][4] = {
+#include "rotation_wm_prog90.h"
+};
+
+#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define BRW_LINEAR_EXTRA (32*1024)
+#define WM_BINDING_TABLE_ENTRIES 2
+
+static const CARD32 sip_kernel_static[][4] = {
+/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */
+ { 0x00000030, 0x20000108, 0x00001220, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+};
+
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 1
+
+static const CARD32 sf_kernel_static0[][4] = {
+#include "rotation_sf_prog0.h"
+};
+
+
+static const CARD32 sf_kernel_static90[][4] = {
+#include "rotation_sf_prog90.h"
+};
+
+static void
+I965UpdateRotate (ScreenPtr pScreen,
+ shadowBufPtr pBuf)
+{
+ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+ I830Ptr pI830 = I830PTR(pScrn);
+ ScrnInfoPtr pScrn1 = pScrn;
+ I830Ptr pI8301 = NULL;
+ RegionPtr damage = shadowDamage(pBuf);
+ int nbox = REGION_NUM_RECTS (damage);
+ BoxPtr pbox = REGION_RECTS (damage);
+ int box_x1, box_x2, box_y1, box_y2;
+ float verts[4][2];
+ struct matrix23 rotMatrix;
+ Bool updateInvarient = FALSE;
+#ifdef XF86DRI
+ drmI830Sarea *sarea = NULL;
+ drm_context_t myContext = 0;
+#endif
+ Bool didLock = FALSE;
+
+/* Gen4 states */
+ int urb_vs_start, urb_vs_size;
+ int urb_gs_start, urb_gs_size;
+ int urb_clip_start, urb_clip_size;
+ int urb_sf_start, urb_sf_size;
+ int urb_cs_start, urb_cs_size;
+ struct brw_surface_state *dest_surf_state;
+ struct brw_surface_state *src_surf_state;
+ struct brw_sampler_state *src_sampler_state;
+ struct brw_vs_unit_state *vs_state;
+ struct brw_sf_unit_state *sf_state;
+ struct brw_wm_unit_state *wm_state;
+ struct brw_cc_unit_state *cc_state;
+ struct brw_cc_viewport *cc_viewport;
+ struct brw_instruction *sf_kernel;
+ struct brw_instruction *ps_kernel;
+ struct brw_instruction *sip_kernel;
+ float *vb;
+ BOOL first_output = TRUE;
+ CARD32 *binding_table;
+ int dest_surf_offset, src_surf_offset, src_sampler_offset, vs_offset;
+ int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
+ int wm_scratch_offset;
+ int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
+ int binding_table_offset;
+ int next_offset, total_state_size;
+ int vb_size = (4 * 4) * 4; /* 4 DWORDS per vertex */
+ char *state_base;
+ int state_base_offset;
+
+ DPRINTF(PFX, "I965UpdateRotate: from (%d x %d) -> (%d x %d)\n",
+ pScrn->virtualX, pScrn->virtualY, pScreen->width, pScreen->height);
+
+ if (I830IsPrimary(pScrn)) {
+ pI8301 = pI830;
+ } else {
+ pI8301 = I830PTR(pI830->entityPrivate->pScrn_1);
+ pScrn1 = pI830->entityPrivate->pScrn_1;
+ }
+
+ switch (pI830->rotation) {
+ case RR_Rotate_90:
+ matrix23Rotate(&rotMatrix,
+ pScreen->width, pScreen->height,
+ 90);
+ break;
+ case RR_Rotate_180:
+ matrix23Rotate(&rotMatrix,
+ pScreen->width, pScreen->height,
+ 180);
+ break;
+ case RR_Rotate_270:
+ matrix23Rotate(&rotMatrix,
+ pScreen->width, pScreen->height,
+ 270);
+ break;
+ default:
+ break;
+ }
+
+#ifdef XF86DRI
+ if (pI8301->directRenderingEnabled) {
+ sarea = DRIGetSAREAPrivate(pScrn1->pScreen);
+ myContext = DRIGetContext(pScrn1->pScreen);
+ didLock = I830DRILock(pScrn1);
+ }
+#endif
+
+ if (pScrn->scrnIndex != *pI830->used3D)
+ updateInvarient = TRUE;
+
+#ifdef XF86DRI
+ if (sarea && sarea->ctxOwner != myContext)
+ updateInvarient = TRUE;
+#endif
+
+ /*XXX we'll always update state */
+ *pI830->used3D = pScrn->scrnIndex;
+#ifdef XF86DRI
+ if (sarea)
+ sarea->ctxOwner = myContext;
+#endif
+
+ /* this starts initialize 3D engine for rotation mapping*/
+ next_offset = 0;
+
+ /* Set up our layout of state in framebuffer. First the general state: */
+ vs_offset = ALIGN(next_offset, 64);
+ next_offset = vs_offset + sizeof(*vs_state);
+ sf_offset = ALIGN(next_offset, 32);
+ next_offset = sf_offset + sizeof(*sf_state);
+ wm_offset = ALIGN(next_offset, 32);
+ next_offset = wm_offset + sizeof(*wm_state);
+ wm_scratch_offset = ALIGN(next_offset, 1024);
+ next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
+ cc_offset = ALIGN(next_offset, 32);
+ next_offset = cc_offset + sizeof(*cc_state);
+
+ sf_kernel_offset = ALIGN(next_offset, 64);
+
+ switch (pI830->rotation) {
+ case RR_Rotate_90:
+ case RR_Rotate_270:
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static90);
+ ps_kernel_offset = ALIGN(next_offset, 64);
+ next_offset = ps_kernel_offset + sizeof (ps_kernel_static90);
+ break;
+ case RR_Rotate_180:
+ default:
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static0);
+ ps_kernel_offset = ALIGN(next_offset, 64);
+ next_offset = ps_kernel_offset + sizeof (ps_kernel_static0);
+ break;
+ }
+
+ sip_kernel_offset = ALIGN(next_offset, 64);
+ next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
+ cc_viewport_offset = ALIGN(next_offset, 32);
+ next_offset = cc_viewport_offset + sizeof(*cc_viewport);
+
+ src_sampler_offset = ALIGN(next_offset, 32);
+ next_offset = src_sampler_offset + sizeof(*src_sampler_state);
+
+ /* Align VB to native size of elements, for safety */
+ vb_offset = ALIGN(next_offset, 8);
+ next_offset = vb_offset + vb_size;
+
+ dest_surf_offset = ALIGN(next_offset, 32);
+ next_offset = dest_surf_offset + sizeof(*dest_surf_state);
+ src_surf_offset = ALIGN(next_offset, 32);
+ next_offset = src_surf_offset + sizeof(*src_surf_state);
+ binding_table_offset = ALIGN(next_offset, 32);
+ next_offset = binding_table_offset + (WM_BINDING_TABLE_ENTRIES * 4);
+
+ total_state_size = next_offset;
+ assert (total_state_size < BRW_LINEAR_EXTRA);
+
+ state_base_offset = pI830->RotateStateMem.Start;
+ state_base_offset = ALIGN(state_base_offset, 64);
+ state_base = (char *)(pI830->FbBase + state_base_offset);
+ DPRINTF(PFX, "rotate state buffer start 0x%x, addr 0x%x, base 0x%x\n",
+ pI830->RotateStateMem.Start, state_base, pI830->FbBase);
+
+ vs_state = (void *)(state_base + vs_offset);
+ sf_state = (void *)(state_base + sf_offset);
+ wm_state = (void *)(state_base + wm_offset);
+ cc_state = (void *)(state_base + cc_offset);
+ sf_kernel = (void *)(state_base + sf_kernel_offset);
+ ps_kernel = (void *)(state_base + ps_kernel_offset);
+ sip_kernel = (void *)(state_base + sip_kernel_offset);
+
+ cc_viewport = (void *)(state_base + cc_viewport_offset);
+ dest_surf_state = (void *)(state_base + dest_surf_offset);
+ src_surf_state = (void *)(state_base + src_surf_offset);
+ src_sampler_state = (void *)(state_base + src_sampler_offset);
+ binding_table = (void *)(state_base + binding_table_offset);
+ vb = (void *)(state_base + vb_offset);
+
+ /* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it.
+ * A VUE consists of a 256-bit vertex header followed by the vertex data,
+ * which in our case is 4 floats (128 bits), thus a single 512-bit URB
+ * entry.
+ */
+#define URB_VS_ENTRIES 8
+#define URB_VS_ENTRY_SIZE 1
+
+#define URB_GS_ENTRIES 0
+#define URB_GS_ENTRY_SIZE 0
+
+#define URB_CLIP_ENTRIES 0
+#define URB_CLIP_ENTRY_SIZE 0
+
+ /* The SF kernel we use outputs only 4 256-bit registers, leading to an
+ * entry size of 2 512-bit URBs. We don't need to have many entries to
+ * output as we're generally working on large rectangles and don't care
+ * about having WM threads running on different rectangles simultaneously.
+ */
+#define URB_SF_ENTRIES 1
+#define URB_SF_ENTRY_SIZE 2
+
+#define URB_CS_ENTRIES 0
+#define URB_CS_ENTRY_SIZE 0
+
+ urb_vs_start = 0;
+ urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
+ urb_gs_start = urb_vs_start + urb_vs_size;
+ urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
+ urb_clip_start = urb_gs_start + urb_gs_size;
+ urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
+ urb_sf_start = urb_clip_start + urb_clip_size;
+ urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
+ urb_cs_start = urb_sf_start + urb_sf_size;
+ urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+
+ memset (cc_viewport, 0, sizeof (*cc_viewport));
+ cc_viewport->min_depth = -1.e35;
+ cc_viewport->max_depth = 1.e35;
+
+ memset(cc_state, 0, sizeof(*cc_state));
+ cc_state->cc0.stencil_enable = 0; /* disable stencil */
+ cc_state->cc2.depth_test = 0; /* disable depth test */
+ cc_state->cc2.logicop_enable = 1; /* enable logic op */
+ cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */
+ cc_state->cc3.blend_enable = 0; /* disable color blend */
+ cc_state->cc3.alpha_test = 0; /* disable alpha test */
+ cc_state->cc4.cc_viewport_state_offset = (state_base_offset + cc_viewport_offset) >> 5;
+ cc_state->cc5.dither_enable = 0; /* disable dither */
+ cc_state->cc5.logicop_func = 0xc; /* COPY S*/
+ cc_state->cc5.statistics_enable = 1;
+ cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
+ cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
+ cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ZERO;
+
+ /* Upload system kernel */
+ memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
+
+ memset(dest_surf_state, 0, sizeof(*dest_surf_state));
+ dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+ dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
+ if (pI8301->cpp == 2)
+ dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ else
+ dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ dest_surf_state->ss0.writedisable_alpha = 0;
+ dest_surf_state->ss0.writedisable_red = 0;
+ dest_surf_state->ss0.writedisable_green = 0;
+ dest_surf_state->ss0.writedisable_blue = 0;
+ dest_surf_state->ss0.color_blend = 0;
+ dest_surf_state->ss0.vert_line_stride = 0;
+ dest_surf_state->ss0.vert_line_stride_ofs = 0;
+ dest_surf_state->ss0.mipmap_layout_mode = 0;
+ dest_surf_state->ss0.render_cache_read_mode = 0;
+
+ if (I830IsPrimary(pScrn))
+ dest_surf_state->ss1.base_addr = pI830->FrontBuffer.Start;
+ else
+ dest_surf_state->ss1.base_addr = pI8301->FrontBuffer2.Start;
+ dest_surf_state->ss2.width = pScrn->virtualX - 1;
+ dest_surf_state->ss2.height = pScrn->virtualY - 1;
+ dest_surf_state->ss2.mip_count = 0;
+ dest_surf_state->ss2.render_target_rotation = 0; /*XXX how to use? */
+ dest_surf_state->ss3.pitch = (pI830->displayWidth * pI830->cpp) - 1;
+ if (pI830->front_tiled) {
+ dest_surf_state->ss3.tiled_surface = 1;
+ dest_surf_state->ss3.tile_walk = 0; /* X major */
+ }
+
+ memset(src_surf_state, 0, sizeof(*src_surf_state));
+ src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+/* src_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;*/
+ if (pI8301->cpp == 2)
+ src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ else
+ src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ src_surf_state->ss0.writedisable_alpha = 0;
+ src_surf_state->ss0.writedisable_red = 0;
+ src_surf_state->ss0.writedisable_green = 0;
+ src_surf_state->ss0.writedisable_blue = 0;
+ src_surf_state->ss0.color_blend = 0;
+ src_surf_state->ss0.vert_line_stride = 0;
+ src_surf_state->ss0.vert_line_stride_ofs = 0;
+ src_surf_state->ss0.mipmap_layout_mode = 0;
+ src_surf_state->ss0.render_cache_read_mode = 0;
+
+ if (I830IsPrimary(pScrn))
+ src_surf_state->ss1.base_addr = pI830->RotatedMem.Start;
+ else
+ src_surf_state->ss1.base_addr = pI8301->RotatedMem2.Start;
+ src_surf_state->ss2.width = pScreen->width - 1;
+ src_surf_state->ss2.height = pScreen->height - 1;
+ src_surf_state->ss2.mip_count = 0;
+ src_surf_state->ss2.render_target_rotation = 0;
+ src_surf_state->ss3.pitch = (pScrn->displayWidth * pI830->cpp) - 1;
+ if (pI830->rotated_tiled) {
+ src_surf_state->ss3.tiled_surface = 1;
+ src_surf_state->ss3.tile_walk = 0; /* X major */
+ }
+
+ binding_table[0] = state_base_offset + dest_surf_offset;
+ binding_table[1] = state_base_offset + src_surf_offset;
+
+ memset(src_sampler_state, 0, sizeof(*src_sampler_state));
+ src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+
+ /* Set up the vertex shader to be disabled (passthrough) */
+ memset(vs_state, 0, sizeof(*vs_state));
+ vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
+ vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
+ vs_state->vs6.vs_enable = 0;
+ vs_state->vs6.vert_cache_disable = 1;
+
+ /* Set up the SF kernel to do coord interp: for each attribute,
+ * calculate dA/dx and dA/dy. Hand these interpolation coefficients
+ * back to SF which then hands pixels off to WM.
+ */
+
+ switch (pI830->rotation) {
+ case RR_Rotate_90:
+ case RR_Rotate_270:
+ memcpy (sf_kernel, sf_kernel_static90, sizeof (sf_kernel_static90));
+ memcpy (ps_kernel, ps_kernel_static90, sizeof (ps_kernel_static90));
+ break;
+ case RR_Rotate_180:
+ default:
+ memcpy (sf_kernel, sf_kernel_static0, sizeof (sf_kernel_static0));
+ memcpy (ps_kernel, ps_kernel_static0, sizeof (ps_kernel_static0));
+ break;
+ }
+
+ memset(sf_state, 0, sizeof(*sf_state));
+ sf_state->thread0.kernel_start_pointer =
+ (state_base_offset + sf_kernel_offset) >> 6;
+ sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+ sf_state->sf1.single_program_flow = 1; /* XXX */
+ sf_state->sf1.binding_table_entry_count = 0;
+ sf_state->sf1.thread_priority = 0;
+ sf_state->sf1.floating_point_mode = 0;
+ sf_state->sf1.illegal_op_exception_enable = 1;
+ sf_state->sf1.mask_stack_exception_enable = 1;
+ sf_state->sf1.sw_exception_enable = 1;
+ sf_state->thread2.per_thread_scratch_space = 0;
+ sf_state->thread2.scratch_space_base_pointer = 0; /* not used in our kernel */
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ sf_state->thread3.urb_entry_read_offset = 0;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ memset (wm_state, 0, sizeof (*wm_state));
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset + ps_kernel_offset) >> 6;
+ wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+ wm_state->thread1.single_program_flow = 1; /* XXX */
+ wm_state->thread1.binding_table_entry_count = 2;
+ /* Though we never use the scratch space in our WM kernel, it has to be
+ * set, and the minimum allocation is 1024 bytes.
+ */
+ wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
+ wm_scratch_offset) >> 10;
+ wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */
+ wm_state->thread3.dispatch_grf_start_reg = 3;
+ wm_state->thread3.const_urb_entry_read_length = 0;
+ wm_state->thread3.const_urb_entry_read_offset = 0;
+ wm_state->thread3.urb_entry_read_length = 1;
+ wm_state->thread3.urb_entry_read_offset = 0;
+ wm_state->wm4.stats_enable = 1;
+ wm_state->wm4.sampler_state_pointer = (state_base_offset + src_sampler_offset) >> 5;
+ wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
+ wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
+ wm_state->wm5.thread_dispatch_enable = 1;
+ wm_state->wm5.enable_16_pix = 1;
+ wm_state->wm5.enable_8_pix = 0;
+ wm_state->wm5.early_depth_test = 1;
+
+
+ {
+ BEGIN_LP_RING(2);
+ OUT_RING(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ BRW_MI_GLOBAL_SNAPSHOT_RESET);
+ OUT_RING(MI_NOOP);
+ ADVANCE_LP_RING();
+ }
+
+ {
+ BEGIN_LP_RING(12);
+ OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ /* Mesa does this. Who knows... */
+ OUT_RING(BRW_CS_URB_STATE | 0);
+ OUT_RING((0 << 4) | /* URB Entry Allocation Size */
+ (0 << 0)); /* Number of URB Entries */
+
+ /* Zero out the two base address registers so all offsets are absolute */
+ OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* general state max addr, disabled */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* media object state max addr, disabled */
+
+ /* Set system instruction pointer */
+ OUT_RING(BRW_STATE_SIP | 0);
+ OUT_RING(state_base_offset + sip_kernel_offset); /* system instruction pointer */
+
+ OUT_RING(MI_NOOP);
+ ADVANCE_LP_RING();
+ }
+
+
+ {
+ BEGIN_LP_RING(36);
+ /* Enable VF statistics */
+ OUT_RING(BRW_3DSTATE_VF_STATISTICS | 1);
+
+ /* Pipe control */
+ OUT_RING(BRW_PIPE_CONTROL |
+ BRW_PIPE_CONTROL_NOWRITE |
+ BRW_PIPE_CONTROL_IS_FLUSH |
+ 2);
+ OUT_RING(0); /* Destination address */
+ OUT_RING(0); /* Immediate data low DW */
+ OUT_RING(0); /* Immediate data high DW */
+
+ /* Binding table pointers */
+ OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_RING(0); /* vs */
+ OUT_RING(0); /* gs */
+ OUT_RING(0); /* clip */
+ OUT_RING(0); /* sf */
+ /* Only the PS uses the binding table */
+ OUT_RING(state_base_offset + binding_table_offset); /* ps */
+
+ /* XXX: Blend constant color (magenta is fun) */
+ //OUT_RING(BRW_3DSTATE_CONSTANT_COLOR | 3);
+ //OUT_RING(float_to_uint (1.0));
+ //OUT_RING(float_to_uint (0.0));
+ //OUT_RING(float_to_uint (1.0));
+ //OUT_RING(float_to_uint (1.0));
+
+ /* The drawing rectangle clipping is always on. Set it to values that
+ * shouldn't do any clipping.
+ */
+ OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
+ OUT_RING(0x00000000); /* ymin, xmin */
+ OUT_RING((pScrn->virtualX - 1) |
+ (pScrn->virtualY - 1) << 16); /* ymax, xmax */
+ OUT_RING(0x00000000); /* yorigin, xorigin */
+
+ /* skip the depth buffer */
+ /* skip the polygon stipple */
+ /* skip the polygon stipple offset */
+ /* skip the line stipple */
+
+ /* Set the pointers to the 3d pipeline state */
+ OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */
+ OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
+ OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
+ OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */
+
+ /* URB fence */
+ OUT_RING(BRW_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+
+ /* Constant buffer state */
+ OUT_RING(BRW_CS_URB_STATE | 0);
+ OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
+ (URB_CS_ENTRIES << 0)); /* Number of URB Entries */
+
+ /* Set up the pointer to our vertex buffer */
+ OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 2);
+ OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
+ VB0_VERTEXDATA |
+ ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* four 32-bit floats per vertex */
+ OUT_RING(state_base_offset + vb_offset);
+ OUT_RING(3); /* four corners to our rectangle */
+
+ /* Set up our vertex elements, sourced from the single vertex buffer. */
+ OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | 3);
+ /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (8 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+
+ //OUT_RING(MI_NOOP); /* pad to quadword */
+ ADVANCE_LP_RING();
+ }
+
+ {
+ BEGIN_LP_RING(2);
+ OUT_RING(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ BRW_MI_GLOBAL_SNAPSHOT_RESET);
+ OUT_RING(MI_NOOP);
+ ADVANCE_LP_RING();
+ }
+
+ while (nbox--)
+ {
+ float src_scale_x, src_scale_y;
+ int i;
+ box_x1 = pbox->x1;
+ box_y1 = pbox->y1;
+ box_x2 = pbox->x2;
+ box_y2 = pbox->y2;
+
+ if (!first_output) {
+ /* Since we use the same little vertex buffer over and over, sync for
+ * subsequent rectangles.
+ */
+ if (pI830->AccelInfoRec && pI830->AccelInfoRec->NeedToSync) {
+ (*pI830->AccelInfoRec->Sync)(pScrn);
+ pI830->AccelInfoRec->NeedToSync = FALSE;
+ }
+ }
+
+ pbox++;
+
+ verts[0][0] = box_x1; verts[0][1] = box_y1;
+ verts[1][0] = box_x2; verts[1][1] = box_y1;
+ verts[2][0] = box_x2; verts[2][1] = box_y2;
+ verts[3][0] = box_x1; verts[3][1] = box_y2;
+
+ /* transform coordinates to rotated versions, but leave texcoords unchanged */
+ for (i = 0; i < 4; i++)
+ matrix23TransformCoordf(&rotMatrix, &verts[i][0], &verts[i][1]);
+
+ src_scale_x = (float)1.0 / (float)pScreen->width;
+ src_scale_y = (float)1.0 / (float)pScreen->height;
+ i = 0;
+
+ DPRINTF(PFX, "box size (%d, %d) -> (%d, %d)\n",
+ box_x1, box_y1, box_x2, box_y2);
+
+ switch (pI830->rotation) {
+ case RR_Rotate_90:
+ vb[i++] = (float)box_x1 * src_scale_x;
+ vb[i++] = (float)box_y2 * src_scale_y;
+ vb[i++] = verts[3][0];
+ vb[i++] = verts[3][1];
+
+ vb[i++] = (float)box_x1 * src_scale_x;
+ vb[i++] = (float)box_y1 * src_scale_y;
+ vb[i++] = verts[0][0];
+ vb[i++] = verts[0][1];
+
+ vb[i++] = (float)box_x2 * src_scale_x;
+ vb[i++] = (float)box_y1 * src_scale_y;
+ vb[i++] = verts[1][0];
+ vb[i++] = verts[1][1];
+ break;
+ case RR_Rotate_270:
+ vb[i++] = (float)box_x2 * src_scale_x;
+ vb[i++] = (float)box_y1 * src_scale_y;
+ vb[i++] = verts[1][0];
+ vb[i++] = verts[1][1];
+
+ vb[i++] = (float)box_x2 * src_scale_x;
+ vb[i++] = (float)box_y2 * src_scale_y;
+ vb[i++] = verts[2][0];
+ vb[i++] = verts[2][1];
+
+ vb[i++] = (float)box_x1 * src_scale_x;
+ vb[i++] = (float)box_y2 * src_scale_y;
+ vb[i++] = verts[3][0];
+ vb[i++] = verts[3][1];
+ break;
+ case RR_Rotate_180:
+ default:
+ vb[i++] = (float)box_x1 * src_scale_x;
+ vb[i++] = (float)box_y1 * src_scale_y;
+ vb[i++] = verts[0][0];
+ vb[i++] = verts[0][1];
+
+ vb[i++] = (float)box_x2 * src_scale_x;
+ vb[i++] = (float)box_y1 * src_scale_y;
+ vb[i++] = verts[1][0];
+ vb[i++] = verts[1][1];
+
+ vb[i++] = (float)box_x2 * src_scale_x;
+ vb[i++] = (float)box_y2 * src_scale_y;
+ vb[i++] = verts[2][0];
+ vb[i++] = verts[2][1];
+ break;
+ }
+
+ BEGIN_LP_RING(6);
+ OUT_RING(BRW_3DPRIMITIVE |
+ BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+ (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (0 << 9) | /* CTG - indirect vertex count */
+ 4);
+ OUT_RING(3); /* vertex count per instance */
+ OUT_RING(0); /* start vertex offset */
+ OUT_RING(1); /* single instance */
+ OUT_RING(0); /* start instance location */
+ OUT_RING(0); /* index buffer offset, ignored */
+ ADVANCE_LP_RING();
+
+ first_output = FALSE;
+ if (pI830->AccelInfoRec)
+ pI830->AccelInfoRec->NeedToSync = TRUE;
+ }
+
+ if (pI830->AccelInfoRec)
+ (*pI830->AccelInfoRec->Sync)(pScrn);
+#ifdef XF86DRI
+ if (didLock)
+ I830DRIUnlock(pScrn1);
+#endif
+}
+
+
static void
I915UpdateRotate (ScreenPtr pScreen,
shadowBufPtr pBuf)
@@ -657,11 +1371,15 @@ I830Rotate(ScrnInfoPtr pScrn, DisplayMod
if (pI830->noAccel)
func = LoaderSymbol("shadowUpdateRotatePacked");
- else
- if (IS_I9XX(pI830))
- func = I915UpdateRotate;
- else
+ else {
+ if (IS_I9XX(pI830)) {
+ if (IS_I965G(pI830))
+ func = I965UpdateRotate;
+ else
+ func = I915UpdateRotate;
+ } else
func = I830UpdateRotate;
+ }
if (I830IsPrimary(pScrn)) {
pI8301 = pI830;
@@ -738,6 +1456,15 @@ I830Rotate(ScrnInfoPtr pScrn, DisplayMod
memset(&(pI8301->RotatedMem), 0, sizeof(pI8301->RotatedMem));
pI8301->RotatedMem.Key = -1;
+ if (IS_I965G(pI8301)) {
+ if (pI8301->RotateStateMem.Key != -1)
+ xf86UnbindGARTMemory(pScrn1->scrnIndex, pI8301->RotateStateMem.Key);
+
+ I830FreeVidMem(pScrn1, &(pI8301->RotateStateMem));
+ memset(&(pI8301->RotateStateMem), 0, sizeof(pI8301->RotateStateMem));
+ pI8301->RotateStateMem.Key = -1;
+ }
+
if (pI830->entityPrivate) {
if (pI8301->RotatedMem2.Key != -1)
xf86UnbindGARTMemory(pScrn1->scrnIndex, pI8301->RotatedMem2.Key);
@@ -820,6 +1547,12 @@ I830Rotate(ScrnInfoPtr pScrn, DisplayMod
I830FixOffset(pScrn1, &(pI8301->RotatedMem));
if (pI8301->RotatedMem.Key != -1)
xf86BindGARTMemory(pScrn1->scrnIndex, pI8301->RotatedMem.Key, pI8301->RotatedMem.Offset);
+ if (IS_I965G(pI8301)) {
+ I830FixOffset(pScrn1, &(pI8301->RotateStateMem));
+ if (pI8301->RotateStateMem.Key != -1)
+ xf86BindGARTMemory(pScrn1->scrnIndex, pI8301->RotateStateMem.Key,
+ pI8301->RotateStateMem.Offset);
+ }
}
}
@@ -887,8 +1620,16 @@ I830Rotate(ScrnInfoPtr pScrn, DisplayMod
}
I830SetupMemoryTiling(pScrn1);
/* update fence registers */
- for (i = 0; i < 8; i++)
- OUTREG(FENCE + i * 4, pI8301->ModeReg.Fence[i]);
+ if (IS_I965G(pI830)) {
+ for (i = 0; i < FENCE_NEW_NR; i++) {
+ OUTREG(FENCE_NEW + i * 8, pI830->ModeReg.Fence[i]);
+ OUTREG(FENCE_NEW + 4 + i * 8, pI830->ModeReg.Fence[i+FENCE_NEW_NR]);
+ }
+ } else {
+ for (i = 0; i < 8; i++)
+ OUTREG(FENCE + i * 4, pI8301->ModeReg.Fence[i]);
+ }
+
{
drmI830Sarea *sarea = DRIGetSAREAPrivate(pScrn1->pScreen);
I830UpdateDRIBuffers(pScrn1, sarea );
diff --git a/src/rotation_sf0.g4a b/src/rotation_sf0.g4a
new file mode 100644
index 0000000..8c1398f
--- /dev/null
+++ b/src/rotation_sf0.g4a
@@ -0,0 +1,17 @@
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/rotation_sf90.g4a b/src/rotation_sf90.g4a
new file mode 100644
index 0000000..2648dff
--- /dev/null
+++ b/src/rotation_sf90.g4a
@@ -0,0 +1,17 @@
+send (1) 0 g6<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/rotation_sf_prog0.h b/src/rotation_sf_prog0.h
new file mode 100644
index 0000000..830d176
--- /dev/null
+++ b/src/rotation_sf_prog0.h
@@ -0,0 +1,17 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/rotation_sf_prog90.h b/src/rotation_sf_prog90.h
new file mode 100644
index 0000000..2e94b8f
--- /dev/null
+++ b/src/rotation_sf_prog90.h
@@ -0,0 +1,17 @@
+ { 0x00000031, 0x20c01fbd, 0x00000034, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x0000002c, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/rotation_wm0.g4a b/src/rotation_wm0.g4a
new file mode 100644
index 0000000..fe09734
--- /dev/null
+++ b/src/rotation_wm0.g4a
@@ -0,0 +1,123 @@
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each
+ * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each
+ * subspan are given in GRF register 1.2 through 1.5 (which, with the word
+ * addressing below, are 1.4 through 1.11).
+ *
+ * The result is WM_X*_R and WM_Y*R being:
+ *
+ * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
+ * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
+ * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
+ * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
+ */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+ /* XXX: double check the fields in Cx,Cy,Co and attributes*/
+mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture Y offset */
+add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
+ /* sampler */
+mov (8) m1<1>F g4<8,8,1>F { align1 };
+mov (8) m2<1>F g5<8,8,1>F { align1 };
+mov (8) m3<1>F g6<8,8,1>F { align1 };
+mov (8) m4<1>F g7<8,8,1>F { align1 };
+
+ /*
+ * g0 holds the PS thread payload, which (oddly) contains
+ * precisely what the sampler wants to see in m0
+ */
+send (16) 0 g12<1>UW g0<8,8,1>UW sampler (1,0,F) mlen 5 rlen 8 { align1 };
+mov (8) g19<1>UD g19<8,8,1>UD { align1 };
+
+mov (8) m2<1>F g12<8,8,1>F { align1 };
+mov (8) m3<1>F g14<8,8,1>F { align1 };
+mov (8) m4<1>F g16<8,8,1>F { align1 };
+mov (8) m5<1>F g18<8,8,1>F { align1 };
+mov (8) m6<1>F g13<8,8,1>F { align1 };
+mov (8) m7<1>F g15<8,8,1>F { align1 };
+mov (8) m8<1>F g17<8,8,1>F { align1 };
+mov (8) m9<1>F g19<8,8,1>F { align1 };
+
+ /* Pass through control information:
+ */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+ /* Send framebuffer write message: XXX: acc0? */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding table index 0 */
+ 8, /* pixel scoreboard clear */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ ) mlen 10 rlen 0 { align1 EOT };
+ /* padding */
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/rotation_wm90.g4a b/src/rotation_wm90.g4a
new file mode 100644
index 0000000..fd600bf
--- /dev/null
+++ b/src/rotation_wm90.g4a
@@ -0,0 +1,127 @@
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each
+ * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each
+ * subspan are given in GRF register 1.2 through 1.5 (which, with the word
+ * addressing below, are 1.4 through 1.11).
+ *
+ * The result is WM_X*_R and WM_Y*R being:
+ *
+ * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
+ * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
+ * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
+ * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
+ */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+/* XXX: convert it to calculate (u,v) in 90 and 270 case */
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+
+/* (Yp - Ystart) * Cx */
+mul (8) g6<1>F g6<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3<0,1,0>F { align1 };
+
+ /* add in texture offset */
+add (8) g6<1>F g6<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.12<0,1,0>F { align1 };
+
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.28<0,1,0>F { align1 };
+
+ /* sampler */
+mov (8) m1<1>F g6<8,8,1>F { align1 };
+mov (8) m2<1>F g7<8,8,1>F { align1 };
+mov (8) m3<1>F g4<8,8,1>F { align1 };
+mov (8) m4<1>F g5<8,8,1>F { align1 };
+
+ /*
+ * g0 holds the PS thread payload, which (oddly) contains
+ * precisely what the sampler wants to see in m0
+ */
+send (16) 0 g12<1>UW g0<8,8,1>UW sampler (1,0,F) mlen 5 rlen 8 { align1 };
+mov (8) g19<1>UD g19<8,8,1>UD { align1 };
+
+mov (8) m2<1>F g12<8,8,1>F { align1 };
+mov (8) m3<1>F g14<8,8,1>F { align1 };
+mov (8) m4<1>F g16<8,8,1>F { align1 };
+mov (8) m5<1>F g18<8,8,1>F { align1 };
+mov (8) m6<1>F g13<8,8,1>F { align1 };
+mov (8) m7<1>F g15<8,8,1>F { align1 };
+mov (8) m8<1>F g17<8,8,1>F { align1 };
+mov (8) m9<1>F g19<8,8,1>F { align1 };
+
+ /* Pass through control information:
+ */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+ /* Send framebuffer write message: XXX: acc0? */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding table index 0 */
+ 8, /* pixel scoreboard clear */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ ) mlen 10 rlen 0 { align1 EOT };
+ /* padding */
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/rotation_wm_prog0.h b/src/rotation_wm_prog0.h
new file mode 100644
index 0000000..08269b7
--- /dev/null
+++ b/src/rotation_wm_prog0.h
@@ -0,0 +1,68 @@
+ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
+ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
+ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
+ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
+ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
+ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/rotation_wm_prog90.h b/src/rotation_wm_prog90.h
new file mode 100644
index 0000000..9b87750
--- /dev/null
+++ b/src/rotation_wm_prog90.h
@@ -0,0 +1,68 @@
+ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
+ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000060 },
+ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000060 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000006c },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000006c },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
+ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000074 },
+ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000074 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000007c },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d00e0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0080, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d00a0, 0x00000000 },
+ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff-tree 35cebed70827999812f8343ac97ad0dffda20786 (from 33e912aca08fa11ef588eb386e16ba5f9ea13727)
Author: Eric Anholt <eric at anholt.net>
Date: Thu Nov 16 15:12:43 2006 -0800
[PATCH] Replace broken PCI resource size detection with pciGetBaseSize() call.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 94cba05..6b76d12 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -1189,16 +1189,12 @@ I830PreInit(ScrnInfoPtr pScrn, int flags
}
} else {
if (IS_I9XX(pI830)) {
- if (pI830->PciInfo->memBase[2] & 0x08000000)
- pI830->FbMapSize = 0x8000000; /* 128MB aperture */
- else
- pI830->FbMapSize = 0x10000000; /* 256MB aperture */
-
- if (pI830->PciInfo->chipType == PCI_CHIP_E7221_G)
- pI830->FbMapSize = 0x8000000; /* 128MB aperture */
- } else
- /* 128MB aperture for later chips */
+ pI830->FbMapSize = 1UL << pciGetBaseSize(pI830->PciTag, 2, TRUE,
+ NULL);
+ } else {
+ /* 128MB aperture for later i8xx series. */
pI830->FbMapSize = 0x8000000;
+ }
}
if (pI830->PciInfo->chipType == PCI_CHIP_E7221_G)
diff-tree 33e912aca08fa11ef588eb386e16ba5f9ea13727 (from fa54a3c08301e59558ab0493b3d22324f4162496)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Thu Jan 4 11:25:31 2007 +0800
[PATCH] Fix EXA mem binding
We should check if EXA is really enabled.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i830_memory.c b/src/i830_memory.c
index 20e3afb..60257b9 100644
--- a/src/i830_memory.c
+++ b/src/i830_memory.c
@@ -841,7 +841,7 @@ I830Allocate2DMemory(ScrnInfoPtr pScrn,
pI830->Offscreen.Start, pI830->Offscreen.Size/1024);
}
}
- if (IS_I965G(pI830)) {
+ if (pI830->useEXA && IS_I965G(pI830)) {
memset(&(pI830->EXAStateMem), 0, sizeof(I830MemRange));
pI830->EXAStateMem.Key = -1;
size = ROUND_TO_PAGE(EXA_LINEAR_EXTRA);
@@ -1513,9 +1513,11 @@ I830FixupOffsets(ScrnInfoPtr pScrn)
}
#endif
#ifdef I830_USE_EXA
- I830FixOffset(pScrn, &(pI830->Offscreen));
- if (IS_I965G(pI830))
- I830FixOffset(pScrn, &(pI830->EXAStateMem));
+ if (pI830->useEXA) {
+ I830FixOffset(pScrn, &(pI830->Offscreen));
+ if (IS_I965G(pI830))
+ I830FixOffset(pScrn, &(pI830->EXAStateMem));
+ }
#endif
return TRUE;
}
@@ -1919,10 +1921,12 @@ I830BindAGPMemory(ScrnInfoPtr pScrn)
}
#endif
#ifdef I830_USE_EXA
- if (!BindMemRange(pScrn, &(pI830->Offscreen)))
- return FALSE;
- if (IS_I965G(pI830) && !BindMemRange(pScrn, &(pI830->EXAStateMem)))
- return FALSE;
+ if (pI830->useEXA) {
+ if (!BindMemRange(pScrn, &(pI830->Offscreen)))
+ return FALSE;
+ if (IS_I965G(pI830) && !BindMemRange(pScrn, &(pI830->EXAStateMem)))
+ return FALSE;
+ }
#endif
pI830->GttBound = 1;
}
diff-tree fa54a3c08301e59558ab0493b3d22324f4162496 (from 2f2c443e971faa54ffcf751b6acb444e8e7875ce)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Dec 6 13:24:44 2006 +0800
[PATCH] fix Makefile.am
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/Makefile.am b/src/Makefile.am
index a9f427d..80cea10 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -94,7 +94,6 @@ i810_drv_la_SOURCES = \
i830_xaa.c \
i830_exa_render.c \
i915_exa_render.c \
- i965_composite_wm_nomask.h \
i965_exa_render.c
if HAVE_GEN4ASM
@@ -102,8 +101,14 @@ sf_prog.h: packed_yuv_sf.g4a
intel-gen4asm -o sf_prog.h packed_yuv_sf.g4a
wm_prog.h: packed_yuv_wm.g4a
intel-gen4asm -o wm_prog.h packed_yuv_wm.g4a
+exa_sf_prog.h: exa_sf.g4a
+ intel-gen4asm -o exa_sf_prog.h exa_sf.g4a
+exa_sf_mask_prog.h: exa_sf_mask.g4a
+ intel-gen4asm -o exa_sf_mask_prog.h exa_sf_mask.g4a
exa_wm_nomask_prog.h: exa_wm_nomask.g4a
intel-gen4asm -o exa_wm_nomask_prog.h exa_wm_nomask.g4a
+exa_wm_masknoca_prog.h: exa_wm_masknoca.g4a
+ intel-gen4asm -o exa_wm_masknoca_prog.h exa_wm_masknoca.g4a
endif
if DRI
diff-tree 2f2c443e971faa54ffcf751b6acb444e8e7875ce (from 0bf04fe78a8a915310ef8a90f5c7872be7476e2e)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Dec 6 10:43:29 2006 +0800
[PATCH] Formats fixes
We should use card_fmt for the src/mask pictures, and use the dest color
buffer format helper. Also fix the wrong names for G965 texture formats,
and drop PICT_x1r5g5b5, which isn't supported by the sampler engine.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 7e9c1e3..583bc26 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -121,13 +121,12 @@ static struct blendinfo I965BlendOp[] =
/* FIXME: surface format defined in brw_defines.h, shared Sampling engine 1.7.2*/
static struct formatinfo I965TexFormats[] = {
- {PICT_a8r8g8b8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM },
- {PICT_x8r8g8b8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM },
- {PICT_a8b8g8r8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM },
- {PICT_x8b8g8r8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM },
+ {PICT_a8r8g8b8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM },
+ {PICT_x8r8g8b8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM },
+ {PICT_a8b8g8r8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM },
+ {PICT_x8b8g8r8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM },
{PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM },
{PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM },
- {PICT_x1r5g5b5, BRW_SURFACEFORMAT_B5G5R5X1_UNORM },
{PICT_a8, BRW_SURFACEFORMAT_A8_UNORM },
};
@@ -366,6 +365,16 @@ static const CARD32 ps_kernel_static_mas
#include "exa_wm_masknoca_prog.h"
};
+static CARD32 i965_get_card_format(PicturePtr pPict)
+{
+ int i;
+ for (i = 0; i < sizeof(I965TexFormats) / sizeof(I965TexFormats[0]); i++) {
+ if (I965TexFormats[i].fmt == pPict->format)
+ break;
+ }
+ return I965TexFormats[i].card_fmt;
+}
+
Bool
I965EXAPrepareComposite(int op, PicturePtr pSrcPicture,
PicturePtr pMaskPicture, PicturePtr pDstPicture,
@@ -376,10 +385,7 @@ I965EXAPrepareComposite(int op, PictureP
CARD32 src_offset, src_pitch;
CARD32 mask_offset = 0, mask_pitch = 0;
CARD32 dst_format, dst_offset, dst_pitch;
-
-ErrorF("i965 prepareComposite\n");
- I965GetDestFormat(pDstPicture, &dst_format);
src_offset = exaGetPixmapOffset(pSrc);
src_pitch = exaGetPixmapPitch(pSrc);
dst_offset = exaGetPixmapOffset(pDst);
@@ -590,11 +596,9 @@ ErrorF("i965 prepareComposite\n");
memset(dest_surf_state, 0, sizeof(*dest_surf_state));
dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
- if (pDst->drawable.bitsPerPixel == 16) {
- dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- } else {
- dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- }
+ I965GetDestFormat(pDstPicture, &dst_format);
+ dest_surf_state->ss0.surface_format = dst_format;
+
dest_surf_state->ss0.writedisable_alpha = 0;
dest_surf_state->ss0.writedisable_red = 0;
dest_surf_state->ss0.writedisable_green = 0;
@@ -615,12 +619,7 @@ ErrorF("i965 prepareComposite\n");
/* Set up the source surface state buffer */
memset(src_surf_state, 0, sizeof(*src_surf_state));
src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- if (pSrc->drawable.bitsPerPixel == 8)
- src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_A8_UNORM; //XXX?
- else if (pSrc->drawable.bitsPerPixel == 16)
- src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- else
- src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture);
src_surf_state->ss0.writedisable_alpha = 0;
src_surf_state->ss0.writedisable_red = 0;
@@ -643,12 +642,7 @@ ErrorF("i965 prepareComposite\n");
if (pMask) {
memset(mask_surf_state, 0, sizeof(*mask_surf_state));
mask_surf_state->ss0.surface_type = BRW_SURFACE_2D;
- if (pMask->drawable.bitsPerPixel == 8)
- mask_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_A8_UNORM; //XXX?
- else if (pMask->drawable.bitsPerPixel == 16)
- mask_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
- else
- mask_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ mask_surf_state->ss0.surface_format = i965_get_card_format(pMaskPicture);
mask_surf_state->ss0.writedisable_alpha = 0;
mask_surf_state->ss0.writedisable_red = 0;
diff-tree 0bf04fe78a8a915310ef8a90f5c7872be7476e2e (from 5c461063cde68092e778c44ac6abd9129cd8019e)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Mon Dec 4 15:48:04 2006 +0800
[PATCH] set correct default border color
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 2d1ce5f..7e9c1e3 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -691,10 +691,10 @@ ErrorF("i965 prepareComposite\n");
}
memset(default_color_state, 0, sizeof(*default_color_state));
- default_color_state->color[0] = 1.0; /* RGBA format */
- default_color_state->color[1] = 0.0;
- default_color_state->color[2] = 0.0;
- default_color_state->color[3] = 0.0;
+ default_color_state->color[0] = 0.0; /* R */
+ default_color_state->color[1] = 0.0; /* G */
+ default_color_state->color[2] = 0.0; /* B */
+ default_color_state->color[3] = 1.0; /* A */
src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
diff-tree 5c461063cde68092e778c44ac6abd9129cd8019e (from 89a42d489bd370b89e5ff4e01f026b4d64723cd8)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Mon Dec 4 15:47:31 2006 +0800
[PATCH] fix typo in ps kernel
fix corruption in some subspans
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/exa_wm_masknoca.g4a b/src/exa_wm_masknoca.g4a
index 195203c..c2049fd 100644
--- a/src/exa_wm_masknoca.g4a
+++ b/src/exa_wm_masknoca.g4a
@@ -51,10 +51,10 @@ mov (1) g8.20<1>F g1.14<0,1,0>UW { align
add (1) g8.24<1>F g1.14<0,1,0>UW 1UB { align1 };
add (1) g8.28<1>F g1.14<0,1,0>UW 1UB { align1 };
/* Set up ss2.x coordinates */
-mov (1) g9<1>F g1.16<0,1,0>UW { align1 };
-add (1) g9.4<1>F g1.16<0,1,0>UW 1UB { align1 };
-mov (1) g9.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g9.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g7<1>F g1.16<0,1,0>UW { align1 };
+add (1) g7.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g7.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g7.12<1>F g1.16<0,1,0>UW 1UB { align1 };
/* Set up ss2.y coordinates */
mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
diff --git a/src/exa_wm_masknoca_prog.h b/src/exa_wm_masknoca_prog.h
index 66eb960..5fcf3b5 100644
--- a/src/exa_wm_masknoca_prog.h
+++ b/src/exa_wm_masknoca_prog.h
@@ -14,10 +14,10 @@
{ 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
{ 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
{ 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
- { 0x00000001, 0x2120013d, 0x00000030, 0x00000000 },
- { 0x00000040, 0x21240d3d, 0x00000030, 0x00000001 },
- { 0x00000001, 0x2128013d, 0x00000030, 0x00000000 },
- { 0x00000040, 0x212c0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20e40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ec0d3d, 0x00000030, 0x00000001 },
{ 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
{ 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
{ 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
diff-tree 89a42d489bd370b89e5ff4e01f026b4d64723cd8 (from 01bfa4fa6fc0ceec8581676e5d72c68dd71efa96)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 17:16:46 2006 +0800
[PATCH] shut up warning
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 6f2bc84..2d1ce5f 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -1011,10 +1011,8 @@ I965EXAComposite(PixmapPtr pDst, int src
srcXend = srcX + w;
srcYend = srcY + h;
- if (pMask) {
- maskXend = maskX + w;
- maskYend = maskY + h;
- }
+ maskXend = maskX + w;
+ maskYend = maskY + h;
if (is_transform[0]) {
v.vector[0] = IntToxFixed(srcX);
v.vector[1] = IntToxFixed(srcY);
diff-tree 01bfa4fa6fc0ceec8581676e5d72c68dd71efa96 (from 79018bb47c43510d59c592592f06204189bd12dc)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 17:14:55 2006 +0800
[PATCH] fix alpha blending state
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index c4a3f97..6f2bc84 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -562,21 +562,26 @@ ErrorF("i965 prepareComposite\n");
cc_state->cc0.stencil_enable = 0; /* disable stencil */
cc_state->cc2.depth_test = 0; /* disable depth test */
cc_state->cc2.logicop_enable = 0; /* disable logic op */
- cc_state->cc3.ia_blend_enable = 0; /* blend alpha just like colors */
+ cc_state->cc3.ia_blend_enable = 1; /* blend alpha just like colors */
cc_state->cc3.blend_enable = 1; /* enable color blend */
cc_state->cc3.alpha_test = 0; /* disable alpha test */
cc_state->cc4.cc_viewport_state_offset = (state_base_offset + cc_viewport_offset) >> 5;
cc_state->cc5.dither_enable = 0; /* disable dither */
-// cc_state->cc5.logicop_func = 0xc; /* COPY */
-// cc_state->cc5.statistics_enable = 1;
-// cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
-// cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
-// cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
- cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
+ cc_state->cc5.logicop_func = 0xc; /* COPY */
+ cc_state->cc5.statistics_enable = 1;
+ cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
I965GetBlendCntl(op, pMaskPicture, pDstPicture->format,
&src_blend, &dst_blend);
+ /* XXX: alpha blend factor should be same as color, but check
+ for CA case in future */
+ cc_state->cc5.ia_src_blend_factor = src_blend;
+ cc_state->cc5.ia_dest_blend_factor = dst_blend;
+ cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
cc_state->cc6.src_blend_factor = src_blend;
cc_state->cc6.dest_blend_factor = dst_blend;
+ cc_state->cc6.clamp_post_alpha_blend = 1;
+ cc_state->cc6.clamp_pre_alpha_blend = 1;
+ cc_state->cc6.clamp_range = 0; /* clamp range [0,1] */
/* Upload system kernel */
memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
diff-tree 79018bb47c43510d59c592592f06204189bd12dc (from a5b9b438469f171b002fa0b99d8cab83e51ec968)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 17:05:32 2006 +0800
[PATCH] Add in sf/wm program for mask picture without CA
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/exa_sf_mask.g4a b/src/exa_sf_mask.g4a
new file mode 100644
index 0000000..ab519ce
--- /dev/null
+++ b/src/exa_sf_mask.g4a
@@ -0,0 +1,53 @@
+
+/* FIXME: how to set up the second coefficient for the mask tex coord */
+
+/*
+ g3 (v0) { u0, v0, 1.0, 1.0 } ==> {u0, v0, 1.0, 1.0, mu0, mv0, 1.0, 1.0} Co[0](u0) Co[1](v0) Co[2](mu0) Co[3](mv0)
+ g4 (v1) { u1, v1, 1.0, 1.0 } ==> {u1, v1, 1.0, 1.0, mu1, mv1, 1.0, 1.0}
+ g5 (v2) { u2, v2 } ==> (u2, v2, mu2, mv2}
+ g6 { 1/(x1-x0), 1/(y1-y0) }
+ g7 { u1-u0, v1-v0, 0, 0} ==>{u1-u0, v1-v0,0, 0, mu1-mu0, mv1-mv0, 0, 0}
+ -> { (u1-u0)/(x1-x0), (v1-v0)/(y1-y0) } ==>{(u1-u0)/(x1-x0), (v1-v0)/(y1-y0),(mu1-mu0)/(x1-x0), (mv1-mv0)/(y1-y0)
+ Cx, Cy Cx[0], Cy[0], Cx[1], Cy[1]
+ */
+
+/* assign Cx[0], Cx[1] to src, same to Cy, Co
+ Cx[2], Cx[3] to mask, same to Cy, Co */
+
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+/* Cx[0] */
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+/* Cy[0] */
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+/* Cx[2] */
+mul (1) g7.16<1>F g7.16<0,1,0>F g6<0,1,0>F { align1 };
+/* Cy[2] */
+mul (1) g7.20<1>F g7.20<0,1,0>F g6.4<0,1,0>F { align1 };
+
+/* src Cx[0], Cx[1] */
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+/* mask Cx[2], Cx[3] */
+mov (1) m1.8<1>F g7.16<0,1,0>F { align1 };
+mov (1) m1.12<1>F g7.16<0,1,0>F { align1 };
+/* src Cy[0], Cy[1] */
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+/* mask Cy[2], Cy[3] */
+mov (1) m2.8<1>F g7.20<0,1,0>F { align1 };
+mov (1) m2.12<1>F g7.20<0,1,0>F { align1 };
+/* src Co[0], Co[1] */
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+/* mask Co[2], Co[3] */
+mov (1) m3.8<1>F g3.16<0,1,0>F { align1 };
+mov (1) m3.12<1>F g3.20<0,1,0>F { align1 };
+
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_sf_mask_prog.h b/src/exa_sf_mask_prog.h
new file mode 100644
index 0000000..cd7f460
--- /dev/null
+++ b/src/exa_sf_mask_prog.h
@@ -0,0 +1,25 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00000041, 0x20f077bd, 0x000000f0, 0x000000c0 },
+ { 0x00000041, 0x20f477bd, 0x000000f4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00000001, 0x202803be, 0x000000f0, 0x00000000 },
+ { 0x00000001, 0x202c03be, 0x000000f0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00000001, 0x204803be, 0x000000f4, 0x00000000 },
+ { 0x00000001, 0x204c03be, 0x000000f4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00000001, 0x206803be, 0x00000070, 0x00000000 },
+ { 0x00000001, 0x206c03be, 0x00000074, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_masknoca.g4a b/src/exa_wm_masknoca.g4a
new file mode 100644
index 0000000..195203c
--- /dev/null
+++ b/src/exa_wm_masknoca.g4a
@@ -0,0 +1,202 @@
+/*
+ * This is for the exa composite operation in the mask picture case without
+ * component alpha: src is multiplied by the mask's alpha and written to dst.
+ * XXX: This is still experimental, and should be fixed to support multiple texture
+ * map, and conditional mul actions.
+ */
+
+/* I think this should be same as in g4a program for texture video,
+ as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
+
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ As the mask texture coefficient needs an extra setup urb starting from g4, we should
+ shift this location.
+
+ * X0_R is g4->g6
+ * X1_R is g5->g7
+ * Y0_R is g6->g8
+ * Y1_R is g7->g9
+
+ * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
+ * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
+ * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
+ * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
+ */
+
+/* multitexture program with src and mask texture */
+/* - load src texture */
+/* - load mask texture */
+/* - mul src.X with mask's alpha */
+/* - write out src.X */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g6<1>F g1.8<0,1,0>UW { align1 };
+add (1) g6.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g6.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g6.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g8<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g8.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g8.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g8.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g6.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g6.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g6.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g6.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g8.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g8.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g8.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g8.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g9<1>F g1.16<0,1,0>UW { align1 };
+add (1) g9.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g9.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g9.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g9<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g9.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g9.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g9.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g7.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g7.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g7.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g7.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g9.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g9.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g9.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g9.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+/* This is for src texture */
+/* I don't want to change origin ssX coords, as it will be used later in mask */
+/* so store tex coords in g10, g11, g12, g13 */
+
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+/* Cx[0] */
+mul (8) g10<1>F g10<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g11<1>F g11<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+/* Co[0] */
+add (8) g10<1>F g10<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g11<1>F g11<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+/* Cy[0] */
+mul (8) g12<1>F g12<8,8,1>F g3.4<0,1,0>F { align1 };
+mul (8) g13<1>F g13<8,8,1>F g3.4<0,1,0>F { align1 };
+ /* add in texture Y offset */
+/* Co[1] */
+add (8) g12<1>F g12<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g13<1>F g13<8,8,1>F g3.28<0,1,0>F { align1 };
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+mov (8) m1<1>F g10<8,8,1>F { align1 };
+mov (8) m2<1>F g11<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
+mov (8) m3<1>F g12<8,8,1>F { align1 };
+mov (8) m4<1>F g13<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
+
+/* m0 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+
+/* src texture readback: g14-g21 */
+send (16) 0 /* msg reg index */
+ g14<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description,
+ (binding_table,sampler_index,datatype).
+ here(src->dst) we should use src_sampler and
+ src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+mov (8) g21<1>UD g21<8,8,1>UD { align1 }; /* wait sampler return */
+
+/* sampler mask texture, use g10, g11, g12, g13 */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g10<1>F g6<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g11<1>F g7<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+/* Cx[2] */
+mul (8) g10<1>F g10<8,8,1>F g4<0,1,0>F { align1 };
+mul (8) g11<1>F g11<8,8,1>F g4<0,1,0>F { align1 };
+ /* add in texture X offset */
+/* Co[2] */
+add (8) g10<1>F g10<8,8,1>F g4.12<0,1,0>F { align1 };
+add (8) g11<1>F g11<8,8,1>F g4.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g12<1>F g8<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g13<1>F g9<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+/* Cy[2] */
+mul (8) g12<1>F g12<8,8,1>F g4.4<0,1,0>F { align1 };
+mul (8) g13<1>F g13<8,8,1>F g4.4<0,1,0>F { align1 };
+ /* add in texture Y offset */
+/* Co[3] */
+add (8) g12<1>F g12<8,8,1>F g4.28<0,1,0>F { align1 };
+add (8) g13<1>F g13<8,8,1>F g4.28<0,1,0>F { align1 };
+
+mov (8) m1<1>F g10<8,8,1>F { align1 };
+mov (8) m2<1>F g11<8,8,1>F { align1 };
+mov (8) m3<1>F g12<8,8,1>F { align1 };
+mov (8) m4<1>F g13<8,8,1>F { align1 };
+
+/* mask sampler g22-g29 */
+/* binding_table (2), sampler (1) */
+send (16) 0 g22<1>UW g0<8,8,1>UW sampler (2,1,F) mlen 5 rlen 8 { align1 };
+mov (8) g29<1>UD g29<8,8,1>UD { align1 }; /* wait sampler return */
+
+/* mul mask's alpha channel g28,g29 to src (g14-g21), then write out src */
+mul (8) g14<1>F g14<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g15<1>F g15<8,8,1>F g29<8,8,1>F { align1 };
+mul (8) g16<1>F g16<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g17<1>F g17<8,8,1>F g29<8,8,1>F { align1 };
+mul (8) g18<1>F g18<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g19<1>F g19<8,8,1>F g29<8,8,1>F { align1 };
+mul (8) g20<1>F g20<8,8,1>F g28<8,8,1>F { align1 };
+mul (8) g21<1>F g21<8,8,1>F g29<8,8,1>F { align1 };
+
+/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
+mov (8) m2<1>F g14<8,8,1>F { align1 };
+mov (8) m3<1>F g16<8,8,1>F { align1 };
+mov (8) m4<1>F g18<8,8,1>F { align1 };
+mov (8) m5<1>F g20<8,8,1>F { align1 };
+mov (8) m6<1>F g15<8,8,1>F { align1 };
+mov (8) m7<1>F g17<8,8,1>F { align1 };
+mov (8) m8<1>F g19<8,8,1>F { align1 };
+mov (8) m9<1>F g21<8,8,1>F { align1 };
+
+/* m0, m1 are all direct passed by PS thread payload */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+
+/* write */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding_table */
+ 8, /* pixel scoreboard clear, msg type simd16 single source */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_wm_masknoca_prog.h b/src/exa_wm_masknoca_prog.h
new file mode 100644
index 0000000..66eb960
--- /dev/null
+++ b/src/exa_wm_masknoca_prog.h
@@ -0,0 +1,95 @@
+ { 0x00000001, 0x20c0013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20c40d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c8013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20cc0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2100013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x2104013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x21080d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x210c0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20d40d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d8013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2110013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x2114013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x21180d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x211c0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x2120013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x21240d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x2128013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x212c0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x2120013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x2124013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x21280d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x212c0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20f40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20fc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x2130013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x2134013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x21380d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x213c0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000060 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000060 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000006c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000006c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000064 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000064 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000007c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x21c01d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22a00021, 0x008d02a0, 0x00000000 },
+ { 0x00600040, 0x214077bd, 0x008d00c0, 0x00004020 },
+ { 0x00600040, 0x216077bd, 0x008d00e0, 0x00004020 },
+ { 0x00600041, 0x214077bd, 0x008d0140, 0x00000080 },
+ { 0x00600041, 0x216077bd, 0x008d0160, 0x00000080 },
+ { 0x00600040, 0x214077bd, 0x008d0140, 0x0000008c },
+ { 0x00600040, 0x216077bd, 0x008d0160, 0x0000008c },
+ { 0x00600040, 0x218077bd, 0x008d0100, 0x00004024 },
+ { 0x00600040, 0x21a077bd, 0x008d0120, 0x00004024 },
+ { 0x00600041, 0x218077bd, 0x008d0180, 0x00000084 },
+ { 0x00600041, 0x21a077bd, 0x008d01a0, 0x00000084 },
+ { 0x00600040, 0x218077bd, 0x008d0180, 0x0000009c },
+ { 0x00600040, 0x21a077bd, 0x008d01a0, 0x0000009c },
+ { 0x00600001, 0x202003be, 0x008d0140, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0160, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d01a0, 0x00000000 },
+ { 0x00800031, 0x22c01d29, 0x008d0000, 0x02580102 },
+ { 0x00600001, 0x23a00021, 0x008d03a0, 0x00000000 },
+ { 0x00600041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
+ { 0x00600041, 0x21e077bd, 0x008d01e0, 0x008d03a0 },
+ { 0x00600041, 0x220077bd, 0x008d0200, 0x008d0380 },
+ { 0x00600041, 0x222077bd, 0x008d0220, 0x008d03a0 },
+ { 0x00600041, 0x224077bd, 0x008d0240, 0x008d0380 },
+ { 0x00600041, 0x226077bd, 0x008d0260, 0x008d03a0 },
+ { 0x00600041, 0x228077bd, 0x008d0280, 0x008d0380 },
+ { 0x00600041, 0x22a077bd, 0x008d02a0, 0x008d03a0 },
+ { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 4bc90c1..c4a3f97 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -344,12 +344,16 @@ static const CARD32 sf_kernel_static[][4
#include "exa_sf_prog.h"
};
+static const CARD32 sf_kernel_static_mask[][4] = {
+#include "exa_sf_mask_prog.h"
+};
+
/* ps kernels */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 32
/* 1: no mask */
static const CARD32 ps_kernel_static_nomask [][4] = {
- #include "exa_wm_nomask_prog.h"
+#include "exa_wm_nomask_prog.h"
};
/* 2: mask with componentAlpha, src * mask color, XXX: later */
@@ -359,7 +363,7 @@ static const CARD32 ps_kernel_static_mas
/* 3: mask without componentAlpha, src * mask alpha */
static const CARD32 ps_kernel_static_masknoca [][4] = {
-/*#include "i965_composite_wm_masknoca.h" */
+#include "exa_wm_masknoca_prog.h"
};
Bool
@@ -375,11 +379,6 @@ I965EXAPrepareComposite(int op, PictureP
ErrorF("i965 prepareComposite\n");
- /* FIXME: fallback in pMask for now, would be enable after finish
- wm kernel program */
- if (pMask)
- I830FALLBACK("No mask support yet.\n");
-
I965GetDestFormat(pDstPicture, &dst_format);
src_offset = exaGetPixmapOffset(pSrc);
src_pitch = exaGetPixmapPitch(pSrc);
@@ -436,7 +435,10 @@ ErrorF("i965 prepareComposite\n");
/* keep current sf_kernel, which will send one setup urb entry to
PS kernel */
sf_kernel_offset = ALIGN(next_offset, 64);
- next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
+ if (pMask)
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
+ else
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
//XXX: ps_kernel may be seperated, fix with offset
ps_kernel_offset = ALIGN(next_offset, 64);
@@ -746,7 +748,10 @@ ErrorF("i965 prepareComposite\n");
* calculate dA/dx and dA/dy. Hand these interpolation coefficients
* back to SF which then hands pixels off to WM.
*/
- memcpy (sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
+ if (pMask)
+ memcpy (sf_kernel, sf_kernel_static_mask, sizeof (sf_kernel_static));
+ else
+ memcpy (sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
memset(sf_state, 0, sizeof(*sf_state));
sf_state->thread0.kernel_start_pointer =
@@ -780,7 +785,6 @@ ErrorF("i965 prepareComposite\n");
/* Set up the PS kernel (dispatched by WM)
*/
- // XXX: replace to texture blend shader, and different cases
if (pMask) {
if (pMaskPicture->componentAlpha)
memcpy (ps_kernel, ps_kernel_static_maskca, sizeof (ps_kernel_static_maskca));
diff-tree a5b9b438469f171b002fa0b99d8cab83e51ec968 (from b7c1e1656f45e43ea2f9a47f1a487050c0884c22)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:52:44 2006 +0800
[PATCH] misc cleanup for G965 vs/sf/wm states
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 51b2c60..4bc90c1 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -266,6 +266,7 @@ I965EXACheckComposite(int op, PicturePtr
#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
int urb_vs_start, urb_vs_size;
int urb_gs_start, urb_gs_size;
@@ -336,9 +337,8 @@ static const CARD32 sip_kernel_static[][
* with the base texture coordinate. It was extracted from the Mesa driver
*/
-#define SF_KERNEL_NUM_GRF 10
-#define SF_KERNEL_NUM_URB 8
-#define SF_MAX_THREADS 4
+#define SF_KERNEL_NUM_GRF 16
+#define SF_MAX_THREADS 1
static const CARD32 sf_kernel_static[][4] = {
#include "exa_sf_prog.h"
@@ -468,7 +468,6 @@ ErrorF("i965 prepareComposite\n");
next_offset = vb_offset + vb_size;
/* And then the general state: */
- //XXX: fix for texture map and target surface
dest_surf_offset = ALIGN(next_offset, 32);
next_offset = dest_surf_offset + sizeof(*dest_surf_state);
@@ -534,8 +533,8 @@ ErrorF("i965 prepareComposite\n");
#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0
-#define URB_SF_ENTRY_SIZE 4
-#define URB_SF_ENTRIES 8
+#define URB_SF_ENTRY_SIZE 2
+#define URB_SF_ENTRIES 1
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
@@ -564,7 +563,6 @@ ErrorF("i965 prepareComposite\n");
cc_state->cc3.ia_blend_enable = 0; /* blend alpha just like colors */
cc_state->cc3.blend_enable = 1; /* enable color blend */
cc_state->cc3.alpha_test = 0; /* disable alpha test */
- // XXX:cc_viewport needed?
cc_state->cc4.cc_viewport_state_offset = (state_base_offset + cc_viewport_offset) >> 5;
cc_state->cc5.dither_enable = 0; /* disable dither */
// cc_state->cc5.logicop_func = 0xc; /* COPY */
@@ -585,7 +583,6 @@ ErrorF("i965 prepareComposite\n");
memset(dest_surf_state, 0, sizeof(*dest_surf_state));
dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
- // XXX: should compare with picture's cpp?...8 bit surf?
if (pDst->drawable.bitsPerPixel == 16) {
dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
} else {
@@ -601,14 +598,12 @@ ErrorF("i965 prepareComposite\n");
dest_surf_state->ss0.mipmap_layout_mode = 0;
dest_surf_state->ss0.render_cache_read_mode = 0;
- // XXX: fix to picture address & size
dest_surf_state->ss1.base_addr = dst_offset;
dest_surf_state->ss2.height = pDst->drawable.height - 1;
dest_surf_state->ss2.width = pDst->drawable.width - 1;
dest_surf_state->ss2.mip_count = 0;
dest_surf_state->ss2.render_target_rotation = 0;
dest_surf_state->ss3.pitch = dst_pitch - 1;
- // tiled surface?
/* Set up the source surface state buffer */
memset(src_surf_state, 0, sizeof(*src_surf_state));
@@ -741,8 +736,10 @@ ErrorF("i965 prepareComposite\n");
/* Set up the vertex shader to be disabled (passthrough) */
memset(vs_state, 0, sizeof(*vs_state));
- // XXX: vs URB should be defined for VF vertex URB store. done already?
+ vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
+ vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
vs_state->vs6.vs_enable = 0;
+ vs_state->vs6.vert_cache_disable = 1;
// XXX: sf_kernel? keep it as now
/* Set up the SF kernel to do coord interp: for each attribute,
@@ -754,7 +751,7 @@ ErrorF("i965 prepareComposite\n");
memset(sf_state, 0, sizeof(*sf_state));
sf_state->thread0.kernel_start_pointer =
(state_base_offset + sf_kernel_offset) >> 6;
- sf_state->thread0.grf_reg_count = ((SF_KERNEL_NUM_GRF & ~15) / 16);
+ sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
sf_state->sf1.single_program_flow = 1;
sf_state->sf1.binding_table_entry_count = 0;
sf_state->sf1.thread_priority = 0;
@@ -795,7 +792,7 @@ ErrorF("i965 prepareComposite\n");
memset (wm_state, 0, sizeof (*wm_state));
wm_state->thread0.kernel_start_pointer =
(state_base_offset + ps_kernel_offset) >> 6;
- wm_state->thread0.grf_reg_count = ((PS_KERNEL_NUM_GRF & ~15) / 16);
+ wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
wm_state->thread1.single_program_flow = 1;
if (!pMask)
wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
@@ -808,7 +805,10 @@ ErrorF("i965 prepareComposite\n");
// XXX: urb allocation
wm_state->thread3.const_urb_entry_read_length = 0;
wm_state->thread3.const_urb_entry_read_offset = 0;
- wm_state->thread3.urb_entry_read_length = 1; /* one per pair of attrib */
+ if (pMask)
+ wm_state->thread3.urb_entry_read_length = 2; /* two per pair of attrib */
+ else
+ wm_state->thread3.urb_entry_read_length = 1; /* one per pair of attrib */
wm_state->thread3.urb_entry_read_offset = 0;
// wm kernel use urb from 3, see wm_program in compiler module
wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
diff-tree b7c1e1656f45e43ea2f9a47f1a487050c0884c22 (from db9cfaa35adaf79ea57bc06b27c7e37935d3e1a7)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:40:15 2006 +0800
[PATCH] WM kernel needs scratch space
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 51c2006..51b2c60 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -297,6 +297,7 @@ int dest_surf_offset, src_surf_offset, m
int src_sampler_offset, mask_sampler_offset,vs_offset;
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
+int wm_scratch_offset;
int binding_table_offset;
int default_color_offset;
int next_offset, total_state_size;
@@ -426,6 +427,9 @@ ErrorF("i965 prepareComposite\n");
wm_offset = ALIGN(next_offset, 32);
next_offset = wm_offset + sizeof(*wm_state);
+ wm_scratch_offset = ALIGN(next_offset, 1024);
+ next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
+
cc_offset = ALIGN(next_offset, 32);
next_offset = cc_offset + sizeof(*cc_state);
@@ -798,7 +802,8 @@ ErrorF("i965 prepareComposite\n");
else
wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
- wm_state->thread2.scratch_space_base_pointer = 0;
+ wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
+ wm_scratch_offset)>>10;
wm_state->thread2.per_thread_scratch_space = 0;
// XXX: urb allocation
wm_state->thread3.const_urb_entry_read_length = 0;
diff-tree db9cfaa35adaf79ea57bc06b27c7e37935d3e1a7 (from 70276e4e9a8a5026ec436d2be5bf5eab868aa178)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:37:06 2006 +0800
[PATCH] Setup default border color for our samplers
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 94eabfb..51c2006 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -278,6 +278,7 @@ struct brw_surface_state *src_surf_state
struct brw_surface_state *mask_surf_state;
struct brw_sampler_state *src_sampler_state;
struct brw_sampler_state *mask_sampler_state;
+struct brw_sampler_default_color *default_color_state;
struct brw_vs_unit_state *vs_state;
struct brw_sf_unit_state *sf_state;
@@ -297,6 +298,7 @@ int src_sampler_offset, mask_sampler_off
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
int binding_table_offset;
+int default_color_offset;
int next_offset, total_state_size;
char *state_base;
int state_base_offset;
@@ -478,6 +480,9 @@ ErrorF("i965 prepareComposite\n");
binding_table_offset = ALIGN(next_offset, 32);
next_offset = binding_table_offset + (binding_table_entries * 4);
+ default_color_offset = ALIGN(next_offset, 32);
+ next_offset = default_color_offset + sizeof(*default_color_state);
+
total_state_size = next_offset;
assert(total_state_size < EXA_LINEAR_EXTRA);
@@ -508,6 +513,8 @@ ErrorF("i965 prepareComposite\n");
vb = (void *)(state_base + vb_offset);
+ default_color_state = (void*)(state_base + default_color_offset);
+
/* Set up a default static partitioning of the URB, which is supposed to
* allow anything we would want to do, at potentially lower performance.
*/
@@ -541,7 +548,6 @@ ErrorF("i965 prepareComposite\n");
* here, but we should have synced the 3D engine already in I830PutImage.
*/
-// needed?
memset (cc_viewport, 0, sizeof (*cc_viewport));
cc_viewport->min_depth = -1.e35;
cc_viewport->max_depth = 1.e35;
@@ -678,18 +684,25 @@ ErrorF("i965 prepareComposite\n");
I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
}
+ memset(default_color_state, 0, sizeof(*default_color_state));
+ default_color_state->color[0] = 1.0; /* RGBA format */
+ default_color_state->color[1] = 0.0;
+ default_color_state->color[2] = 0.0;
+ default_color_state->color[3] = 0.0;
+
+ src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
+
if (!pSrcPicture->repeat) {
- /* XXX: clamp_border and set border to 0 */
- src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ src_sampler_state->ss2.default_color_pointer =
+ (state_base_offset + default_color_offset) >> 5;
} else {
src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
- /* XXX: ss2 has border color pointer, which should be in general state address,
- and just a single texel tex map, with R32G32B32A32_FLOAT */
src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
if (pMask) {
@@ -709,17 +722,16 @@ ErrorF("i965 prepareComposite\n");
}
if (!pMaskPicture->repeat) {
- /* XXX: clamp_border and set border to 0 */
- mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
+ mask_sampler_state->ss2.default_color_pointer =
+ (state_base_offset + default_color_offset)>>5;
} else {
mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
- /* XXX: ss2 has border color pointer, which should be in general state address,
- and just a single texel tex map, with R32G32B32A32_FLOAT */
mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
}
diff-tree 70276e4e9a8a5026ec436d2be5bf5eab868aa178 (from e8a4cbdeff4125e28d807d0a563efc0606d21a75)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:30:53 2006 +0800
[PATCH] fix vertex buffer size
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 9127d65..94eabfb 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -233,16 +233,12 @@ Bool
I965EXACheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
PicturePtr pDstPicture)
{
- /* check op*/
- /* check op with mask's componentAlpha*/
- /* check textures */
- /* check dst buffer format */
CARD32 tmp1;
/* Check for unsupported compositing operations. */
if (op >= sizeof(I965BlendOp) / sizeof(I965BlendOp[0]))
I830FALLBACK("Unsupported Composite op 0x%x\n", op);
-
+
if (pMaskPicture != NULL && pMaskPicture->componentAlpha) {
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
@@ -305,7 +301,7 @@ int next_offset, total_state_size;
char *state_base;
int state_base_offset;
float *vb;
-int vb_size = 4 * 4 ; /* 4 DWORDS per vertex, 4 vertices for TRIFAN*/
+int vb_size = (4 * 4) * 4 ; /* 4 DWORDS per vertex*/
CARD32 src_blend, dst_blend;
diff-tree e8a4cbdeff4125e28d807d0a563efc0606d21a75 (from 42534474fd2556e5987205626cca8f30e25855a8)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:24:24 2006 +0800
[PATCH] clean up issue cmd to ring buffer
Make it easy to track the different parts of the ring state, and
use the rectlist primitive instead of a trifan.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 2c3e43b..9127d65 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -376,11 +376,6 @@ I965EXAPrepareComposite(int op, PictureP
ErrorF("i965 prepareComposite\n");
-// i965_3d_pipeline_setup(pScrn);
-// i965_surf_setup(pScrn, pSrcPicture, pMaskPicture, pDstPicture,
-// pSrc, pMask, pDst);
- // then setup blend, and shader program
-
/* FIXME: fallback in pMask for now, would be enable after finish
wm kernel program */
if (pMask)
@@ -819,62 +814,65 @@ ErrorF("i965 prepareComposite\n");
* rendering pipe
*/
{
-
- BEGIN_LP_RING((pMask?48:46));
- // MI_FLUSH prior to PIPELINE_SELECT
- OUT_RING(MI_FLUSH |
+ BEGIN_LP_RING(2);
+ OUT_RING(MI_FLUSH |
MI_STATE_INSTRUCTION_CACHE_FLUSH |
BRW_MI_GLOBAL_SNAPSHOT_RESET);
+ OUT_RING(MI_NOOP);
+ ADVANCE_LP_RING();
+ }
+ {
+ BEGIN_LP_RING(12);
- /* Match Mesa driver setup */
- OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+ /* Match Mesa driver setup */
+ OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+ OUT_RING(BRW_CS_URB_STATE | 0);
+ OUT_RING((0 << 4) | /* URB Entry Allocation Size */
+ (0 << 0)); /* Number of URB Entries */
+
/* Zero out the two base address registers so all offsets are absolute */
- // XXX: zero out...
- OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
- // why this's not state_base_offset? -> because later we'll always add on
- // state_base_offset to offset params. see SIP
- OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
- OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
- OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
- OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* general state max addr, disabled */
- OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* media object state max addr, disabled */
+ OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* general state max addr, disabled */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* media object state max addr, disabled */
/* Set system instruction pointer */
- OUT_RING(BRW_STATE_SIP | 0);
- OUT_RING(state_base_offset + sip_kernel_offset); /* system instruction pointer */
-
+ OUT_RING(BRW_STATE_SIP | 0);
+ OUT_RING(state_base_offset + sip_kernel_offset); /* system instruction pointer */
+ OUT_RING(MI_NOOP);
+ ADVANCE_LP_RING();
+ }
+ {
+ BEGIN_LP_RING(26);
/* Pipe control */
- // XXX: pipe control write cache before enabling color blending
- // vol2, geometry pipeline 1.8.4
- OUT_RING(BRW_PIPE_CONTROL |
+ OUT_RING(BRW_PIPE_CONTROL |
BRW_PIPE_CONTROL_NOWRITE |
BRW_PIPE_CONTROL_IS_FLUSH |
2);
- OUT_RING(0); /* Destination address */
- OUT_RING(0); /* Immediate data low DW */
- OUT_RING(0); /* Immediate data high DW */
+ OUT_RING(0); /* Destination address */
+ OUT_RING(0); /* Immediate data low DW */
+ OUT_RING(0); /* Immediate data high DW */
/* Binding table pointers */
- OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
- OUT_RING(0); /* vs */
- OUT_RING(0); /* gs */
- OUT_RING(0); /* clip */
- OUT_RING(0); /* sf */
+ OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_RING(0); /* vs */
+ OUT_RING(0); /* gs */
+ OUT_RING(0); /* clip */
+ OUT_RING(0); /* sf */
/* Only the PS uses the binding table */
- OUT_RING(state_base_offset + binding_table_offset); /* ps */
-
- //ring 20
+ OUT_RING(state_base_offset + binding_table_offset); /* ps */
/* The drawing rectangle clipping is always on. Set it to values that
* shouldn't do any clipping.
*/
- //XXX: fix for picture size
- OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
- OUT_RING(0x00000000); /* ymin, xmin */
- OUT_RING((pScrn->virtualX - 1) |
- (pScrn->virtualY - 1) << 16); /* ymax, xmax */
- OUT_RING(0x00000000); /* yorigin, xorigin */
+ OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
+ OUT_RING(0x00000000); /* ymin, xmin */
+ OUT_RING((pScrn->virtualX - 1) |
+ (pScrn->virtualY - 1) << 16); /* ymax, xmax */
+ OUT_RING(0x00000000); /* yorigin, xorigin */
/* skip the depth buffer */
/* skip the polygon stipple */
@@ -882,90 +880,82 @@ ErrorF("i965 prepareComposite\n");
/* skip the line stipple */
/* Set the pointers to the 3d pipeline state */
- OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
- OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */
- OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
- OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
- OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */
- OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */
- OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */
+ OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */
+ OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
+ OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
+ OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */
/* URB fence */
- // XXX: CS for const URB needed? if not, cs_fence should be equal to sf_fence
- OUT_RING(BRW_URB_FENCE |
- UF0_CS_REALLOC |
- UF0_SF_REALLOC |
- UF0_CLIP_REALLOC |
- UF0_GS_REALLOC |
- UF0_VS_REALLOC |
- 1);
- OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
- ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
- ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
- OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
- ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+ OUT_RING(BRW_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
/* Constant buffer state */
- // XXX: needed? seems no usage, as we don't have CONSTANT_BUFFER definition
- OUT_RING(BRW_CS_URB_STATE | 0);
- OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
- (URB_CS_ENTRIES << 0)); /* Number of URB Entries */
-
+ OUT_RING(BRW_CS_URB_STATE | 0);
+ OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
+ (URB_CS_ENTRIES << 0)); /* Number of URB Entries */
+ ADVANCE_LP_RING();
+ }
+ {
+ int nelem = pMask ? 3: 2;
+ BEGIN_LP_RING(pMask?12:10);
/* Set up the pointer to our vertex buffer */
- // XXX: double check
- // int vb_pitch = 4 * 4; // XXX: pitch should include mask's coords? possible
- // all three coords on one row?
- int nelem = pMask ? 3: 2;
- OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3); //XXX: should be 4n-1 -> 3
- OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
- VB0_VERTEXDATA |
- ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
- // pitch includes all vertex data, 4bytes for 1 dword, each
- // element has 2 coords (x,y)(s0,t0), nelem to reflect possible
- // mask
- OUT_RING(state_base_offset + vb_offset);
- OUT_RING(4 * nelem); // max index, prim has 4 coords
- OUT_RING(0); // ignore for VERTEXDATA, but still there
+ OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3);
+ OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
+ VB0_VERTEXDATA |
+ ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
+ OUT_RING(state_base_offset + vb_offset);
+ OUT_RING(2); // max index, prim has 4 coords
+ OUT_RING(0); // ignore for VERTEXDATA, but still there
/* Set up our vertex elements, sourced from the single vertex buffer. */
- OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1)); // XXX: 2n-1, (x,y) + (s0,t0) +
- // possible (s1, t1)
+ OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
/* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
- OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (0 << VE0_OFFSET_SHIFT));
- OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
/* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
- OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (8 << VE0_OFFSET_SHIFT));
- OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
-
- if (pMask) {
OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
- VE0_VALID |
- (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
- (16 << VE0_OFFSET_SHIFT));
- OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
- (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
- (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
- (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
- //XXX: is this has alignment issue? and thread access problem?
- }
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (8 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+
+ if (pMask) {
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (16 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ }
- ADVANCE_LP_RING();
-
+ ADVANCE_LP_RING();
}
#ifdef I830DEBUG
@@ -983,7 +973,7 @@ I965EXAComposite(PixmapPtr pDst, int src
I830Ptr pI830 = I830PTR(pScrn);
int srcXend, srcYend, maskXend, maskYend;
PictVector v;
- int pMask = 1, i = 0;
+ int pMask = 1, i;
DPRINTF(PFX, "Composite: srcX %d, srcY %d\n\t maskX %d, maskY %d\n\t"
"dstX %d, dstY %d\n\twidth %d, height %d\n\t"
@@ -999,8 +989,10 @@ I965EXAComposite(PixmapPtr pDst, int src
srcXend = srcX + w;
srcYend = srcY + h;
- maskXend = maskX + w;
- maskYend = maskY + h;
+ if (pMask) {
+ maskXend = maskX + w;
+ maskYend = maskY + h;
+ }
if (is_transform[0]) {
v.vector[0] = IntToxFixed(srcX);
v.vector[1] = IntToxFixed(srcY);
@@ -1035,51 +1027,45 @@ I965EXAComposite(PixmapPtr pDst, int src
"dstX %d, dstY %d\n", srcX, srcY, srcXend, srcYend,
maskX, maskY, maskXend, maskYend, dstX, dstY);
-
- vb[i++] = (float)dstX;
- vb[i++] = (float)dstY;
- vb[i++] = (float)srcX / scale_units[0][0];
- vb[i++] = (float)srcY / scale_units[0][1];
- if (pMask) {
- vb[i++] = (float)maskX / scale_units[1][0];
- vb[i++] = (float)maskY / scale_units[1][1];
- }
-
- vb[i++] = (float)dstX;
- vb[i++] = (float)(dstY + h);
- vb[i++] = (float)srcX / scale_units[0][0];
- vb[i++] = (float)srcYend / scale_units[0][1];
+ i = 0;
+ /* rect (x2,y2) */
+ vb[i++] = (float)(srcXend) / scale_units[0][0];
+ vb[i++] = (float)(srcYend) / scale_units[0][1];
if (pMask) {
- vb[i++] = (float)maskX / scale_units[1][0];
+ vb[i++] = (float)maskXend / scale_units[1][0];
vb[i++] = (float)maskYend / scale_units[1][1];
}
-
vb[i++] = (float)(dstX + w);
vb[i++] = (float)(dstY + h);
- vb[i++] = (float)srcXend / scale_units[0][0];
- vb[i++] = (float)srcYend / scale_units[0][1];
+
+ /* rect (x1,y2) */
+ vb[i++] = (float)(srcX)/ scale_units[0][0];
+ vb[i++] = (float)(srcYend)/ scale_units[0][1];
if (pMask) {
- vb[i++] = (float)maskXend / scale_units[1][0];
+ vb[i++] = (float)maskX / scale_units[1][0];
vb[i++] = (float)maskYend / scale_units[1][1];
}
+ vb[i++] = (float)dstX;
+ vb[i++] = (float)(dstY + h);
- vb[i++] = (float)(dstX + w);
- vb[i++] = (float)dstY;
- vb[i++] = (float)srcXend / scale_units[0][0];
- vb[i++] = (float)srcY / scale_units[0][1];
+ /* rect (x1,y1) */
+ vb[i++] = (float)(srcX) / scale_units[0][0];
+ vb[i++] = (float)(srcY) / scale_units[0][1];
if (pMask) {
- vb[i++] = (float)maskXend / scale_units[1][0];
+ vb[i++] = (float)maskX / scale_units[1][0];
vb[i++] = (float)maskY / scale_units[1][1];
}
-
+ vb[i++] = (float)dstX;
+ vb[i++] = (float)dstY;
+
{
BEGIN_LP_RING(6);
OUT_RING(BRW_3DPRIMITIVE |
BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
- (_3DPRIM_TRIFAN << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+ (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) | /* CTG - indirect vertex count */
4);
- OUT_RING(4); /* vertex count per instance */
+ OUT_RING(3); /* vertex count per instance */
OUT_RING(0); /* start vertex offset */
OUT_RING(1); /* single instance */
OUT_RING(0); /* start instance location */
@@ -1090,4 +1076,19 @@ I965EXAComposite(PixmapPtr pDst, int src
ErrorF("sync after 3dprimitive");
I830Sync(pScrn);
#endif
+ /* we must be sure that the pipeline is flushed before next exa draw,
+ because that will be new state, binding state and instructions*/
+ {
+ BEGIN_LP_RING(4);
+ OUT_RING(BRW_PIPE_CONTROL |
+ BRW_PIPE_CONTROL_NOWRITE |
+ BRW_PIPE_CONTROL_WC_FLUSH |
+ BRW_PIPE_CONTROL_IS_FLUSH |
+ (1 << 10) | /* XXX texture cache flush for BLC/CTG */
+ 2);
+ OUT_RING(0); /* Destination address */
+ OUT_RING(0); /* Immediate data low DW */
+ OUT_RING(0); /* Immediate data high DW */
+ ADVANCE_LP_RING();
+ }
}
diff-tree 42534474fd2556e5987205626cca8f30e25855a8 (from 05e8a7989db1b8b9dfa7b688629af65d9910cc53)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:04:16 2006 +0800
[PATCH] remove wrong scale_units
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 2751778..2c3e43b 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -397,8 +397,6 @@ ErrorF("i965 prepareComposite\n");
}
scale_units[0][0] = pSrc->drawable.width;
scale_units[0][1] = pSrc->drawable.height;
- scale_units[2][0] = pDst->drawable.width;
- scale_units[2][1] = pDst->drawable.height;
if (pSrcPicture->transform) {
is_transform[0] = TRUE;
diff-tree 05e8a7989db1b8b9dfa7b688629af65d9910cc53 (from 1329c86f2a4b3664920ded970a984a745a530da4)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 16:02:16 2006 +0800
[PATCH] Rename exa sf/wm program files
Also fix some minor issues in the wm program.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/Makefile.am b/src/Makefile.am
index 54e5657..a9f427d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -102,8 +102,8 @@ sf_prog.h: packed_yuv_sf.g4a
intel-gen4asm -o sf_prog.h packed_yuv_sf.g4a
wm_prog.h: packed_yuv_wm.g4a
intel-gen4asm -o wm_prog.h packed_yuv_wm.g4a
-i965_composite_wm_nomask.h: i965_composite_wm_nomask.g4a
- intel-gen4asm -o i965_composite_wm_nomask.h i965_composite_wm_nomask.g4a
+exa_wm_nomask_prog.h: exa_wm_nomask.g4a
+ intel-gen4asm -o exa_wm_nomask_prog.h exa_wm_nomask.g4a
endif
if DRI
diff --git a/src/exa_sf.g4a b/src/exa_sf.g4a
new file mode 100644
index 0000000..8c1398f
--- /dev/null
+++ b/src/exa_sf.g4a
@@ -0,0 +1,17 @@
+send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
+add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
+mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
+mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
+mov (8) m1<1>F g7<0,1,0>F { align1 };
+mov (8) m2<1>F g7.4<0,1,0>F { align1 };
+mov (8) m3<1>F g3<8,8,1>F { align1 };
+send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_sf_prog.h b/src/exa_sf_prog.h
new file mode 100644
index 0000000..830d176
--- /dev/null
+++ b/src/exa_sf_prog.h
@@ -0,0 +1,17 @@
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/exa_wm_nomask.g4a b/src/exa_wm_nomask.g4a
new file mode 100644
index 0000000..8e851a3
--- /dev/null
+++ b/src/exa_wm_nomask.g4a
@@ -0,0 +1,143 @@
+/*
+ * This's for exa composite operation in no mask picture case.
+ * The simplest case is just sending what src picture has to dst picture.
+ */
+
+/* I think this should be same as in g4a program for texture video,
+ as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
+
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture Y offset */
+add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+mov (8) m1<1>F g4<8,8,1>F { align1 };
+mov (8) m2<1>F g5<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
+mov (8) m3<1>F g6<8,8,1>F { align1 };
+mov (8) m4<1>F g7<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
+
+/* m0 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 0 /* msg reg index */
+ g12<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+mov (8) g19<1>UD g19<8,8,1>UD { align1 }; /* wait sampler return */
+/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
+
+/* m0, m1 are all direct passed by PS thread payload */
+mov (8) m1<1>F g1<8,8,1>F { align1 };
+
+/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
+/* g12 -> m2
+ g13 -> m6
+ g14 -> m3
+ g15 -> m7
+ g16 -> m4
+ g17 -> m8
+ g18 -> m5
+ g19 -> m9
+*/
+mov (8) m2<1>F g12<8,8,1>F { align1 };
+mov (8) m3<1>F g14<8,8,1>F { align1 };
+mov (8) m4<1>F g16<8,8,1>F { align1 };
+mov (8) m5<1>F g18<8,8,1>F { align1 };
+mov (8) m6<1>F g13<8,8,1>F { align1 };
+mov (8) m7<1>F g15<8,8,1>F { align1 };
+mov (8) m8<1>F g17<8,8,1>F { align1 };
+mov (8) m9<1>F g19<8,8,1>F { align1 };
+
+/* m0, m1 are all direct passed by PS thread payload */
+mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
+
+/* write */
+send (16) 0 acc0<1>UW g0<8,8,1>UW write (
+ 0, /* binding_table */
+ 8, /* pixel scordboard clear, msg type simd16 single source */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff --git a/src/exa_wm_nomask_prog.h b/src/exa_wm_nomask_prog.h
new file mode 100644
index 0000000..7870b3b
--- /dev/null
+++ b/src/exa_wm_nomask_prog.h
@@ -0,0 +1,70 @@
+ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
+ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
+ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
+ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
+ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
+ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x22600021, 0x008d0260, 0x00000000 },
+ { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
+ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
+ { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_composite_wm_nomask.g4a b/src/i965_composite_wm_nomask.g4a
deleted file mode 100644
index 927d86a..0000000
--- a/src/i965_composite_wm_nomask.g4a
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * This's for exa composite operation in no mask picture case.
- * The simplest case is just sending what src picture has to dst picture.
- * XXX: This is still experimental, and should be fixed to support multiple texture
- * map, and conditional mul actions.
- */
-
-/* I think this should be same as in g4a program for texture video,
- as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
-
-/* The initial payload of the thread is always g0.
- * WM_URB (incoming URB entries) is g3
- * X0_R is g4
- * X1_R is g5
- * Y0_R is g6
- * Y1_R is g7
- */
-
- /* Set up ss0.x coordinates*/
-mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
-add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
-mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
-add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
- /* Set up ss0.y coordinates */
-mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
-mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
-add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
-add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
- /* set up ss1.x coordinates */
-mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
-add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
-mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
-add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
- /* set up ss1.y coordinates */
-mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
-mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
-add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
-add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
- /* Set up ss2.x coordinates */
-mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
-add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
-mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
-add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
- /* Set up ss2.y coordinates */
-mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
-mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
-add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
-add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
- /* Set up ss3.x coordinates */
-mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
-add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
-mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
-add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
- /* Set up ss3.y coordinates */
-mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
-mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
-add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
-add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
-
- /* Now, map these screen space coordinates into texture coordinates. */
- /* subtract screen-space X origin of vertex 0. */
-add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
-add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
- /* scale by texture X increment */
-mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
-mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
- /* add in texture X offset */
-add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
-add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
- /* subtract screen-space Y origin of vertex 0. */
-add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
-add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
- /* scale by texture Y increment */
-mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
-mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
- /* add in texture Y offset */
-add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
-add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
-
-/* prepare sampler read back gX register, which would be written back to output */
-
-/* use simd16 sampler, param 0 is u, param 1 is v. */
-/* 'payload' loading, assuming tex coord start from g4 */
-mov (8) m1<1>F g4<8,8,1>F { align1 };
-mov (8) m2<1>F g5<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
-mov (8) m3<1>F g6<8,8,1>F { align1 };
-mov (8) m4<1>F g7<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
-
-/* m0 will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
-send (16) 0 /* msg reg index */
- g12<1>UW /* readback */
- g0<8,8,1>UW /* copy to msg start reg*/
- sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
- /* here(src->dst) we should use src_sampler and src_surface */
- mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
-
-/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
-
-/* m0, m1 are all direct passed by PS thread payload */
-mov (8) m1<1>F g1<8,8,1>F { align1 };
-
-/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
-/* g12 -> m2
- g13 -> m6
- g14 -> m3
- g15 -> m7
- g16 -> m4
- g17 -> m8
- g18 -> m5
- g19 -> m9
-*/
-mov (8) m2<1>F g12<8,8,1>F { align1 };
-mov (8) m3<1>F g14<8,8,1>F { align1 };
-mov (8) m4<1>F g16<8,8,1>F { align1 };
-mov (8) m5<1>F g18<8,8,1>F { align1 };
-mov (8) m6<1>F g13<8,8,1>F { align1 };
-mov (8) m7<1>F g15<8,8,1>F { align1 };
-mov (8) m8<1>F g17<8,8,1>F { align1 };
-mov (8) m9<1>F g19<8,8,1>F { align1 };
-
-/* write */
-send (16) 0 null g0<8,8,1>UW write (
- 0, /* binding_table */
- 8, /* pixel scordboard clear, msg type simd16 single source */
- 4, /* render target write */
- 0 /* no write commit message */
- )
- mlen 10
- rlen 0
- { align1 EOT };
-
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
-nop;
diff --git a/src/i965_composite_wm_nomask.h b/src/i965_composite_wm_nomask.h
deleted file mode 100644
index bd99dd9..0000000
--- a/src/i965_composite_wm_nomask.h
+++ /dev/null
@@ -1,68 +0,0 @@
- { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
- { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
- { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
- { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
- { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
- { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
- { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
- { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
- { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
- { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
- { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
- { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
- { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
- { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
- { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
- { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
- { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
- { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
- { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
- { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
- { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
- { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
- { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
- { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
- { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
- { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
- { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
- { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
- { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
- { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
- { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
- { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
- { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
- { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
- { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
- { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
- { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
- { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
- { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
- { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
- { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
- { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
- { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
- { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
- { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
- { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
- { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
- { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
- { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
- { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
- { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
- { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
- { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
- { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
- { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
- { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
- { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
- { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
- { 0x00800031, 0x20001d3c, 0x008d0000, 0x85a04800 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
- { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index fe3007b..2751778 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -342,7 +342,7 @@ static const CARD32 sip_kernel_static[][
#define SF_MAX_THREADS 4
static const CARD32 sf_kernel_static[][4] = {
-#include "sf_prog.h"
+#include "exa_sf_prog.h"
};
/* ps kernels */
@@ -350,7 +350,7 @@ static const CARD32 sf_kernel_static[][4
#define PS_MAX_THREADS 32
/* 1: no mask */
static const CARD32 ps_kernel_static_nomask [][4] = {
- #include "i965_composite_wm_nomask.h"
+ #include "exa_wm_nomask_prog.h"
};
/* 2: mask with componentAlpha, src * mask color, XXX: later */
diff-tree 1329c86f2a4b3664920ded970a984a745a530da4 (from f9c3ef40100acb85fca6e49c5c0e98f5c9ac97ca)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Nov 29 15:47:19 2006 +0800
[PATCH] EXA state mem for G965
Do not carve the state buffer out of the EXA offscreen memory; allocate
a dedicated region for the G965 state instead.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i830.h b/src/i830.h
index d5ca5d4..3b7301e 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -73,6 +73,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN
#ifdef I830_USE_EXA
#include "exa.h"
Bool I830EXAInit(ScreenPtr pScreen);
+#define EXA_LINEAR_EXTRA (64*1024)
#endif
#ifdef I830_USE_XAA
@@ -267,6 +268,7 @@ typedef struct _I830Rec {
I830MemRange Scratch2;
#ifdef I830_USE_EXA
I830MemRange Offscreen;
+ I830MemRange EXAStateMem; /* specific exa state for G965 */
#endif
/* Regions allocated either from the above pools, or from agpgart. */
I830MemRange *CursorMem;
diff --git a/src/i830_exa.c b/src/i830_exa.c
index c5b91b0..3e874c9 100644
--- a/src/i830_exa.c
+++ b/src/i830_exa.c
@@ -425,7 +425,6 @@ IntelEXADoneComposite(PixmapPtr pDst)
#endif
}
-#define BRW_LINEAR_EXTRA (32*1024)
/*
* TODO:
* - Dual head?
@@ -448,11 +447,7 @@ I830EXAInit(ScreenPtr pScreen)
pI830->EXADriverPtr->exa_minor = 0;
pI830->EXADriverPtr->memoryBase = pI830->FbBase;
pI830->EXADriverPtr->offScreenBase = pI830->Offscreen.Start;
- if (IS_I965G(pI830))
- pI830->EXADriverPtr->memorySize = pI830->Offscreen.End -
- BRW_LINEAR_EXTRA; /* BRW needs state buffer*/
- else
- pI830->EXADriverPtr->memorySize = pI830->Offscreen.End;
+ pI830->EXADriverPtr->memorySize = pI830->Offscreen.End;
DPRINTF(PFX, "EXA Mem: memoryBase 0x%x, end 0x%x, offscreen base 0x%x, memorySize 0x%x\n",
pI830->EXADriverPtr->memoryBase,
diff --git a/src/i830_memory.c b/src/i830_memory.c
index 5bbf3e3..20e3afb 100644
--- a/src/i830_memory.c
+++ b/src/i830_memory.c
@@ -841,6 +841,25 @@ I830Allocate2DMemory(ScrnInfoPtr pScrn,
pI830->Offscreen.Start, pI830->Offscreen.Size/1024);
}
}
+ if (IS_I965G(pI830)) {
+ memset(&(pI830->EXAStateMem), 0, sizeof(I830MemRange));
+ pI830->EXAStateMem.Key = -1;
+ size = ROUND_TO_PAGE(EXA_LINEAR_EXTRA);
+ align = GTT_PAGE_SIZE;
+ alloced = I830AllocVidMem(pScrn, &(pI830->EXAStateMem),
+ &(pI830->StolenPool), size, align,
+ flags | FROM_ANYWHERE | ALLOCATE_AT_TOP);
+ if (alloced < size) {
+ if (!dryrun) {
+ xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+ "G965: Failed to allocate exa state buffer space.\n");
+ }
+ return FALSE;
+ }
+ xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, verbosity,
+ "%sAllocated %ld kB for the G965 exa state buffer at 0x%lx - 0x%lx.\n", s,
+ alloced / 1024, pI830->EXAStateMem.Start, pI830->EXAStateMem.End);
+ }
#endif
} else {
long lineSize;
@@ -1493,6 +1512,11 @@ I830FixupOffsets(ScrnInfoPtr pScrn)
}
}
#endif
+#ifdef I830_USE_EXA
+ I830FixOffset(pScrn, &(pI830->Offscreen));
+ if (IS_I965G(pI830))
+ I830FixOffset(pScrn, &(pI830->EXAStateMem));
+#endif
return TRUE;
}
@@ -1894,6 +1918,12 @@ I830BindAGPMemory(ScrnInfoPtr pScrn)
return FALSE;
}
#endif
+#ifdef I830_USE_EXA
+ if (!BindMemRange(pScrn, &(pI830->Offscreen)))
+ return FALSE;
+ if (IS_I965G(pI830) && !BindMemRange(pScrn, &(pI830->EXAStateMem)))
+ return FALSE;
+#endif
pI830->GttBound = 1;
}
@@ -1979,6 +2009,12 @@ I830UnbindAGPMemory(ScrnInfoPtr pScrn)
return FALSE;
}
#endif
+#ifdef I830_USE_EXA
+ if (!UnbindMemRange(pScrn, &(pI830->Offscreen)))
+ return FALSE;
+ if (IS_I965G(pI830) && !UnbindMemRange(pScrn, &(pI830->EXAStateMem)))
+ return FALSE;
+#endif
if (!xf86ReleaseGART(pScrn->scrnIndex))
return FALSE;
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 7fbf99c..fe3007b 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -490,21 +490,12 @@ ErrorF("i965 prepareComposite\n");
next_offset = binding_table_offset + (binding_table_entries * 4);
total_state_size = next_offset;
+ assert(total_state_size < EXA_LINEAR_EXTRA);
- /*
- * XXX: Use the extra space allocated at the end of the exa offscreen buffer?
- */
-#define BRW_LINEAR_EXTRA (32*1024)
-
- state_base_offset = (pI830->Offscreen.End -
- BRW_LINEAR_EXTRA);
-
+ state_base_offset = pI830->EXAStateMem.Start;
state_base_offset = ALIGN(state_base_offset, 64);
state_base = (char *)(pI830->FbBase + state_base_offset);
- /* Set up our pointers to state structures in framebuffer. It would probably
- * be a good idea to fill these structures out in system memory and then dump
- * them there, instead.
- */
+
vs_state = (void *)(state_base + vs_offset);
sf_state = (void *)(state_base + sf_offset);
wm_state = (void *)(state_base + wm_offset);
diff-tree f9c3ef40100acb85fca6e49c5c0e98f5c9ac97ca (from ef2f25e5c6ce04c202698c5122bc8627130073c0)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Tue Oct 10 15:50:10 2006 +0800
[PATCH] Mark current ps kernel as experimental, with little testing.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_composite_wm_nomask.g4a b/src/i965_composite_wm_nomask.g4a
index 8791631..927d86a 100644
--- a/src/i965_composite_wm_nomask.g4a
+++ b/src/i965_composite_wm_nomask.g4a
@@ -1,6 +1,8 @@
/*
* This's for exa composite operation in no mask picture case.
- * The simplest case is just sending what src picture has to dst picture
+ * The simplest case is just sending what src picture has to dst picture.
+ * XXX: This is still experimental, and should be fixed to support multiple texture
+ * map, and conditional mul actions.
*/
/* I think this should be same as in g4a program for texture video,
diff-tree ef2f25e5c6ce04c202698c5122bc8627130073c0 (from ca608028c5301700444d39a1c631cc0d5648e1a2)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Tue Oct 10 14:11:35 2006 +0800
[PATCH] Use sf_prog.h instead
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 5528388..7fbf99c 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -89,7 +89,6 @@ struct formatinfo {
// refer vol2, 3d rasterization 3.8.1
-/* XXX: bad!bad! broadwater has different blend factor definition */
/* defined in brw_defines.h */
static struct blendinfo I965BlendOp[] = {
/* Clear */
@@ -163,8 +162,6 @@ static void I965GetBlendCntl(int op, Pic
}
-
-/* FIXME */
static Bool I965GetDestFormat(PicturePtr pDstPicture, CARD32 *dst_format)
{
switch (pDstPicture->format) {
@@ -221,7 +218,6 @@ static Bool I965CheckCompositeTexture(Pi
I830FALLBACK("Unsupported picture format 0x%x\n",
(int)pPict->format);
- /* XXX: fallback when repeat? */
if (pPict->repeat && pPict->repeatType != RepeatNormal)
I830FALLBACK("extended repeat (%d) not supported\n",
pPict->repeatType);
@@ -346,40 +342,7 @@ static const CARD32 sip_kernel_static[][
#define SF_MAX_THREADS 4
static const CARD32 sf_kernel_static[][4] = {
-/* send 0 (1) g6<1>F g1.12<0,1,0>F math mlen 1 rlen 1 { align1 + } */
- { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
-/* send 0 (1) g6.4<1>F g1.20<0,1,0>F math mlen 1 rlen 1 { align1 + } */
- { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
-/* add (8) g7<1>F g4<8,8,1>F g3<8,8,1>F { align1 + } */
- { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
-/* mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 + } */
- { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
-/* mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 + } */
- { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
-/* mov (8) m1<1>F g7<0,1,0>F { align1 + } */
- { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
-/* mov (8) m2<1>F g7.4<0,1,0>F { align1 + } */
- { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
-/* mov (8) m3<1>F g3<8,8,1>F { align1 + } */
- { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
-/* send 0 (8) a0<1>F g0<8,8,1>F urb mlen 4 rlen 0 write +0 transpose used complete EOT{ align1 + } */
- { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
-/* nop (4) g0<1>UD { align1 + } */
- { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+#include "sf_prog.h"
};
/* ps kernels */
@@ -475,9 +438,8 @@ ErrorF("i965 prepareComposite\n");
cc_offset = ALIGN(next_offset, 32);
next_offset = cc_offset + sizeof(*cc_state);
-// fixup sf_kernel_static, is sf_kernel needed? or not? why?
-// -> just keep current sf_kernel, which will send one setup urb entry to
-// PS kernel
+ /* keep current sf_kernel, which will send one setup urb entry to
+ PS kernel */
sf_kernel_offset = ALIGN(next_offset, 64);
next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
@@ -965,7 +927,7 @@ ErrorF("i965 prepareComposite\n");
// int vb_pitch = 4 * 4; // XXX: pitch should include mask's coords? possible
// all three coords on one row?
int nelem = pMask ? 3: 2;
- OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3); //should be 4n-1 -> 3
+ OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3); //XXX: should be 4n-1 -> 3
OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
VB0_VERTEXDATA |
((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
diff-tree ca608028c5301700444d39a1c631cc0d5648e1a2 (from 848368d5d0b90e03d3ec447cb5bd39fc87aea8df)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Thu Sep 28 13:55:52 2006 +0800
[PATCH] Fix picture's transform checking
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index f7093f2..5528388 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -437,11 +437,22 @@ ErrorF("i965 prepareComposite\n");
scale_units[2][0] = pDst->drawable.width;
scale_units[2][1] = pDst->drawable.height;
+ if (pSrcPicture->transform) {
+ is_transform[0] = TRUE;
+ transform[0] = pSrcPicture->transform;
+ } else
+ is_transform[0] = FALSE;
+
if (!pMask) {
is_transform[1] = FALSE;
scale_units[1][0] = -1;
scale_units[1][1] = -1;
} else {
+ if (pMaskPicture->transform) {
+ is_transform[1] = TRUE;
+ transform[1] = pMaskPicture->transform;
+ } else
+ is_transform[1] = FALSE;
scale_units[1][0] = pMask->drawable.width;
scale_units[1][1] = pMask->drawable.height;
}
diff-tree 848368d5d0b90e03d3ec447cb5bd39fc87aea8df (from df23624eebe938fa444c80cbedcd61919ec1aeda)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Thu Sep 28 11:15:33 2006 +0800
[PATCH] Fall back when a mask picture is used, for now
Mask support will be enabled later, once the wm kernel program is finished.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index b56bf7f..f7093f2 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -417,6 +417,11 @@ ErrorF("i965 prepareComposite\n");
// i965_surf_setup(pScrn, pSrcPicture, pMaskPicture, pDstPicture,
// pSrc, pMask, pDst);
// then setup blend, and shader program
+
+ /* FIXME: fallback in pMask for now, would be enable after finish
+ wm kernel program */
+ if (pMask)
+ I830FALLBACK("No mask support yet.\n");
I965GetDestFormat(pDstPicture, &dst_format);
src_offset = exaGetPixmapOffset(pSrc);
@@ -995,68 +1000,12 @@ ErrorF("i965 prepareComposite\n");
(BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
(8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
//XXX: is this has alignment issue? and thread access problem?
-
}
ADVANCE_LP_RING();
}
- {
- /* cc states */
- /* dest buffer */
- /* urbs */
- /* binding tables */
- /* clipping */
- /* color blend (color calculator, dataport shared function)
- COLOR_CALC_STATE/SURFACE_STATE(rendertarget's color blend enable
- bit)
- Errata!!!: brw-a/b, rendertarget 'local' color blending always
- enabled! only control by global enable bit.
- surface format for blend, "Surface format table in Sampling Engine"
- XXX: if surface format not support, we should fallback.
- */
- /*
- render target should be defined in SURFACE_STATE
- o render target SURFTYPE_BUFFER? 2D? Keith has 2D set.
- o depth buffer SURFTYPE_NULL?
- color blend:
- o Errata!!: mush issue PIPE_CONTROL with Write Cache Flush
- enable set, before transite to read-write color buffer.
- o disable pre/post-blending clamping
- o enable color buffer blending enable in COLOR_CALC_STATE,(vol2, 3d rasterization 3.8)
- enable color blending enable in SURFACE_STATE.(shared,
- sampling engine 1.7)
- disable depth test
- o (we don't use BLENDFACT_SRC_ALPHA_SATURATE, so don't care
- the Errata for independent alpha blending, just use color
- blending factor for all) disable independent alpha blending
- in COLOR_CALC_STATE
- o set src/dst blend factor in COLOR_CALC_STATE
-
- */
- }
-
- /* shader program
- o use sampler shared function for texture data
- o submit result to dataport for later color blending */
- {
- /* PS program:
- o declare sampler and variables??
- o 'send' cmd to Sampling Engine to load 'src' picture
- o if (!pMask) then 'send' 'src' texture value to DataPort
- target render cache
- o else
- - 'send' cmd to SE to load 'mask' picture
- - if no alpha, force to 1 (move 1 to W element of mask)
- - if (mask->componentAlpha) then mul 'src' & 'mask', 'send'
- output to DataPort render cache
- - else mul 'src' & 'mask''s W element(alpha), 'send' output
- to Dataport render cache
- */
-
- }
-
#ifdef I830DEBUG
ErrorF("try to sync to show any errors...");
I830Sync(pScrn);
diff-tree df23624eebe938fa444c80cbedcd61919ec1aeda (from fc944859b1b9605c748162bad1c93a6303c84aae)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Thu Sep 28 11:09:52 2006 +0800
[PATCH] Fix compile, add wm header file.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/Makefile.am b/src/Makefile.am
index b0c6c92..54e5657 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -94,6 +94,7 @@ i810_drv_la_SOURCES = \
i830_xaa.c \
i830_exa_render.c \
i915_exa_render.c \
+ i965_composite_wm_nomask.h \
i965_exa_render.c
if HAVE_GEN4ASM
diff --git a/src/i965_composite_wm_nomask.h b/src/i965_composite_wm_nomask.h
new file mode 100644
index 0000000..bd99dd9
--- /dev/null
+++ b/src/i965_composite_wm_nomask.h
@@ -0,0 +1,68 @@
+ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
+ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
+ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
+ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
+ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
+ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
+ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
+ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
+ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
+ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
+ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
+ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
+ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
+ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
+ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
+ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
+ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
+ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
+ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
+ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
+ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
+ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
+ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
+ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
+ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 },
+ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 },
+ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c },
+ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
+ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 },
+ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 },
+ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c },
+ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c },
+ { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 },
+ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
+ { 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
+ { 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
+ { 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
+ { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+ { 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
+ { 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
+ { 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
+ { 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
+ { 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
+ { 0x00800031, 0x20001d3c, 0x008d0000, 0x85a04800 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+ { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index dfa9a04..b56bf7f 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -68,6 +68,9 @@ extern void
I965EXAComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int width, int height);
+static void I965GetBlendCntl(int op, PicturePtr pMask, CARD32 dst_format,
+ CARD32 *sblend, CARD32 *dblend);
+
extern float scale_units[2][2];
extern Bool is_transform[2];
extern PictTransform *transform[2];
@@ -90,31 +93,31 @@ struct formatinfo {
/* defined in brw_defines.h */
static struct blendinfo I965BlendOp[] = {
/* Clear */
- {0, 0, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_ZERO},
+ {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ZERO},
/* Src */
- {0, 0, BRW_BLENDFACT_ONE, BRW_BLENDFACT_ZERO},
+ {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ZERO},
/* Dst */
- {0, 0, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_ONE},
+ {0, 0, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_ONE},
/* Over */
- {0, 1, BRW_BLENDFACT_ONE, BRW_BLENDFACT_INV_SRC_ALPHA},
+ {0, 1, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */
- {1, 0, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_ONE},
+ {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ONE},
/* In */
- {1, 0, BRW_BLENDFACT_DST_ALPHA, BRW_BLENDFACT_ZERO},
+ {1, 0, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
/* InReverse */
- {0, 1, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_SRC_ALPHA},
+ {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_SRC_ALPHA},
/* Out */
- {1, 0, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_ZERO},
+ {1, 0, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_ZERO},
/* OutReverse */
- {0, 1, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_INV_SRC_ALPHA},
+ {0, 1, BRW_BLENDFACTOR_ZERO, BRW_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */
- {1, 1, BRW_BLENDFACT_DST_ALPHA, BRW_BLENDFACT_INV_SRC_ALPHA},
+ {1, 1, BRW_BLENDFACTOR_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */
- {1, 1, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_SRC_ALPHA},
+ {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_SRC_ALPHA},
/* Xor */
- {1, 1, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_INV_SRC_ALPHA},
+ {1, 1, BRW_BLENDFACTOR_INV_DST_ALPHA, BRW_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */
- {0, 0, BRW_BLENDFACT_ONE, BRW_BLENDFACT_ONE},
+ {0, 0, BRW_BLENDFACTOR_ONE, BRW_BLENDFACTOR_ONE},
};
/* FIXME: surface format defined in brw_defines.h, shared Sampling engine 1.7.2*/
@@ -124,8 +127,8 @@ static struct formatinfo I965TexFormats[
{PICT_a8b8g8r8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM },
{PICT_x8b8g8r8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM },
{PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM },
- {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G6R5A1_UNORM },
- {PICT_x1r5g5b5, BRW_SURFACEFORMAT_B5G6R5X1_UNORM },
+ {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G5R5A1_UNORM },
+ {PICT_x1r5g5b5, BRW_SURFACEFORMAT_B5G5R5X1_UNORM },
{PICT_a8, BRW_SURFACEFORMAT_A8_UNORM },
};
@@ -140,10 +143,10 @@ static void I965GetBlendCntl(int op, Pic
* it as always 1.
*/
if (PICT_FORMAT_A(dst_format) == 0 && I965BlendOp[op].dst_alpha) {
- if (*sblend == BRW_BLENDFACT_DST_ALPHA)
- *sblend = BRW_BLENDFACT_ONE;
- else if (*sblend == BRW_BLENDFACT_INV_DST_ALPHA)
- *sblend = BRW_BLENDFACT_ZERO;
+ if (*sblend == BRW_BLENDFACTOR_DST_ALPHA)
+ *sblend = BRW_BLENDFACTOR_ONE;
+ else if (*sblend == BRW_BLENDFACTOR_INV_DST_ALPHA)
+ *sblend = BRW_BLENDFACTOR_ZERO;
}
/* If the source alpha is being used, then we should only be in a case where
@@ -151,10 +154,10 @@ static void I965GetBlendCntl(int op, Pic
* channels multiplied by the source picture's alpha.
*/
if (pMask && pMask->componentAlpha && I965BlendOp[op].src_alpha) {
- if (*dblend == BRW_BLENDFACT_SRC_ALPHA) {
- *dblend = BRW_BLENDFACT_SRC_COLR;
- } else if (*dblend == BRW_BLENDFACT_INV_SRC_ALPHA) {
- *dblend = BRW_BLENDFACT_INV_SRC_COLR;
+ if (*dblend == BRW_BLENDFACTOR_SRC_ALPHA) {
+ *dblend = BRW_BLENDFACTOR_SRC_COLOR;
+ } else if (*dblend == BRW_BLENDFACTOR_INV_SRC_ALPHA) {
+ *dblend = BRW_BLENDFACTOR_INV_SRC_COLOR;
}
}
@@ -173,10 +176,10 @@ static Bool I965GetDestFormat(PicturePtr
*dst_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
break;
case PICT_a1r5g5b5:
- *dst_format = BRW_SURFACEFORMAT_B5G6R5A1_UNORM;
+ *dst_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
break;
case PICT_x1r5g5b5:
- *dst_format = BRW_SURFACEFORMAT_B5G6R5X1_UNORM;
+ *dst_format = BRW_SURFACEFORMAT_B5G5R5X1_UNORM;
break;
/* COLR_BUF_8BIT is special for YUV surfaces. While we may end up being
* able to use it depending on how the hardware implements it, disable it
@@ -250,7 +253,7 @@ I965EXACheckComposite(int op, PicturePtr
* source value that we get to blend with.
*/
if (I965BlendOp[op].src_alpha &&
- (I965BlendOp[op].src_blend != BRW_BLENDFACT_ZERO))
+ (I965BlendOp[op].src_blend != BRW_BLENDFACTOR_ZERO))
I830FALLBACK("Component alpha not supported with source "
"alpha and source value blending.\n");
/* XXX: fallback now for mask with componentAlpha */
@@ -297,7 +300,7 @@ struct brw_instruction *sip_kernel;
CARD32 *binding_table;
int binding_table_entries;
-int dest_surf_offset, src_surf_offset;
+int dest_surf_offset, src_surf_offset, mask_surf_offset;
int src_sampler_offset, mask_sampler_offset,vs_offset;
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
@@ -308,7 +311,7 @@ int state_base_offset;
float *vb;
int vb_size = 4 * 4 ; /* 4 DWORDS per vertex, 4 vertices for TRIFAN*/
-int src_blend, dst_blend;
+CARD32 src_blend, dst_blend;
static const CARD32 sip_kernel_static[][4] = {
/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */
@@ -380,6 +383,8 @@ static const CARD32 sf_kernel_static[][4
};
/* ps kernels */
+#define PS_KERNEL_NUM_GRF 32
+#define PS_MAX_THREADS 32
/* 1: no mask */
static const CARD32 ps_kernel_static_nomask [][4] = {
#include "i965_composite_wm_nomask.h"
@@ -387,12 +392,12 @@ static const CARD32 ps_kernel_static_nom
/* 2: mask with componentAlpha, src * mask color, XXX: later */
static const CARD32 ps_kernel_static_maskca [][4] = {
- #include "i965_composite_wm_maskca.h"
+/*#include "i965_composite_wm_maskca.h" */
};
/* 3: mask without componentAlpha, src * mask alpha */
static const CARD32 ps_kernel_static_masknoca [][4] = {
- #include "i965_composite_wm_masknoca.h"
+/*#include "i965_composite_wm_masknoca.h" */
};
Bool
@@ -403,9 +408,8 @@ I965EXAPrepareComposite(int op, PictureP
ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
I830Ptr pI830 = I830PTR(pScrn);
CARD32 src_offset, src_pitch;
- CARD32 mask_offset, mask_pitch;
+ CARD32 mask_offset = 0, mask_pitch = 0;
CARD32 dst_format, dst_offset, dst_pitch;
- CARD32 blendctl;
ErrorF("i965 prepareComposite\n");
@@ -437,7 +441,6 @@ ErrorF("i965 prepareComposite\n");
scale_units[1][1] = pMask->drawable.height;
}
-/* FIXME */
/* setup 3d pipeline state */
binding_table_entries = 2; /* default no mask */
@@ -602,7 +605,7 @@ ErrorF("i965 prepareComposite\n");
// cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
// cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
- I965GetBlendCntl(op, pMask, pDstPicture->format,
+ I965GetBlendCntl(op, pMaskPicture, pDstPicture->format,
&src_blend, &dst_blend);
cc_state->cc6.src_blend_factor = src_blend;
cc_state->cc6.dest_blend_factor = dst_blend;
@@ -703,7 +706,7 @@ ErrorF("i965 prepareComposite\n");
/* PS kernel use this sampler */
memset(src_sampler_state, 0, sizeof(*src_sampler_state));
- src_sampler_state->ss0.lod_peclamp = 1; /* GL mode */
+ src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
switch(pSrcPicture->filter) {
case PictFilterNearest:
src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
@@ -733,7 +736,7 @@ ErrorF("i965 prepareComposite\n");
if (pMask) {
memset(mask_sampler_state, 0, sizeof(*mask_sampler_state));
- mask_sampler_state->ss0.lod_peclamp = 1; /* GL mode */
+ mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
switch(pMaskPicture->filter) {
case PictFilterNearest:
mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
@@ -1065,6 +1068,8 @@ void
I965EXAComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int w, int h)
{
+ ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+ I830Ptr pI830 = I830PTR(pScrn);
int srcXend, srcYend, maskXend, maskYend;
PictVector v;
int pMask = 1, i = 0;
diff-tree fc944859b1b9605c748162bad1c93a6303c84aae (from acdc2da77b445e9347a4c6e53e35c81763cbb0b8)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Thu Sep 28 10:36:00 2006 +0800
[PATCH] Add simplest wm kernel program for no mask picture composite
This is a first attempt at using the new gen4asm language; the composite
programs for mask pictures, with and without component alpha (CA), will be finished later.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/Makefile.am b/src/Makefile.am
index 8285406..b0c6c92 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -101,6 +101,8 @@ sf_prog.h: packed_yuv_sf.g4a
intel-gen4asm -o sf_prog.h packed_yuv_sf.g4a
wm_prog.h: packed_yuv_wm.g4a
intel-gen4asm -o wm_prog.h packed_yuv_wm.g4a
+i965_composite_wm_nomask.h: i965_composite_wm_nomask.g4a
+ intel-gen4asm -o i965_composite_wm_nomask.h i965_composite_wm_nomask.g4a
endif
if DRI
diff --git a/src/i965_composite_wm_nomask.g4a b/src/i965_composite_wm_nomask.g4a
new file mode 100644
index 0000000..8791631
--- /dev/null
+++ b/src/i965_composite_wm_nomask.g4a
@@ -0,0 +1,139 @@
+/*
+ * This's for exa composite operation in no mask picture case.
+ * The simplest case is just sending what src picture has to dst picture
+ */
+
+/* I think this should be same as in g4a program for texture video,
+ as we also use 16-pixel dispatch. and SF scale in g3 is useful for us. */
+
+/* The initial payload of the thread is always g0.
+ * WM_URB (incoming URB entries) is g3
+ * X0_R is g4
+ * X1_R is g5
+ * Y0_R is g6
+ * Y1_R is g7
+ */
+
+ /* Set up ss0.x coordinates*/
+mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
+mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
+add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
+ /* Set up ss0.y coordinates */
+mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
+mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
+add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
+add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
+ /* set up ss1.x coordinates */
+mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
+mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
+add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
+ /* set up ss1.y coordinates */
+mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
+mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
+add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
+add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.x coordinates */
+mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
+mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
+add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
+ /* Set up ss2.y coordinates */
+mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
+mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
+add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
+add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.x coordinates */
+mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
+mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
+add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
+ /* Set up ss3.y coordinates */
+mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
+mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
+add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
+add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
+
+ /* Now, map these screen space coordinates into texture coordinates. */
+ /* subtract screen-space X origin of vertex 0. */
+add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
+ /* scale by texture X increment */
+mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 };
+mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 };
+ /* add in texture X offset */
+add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 };
+add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 };
+ /* subtract screen-space Y origin of vertex 0. */
+add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
+ /* scale by texture Y increment */
+mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 };
+mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 };
+ /* add in texture Y offset */
+add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 };
+add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 };
+
+/* prepare sampler read back gX register, which would be written back to output */
+
+/* use simd16 sampler, param 0 is u, param 1 is v. */
+/* 'payload' loading, assuming tex coord start from g4 */
+mov (8) m1<1>F g4<8,8,1>F { align1 };
+mov (8) m2<1>F g5<8,8,1>F { align1 }; /* param 0 u in m1, m2 */
+mov (8) m3<1>F g6<8,8,1>F { align1 };
+mov (8) m4<1>F g7<8,8,1>F { align1 }; /* param 1 v in m3, m4 */
+
+/* m0 will be copied with g0, as it contains send desc */
+/* emit sampler 'send' cmd */
+send (16) 0 /* msg reg index */
+ g12<1>UW /* readback */
+ g0<8,8,1>UW /* copy to msg start reg*/
+ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
+ /* here(src->dst) we should use src_sampler and src_surface */
+ mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
+
+/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
+
+/* m0, m1 are all direct passed by PS thread payload */
+mov (8) m1<1>F g1<8,8,1>F { align1 };
+
+/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
+/* g12 -> m2
+ g13 -> m6
+ g14 -> m3
+ g15 -> m7
+ g16 -> m4
+ g17 -> m8
+ g18 -> m5
+ g19 -> m9
+*/
+mov (8) m2<1>F g12<8,8,1>F { align1 };
+mov (8) m3<1>F g14<8,8,1>F { align1 };
+mov (8) m4<1>F g16<8,8,1>F { align1 };
+mov (8) m5<1>F g18<8,8,1>F { align1 };
+mov (8) m6<1>F g13<8,8,1>F { align1 };
+mov (8) m7<1>F g15<8,8,1>F { align1 };
+mov (8) m8<1>F g17<8,8,1>F { align1 };
+mov (8) m9<1>F g19<8,8,1>F { align1 };
+
+/* write */
+send (16) 0 null g0<8,8,1>UW write (
+ 0, /* binding_table */
+ 8, /* pixel scordboard clear, msg type simd16 single source */
+ 4, /* render target write */
+ 0 /* no write commit message */
+ )
+ mlen 10
+ rlen 0
+ { align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
diff-tree acdc2da77b445e9347a4c6e53e35c81763cbb0b8 (from 926d7fb09aaaabf050949ce7c6127c68441c8801)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Sep 27 16:48:43 2006 +0800
[PATCH] Add mask sampler state
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 942f0eb..dfa9a04 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -282,7 +282,7 @@ struct brw_surface_state *dest_surf_stat
struct brw_surface_state *src_surf_state;
struct brw_surface_state *mask_surf_state;
struct brw_sampler_state *src_sampler_state;
-struct brw_sampler_state *mask_sampler_state; // could just use one sampler?
+struct brw_sampler_state *mask_sampler_state;
struct brw_vs_unit_state *vs_state;
struct brw_sf_unit_state *sf_state;
@@ -297,7 +297,8 @@ struct brw_instruction *sip_kernel;
CARD32 *binding_table;
int binding_table_entries;
-int dest_surf_offset, src_surf_offset, src_sampler_offset, vs_offset;
+int dest_surf_offset, src_surf_offset;
+int src_sampler_offset, mask_sampler_offset,vs_offset;
int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
int binding_table_offset;
@@ -381,17 +382,17 @@ static const CARD32 sf_kernel_static[][4
/* ps kernels */
/* 1: no mask */
static const CARD32 ps_kernel_static_nomask [][4] = {
- #include "i965_composite_ps_nomask.h"
+ #include "i965_composite_wm_nomask.h"
};
/* 2: mask with componentAlpha, src * mask color, XXX: later */
static const CARD32 ps_kernel_static_maskca [][4] = {
- #include "i965_composite_ps_maskca.h"
+ #include "i965_composite_wm_maskca.h"
};
/* 3: mask without componentAlpha, src * mask alpha */
static const CARD32 ps_kernel_static_masknoca [][4] = {
- #include "i965_composite_ps_masknoca.h"
+ #include "i965_composite_wm_masknoca.h"
};
Bool
@@ -478,11 +479,14 @@ ErrorF("i965 prepareComposite\n");
cc_viewport_offset = ALIGN(next_offset, 32);
next_offset = cc_viewport_offset + sizeof(*cc_viewport);
- // : fix for texture sampler
- // XXX: -> use only one sampler
+ // for texture sampler
src_sampler_offset = ALIGN(next_offset, 32);
next_offset = src_sampler_offset + sizeof(*src_sampler_state);
+ if (pMask) {
+ mask_sampler_offset = ALIGN(next_offset, 32);
+ next_offset = mask_sampler_offset + sizeof(*mask_sampler_state);
+ }
/* Align VB to native size of elements, for safety */
vb_offset = ALIGN(next_offset, 8);
next_offset = vb_offset + vb_size;
@@ -536,6 +540,9 @@ ErrorF("i965 prepareComposite\n");
mask_surf_state = (void *)(state_base + mask_surf_offset);
src_sampler_state = (void *)(state_base + src_sampler_offset);
+ if (pMask)
+ mask_sampler_state = (void *)(state_base + mask_sampler_offset);
+
binding_table = (void *)(state_base + binding_table_offset);
vb = (void *)(state_base + vb_offset);
@@ -724,6 +731,37 @@ ErrorF("i965 prepareComposite\n");
and just a single texel tex map, with R32G32B32A32_FLOAT */
src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
+ if (pMask) {
+ memset(mask_sampler_state, 0, sizeof(*mask_sampler_state));
+ mask_sampler_state->ss0.lod_peclamp = 1; /* GL mode */
+ switch(pMaskPicture->filter) {
+ case PictFilterNearest:
+ mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case PictFilterBilinear:
+ mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ default:
+ I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter);
+ }
+
+ if (!pMaskPicture->repeat) {
+ /* XXX: clamp_border and set border to 0 */
+ mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ } else {
+ mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ }
+ /* XXX: ss2 has border color pointer, which should be in general state address,
+ and just a single texel tex map, with R32G32B32A32_FLOAT */
+ mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
+ }
+
/* Set up the vertex shader to be disabled (passthrough) */
memset(vs_state, 0, sizeof(*vs_state));
// XXX: vs URB should be defined for VF vertex URB store. done already?
@@ -783,26 +821,26 @@ ErrorF("i965 prepareComposite\n");
wm_state->thread0.grf_reg_count = ((PS_KERNEL_NUM_GRF & ~15) / 16);
wm_state->thread1.single_program_flow = 1;
if (!pMask)
- wm_state->thread1.binding_table_entry_count = 2; /* tex and fb */
+ wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
else
- wm_state->thread1.binding_table_entry_count = 3; /* tex and fb */
+ wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
wm_state->thread2.scratch_space_base_pointer = 0;
wm_state->thread2.per_thread_scratch_space = 0;
// XXX: urb allocation
- wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
- // wm kernel use urb from 3, see wm_program in compiler module
- wm_state->thread3.urb_entry_read_length = 1; /* one per pair of attrib */
wm_state->thread3.const_urb_entry_read_length = 0;
wm_state->thread3.const_urb_entry_read_offset = 0;
+ wm_state->thread3.urb_entry_read_length = 1; /* one per pair of attrib */
wm_state->thread3.urb_entry_read_offset = 0;
+ // wm kernel use urb from 3, see wm_program in compiler module
+ wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
- wm_state->wm4.stats_enable = 1;
- wm_state->wm4.sampler_state_pointer = (state_base_offset + src_sampler_offset) >> 5;
+ wm_state->wm4.stats_enable = 1; /* statistic */
+ wm_state->wm4.sampler_state_pointer = (state_base_offset + src_sampler_offset) >> 5;
wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
wm_state->wm5.thread_dispatch_enable = 1;
- //just use 16-pixel dispatch, don't need to change kernel start point
+ //just use 16-pixel dispatch (4 subspans), don't need to change kernel start point
wm_state->wm5.enable_16_pix = 1;
wm_state->wm5.enable_8_pix = 0;
wm_state->wm5.early_depth_test = 1;
diff-tree 926d7fb09aaaabf050949ce7c6127c68441c8801 (from ed73bbaf5c2e9d555c884037a249cf03e7f60fa0)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Wed Sep 27 13:54:14 2006 +0800
[PATCH] change some src sampler states
The sampler for the mask should also be set up, and the
default border texel fixed.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
index 24e0ba2..942f0eb 100644
--- a/src/i965_exa_render.c
+++ b/src/i965_exa_render.c
@@ -546,8 +546,8 @@ ErrorF("i965 prepareComposite\n");
#define URB_CS_ENTRY_SIZE 0
#define URB_CS_ENTRIES 0
-#define URB_VS_ENTRY_SIZE 1 // XXX: VUE row num? double check, 1 row is enough
-#define URB_VS_ENTRIES 8
+#define URB_VS_ENTRY_SIZE 1 // each 512-bit row
+#define URB_VS_ENTRIES 8 // we needs at least 8 entries
#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0
@@ -630,6 +630,7 @@ ErrorF("i965 prepareComposite\n");
dest_surf_state->ss2.mip_count = 0;
dest_surf_state->ss2.render_target_rotation = 0;
dest_surf_state->ss3.pitch = dst_pitch - 1;
+ // tiled surface?
/* Set up the source surface state buffer */
memset(src_surf_state, 0, sizeof(*src_surf_state));
@@ -695,19 +696,33 @@ ErrorF("i965 prepareComposite\n");
/* PS kernel use this sampler */
memset(src_sampler_state, 0, sizeof(*src_sampler_state));
- src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
- src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ src_sampler_state->ss0.lod_peclamp = 1; /* GL mode */
+ switch(pSrcPicture->filter) {
+ case PictFilterNearest:
+ src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+ src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+ break;
+ case PictFilterBilinear:
+ src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+ break;
+ default:
+ I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
+ }
- /* XXX: fix for repeat */
if (!pSrcPicture->repeat) {
- src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; // XXX: clamp_border and set border to 0?
+ /* XXX: clamp_border and set border to 0 */
+ src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
} else {
- src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; // XXX: clamp_border and set border to 0?
+ src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
}
+ /* XXX: ss2 has border color pointer, which should be in general state address,
+ and just a single texel tex map, with R32G32B32A32_FLOAT */
+ src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
/* Set up the vertex shader to be disabled (passthrough) */
memset(vs_state, 0, sizeof(*vs_state));
diff-tree ed73bbaf5c2e9d555c884037a249cf03e7f60fa0 (from 5a793b0dcf2d5de408b55073858fcfba6d99f994)
Author: Wang Zhenyu <zhenyu.z.wang at intel.com>
Date: Mon Sep 25 14:35:51 2006 +0800
[PATCH] Add file for i965 exa composite
This does not include the PS program, which will be added
in g4a form.
Signed-off-by: Keith Packard <keithp at neko.keithp.com>
diff --git a/src/Makefile.am b/src/Makefile.am
index 5309eea..8285406 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -93,7 +93,8 @@ i810_drv_la_SOURCES = \
i830_exa.c \
i830_xaa.c \
i830_exa_render.c \
- i915_exa_render.c
+ i915_exa_render.c \
+ i965_exa_render.c
if HAVE_GEN4ASM
sf_prog.h: packed_yuv_sf.g4a
diff --git a/src/i830_exa.c b/src/i830_exa.c
index 9356c79..c5b91b0 100644
--- a/src/i830_exa.c
+++ b/src/i830_exa.c
@@ -121,6 +121,11 @@ extern Bool I915EXACheckComposite(int, P
extern Bool I915EXAPrepareComposite(int, PicturePtr, PicturePtr, PicturePtr,
PixmapPtr, PixmapPtr, PixmapPtr);
+extern Bool I965EXACheckComposite(int, PicturePtr, PicturePtr, PicturePtr);
+extern Bool I965EXAPrepareComposite(int, PicturePtr, PicturePtr, PicturePtr,
+ PixmapPtr, PixmapPtr, PixmapPtr);
+extern void I965EXAComposite(PixmapPtr pDst, int srcX, int srcY, int maskX,
+ int maskY, int dstX, int dstY, int width, int height);
/**
* I830EXASync - wait for a command to finish
* @pScreen: current screen
@@ -419,6 +424,8 @@ IntelEXADoneComposite(PixmapPtr pDst)
I830Sync(pScrn);
#endif
}
+
+#define BRW_LINEAR_EXTRA (32*1024)
/*
* TODO:
* - Dual head?
@@ -441,7 +448,11 @@ I830EXAInit(ScreenPtr pScreen)
pI830->EXADriverPtr->exa_minor = 0;
pI830->EXADriverPtr->memoryBase = pI830->FbBase;
pI830->EXADriverPtr->offScreenBase = pI830->Offscreen.Start;
- pI830->EXADriverPtr->memorySize = pI830->Offscreen.End;
+ if (IS_I965G(pI830))
+ pI830->EXADriverPtr->memorySize = pI830->Offscreen.End -
+ BRW_LINEAR_EXTRA; /* BRW needs state buffer*/
+ else
+ pI830->EXADriverPtr->memorySize = pI830->Offscreen.End;
DPRINTF(PFX, "EXA Mem: memoryBase 0x%x, end 0x%x, offscreen base 0x%x, memorySize 0x%x\n",
pI830->EXADriverPtr->memoryBase,
@@ -492,6 +503,11 @@ I830EXAInit(ScreenPtr pScreen)
pI830->EXADriverPtr->PrepareComposite = I830EXAPrepareComposite;
pI830->EXADriverPtr->Composite = IntelEXAComposite;
pI830->EXADriverPtr->DoneComposite = IntelEXADoneComposite;
+ } else if (IS_I965G(pI830)) {
+ pI830->EXADriverPtr->CheckComposite = I965EXACheckComposite;
+ pI830->EXADriverPtr->PrepareComposite = I965EXAPrepareComposite;
+ pI830->EXADriverPtr->Composite = I965EXAComposite;
+ pI830->EXADriverPtr->DoneComposite = IntelEXADoneComposite;
}
if(!exaDriverInit(pScreen, pI830->EXADriverPtr)) {
diff --git a/src/i965_exa_render.c b/src/i965_exa_render.c
new file mode 100644
index 0000000..24e0ba2
--- /dev/null
+++ b/src/i965_exa_render.c
@@ -0,0 +1,1124 @@
+/*
+ * Copyright © 2006 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Wang Zhenyu <zhenyu.z.wang at intel.com>
+ * Eric Anholt <eric at anholt.net>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+#include "i830.h"
+#include "i915_reg.h"
+
+/* bring in brw structs */
+#include "brw_defines.h"
+#include "brw_structs.h"
+
+#ifdef I830DEBUG
+#define DEBUG_I830FALLBACK 1
+#endif
+
+#ifdef DEBUG_I830FALLBACK
+#define I830FALLBACK(s, arg...) \
+do { \
+ DPRINTF(PFX, "EXA fallback: " s "\n", ##arg); \
+ return FALSE; \
+} while(0)
+#else
+#define I830FALLBACK(s, arg...) \
+do { \
+ return FALSE; \
+} while(0)
+#endif
+
+extern Bool
+I965EXACheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
+ PicturePtr pDstPicture);
+
+extern Bool
+I965EXAPrepareComposite(int op, PicturePtr pSrcPicture,
+ PicturePtr pMaskPicture, PicturePtr pDstPicture,
+ PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst);
+
+extern void
+I965EXAComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
+ int dstX, int dstY, int width, int height);
+
+extern float scale_units[2][2];
+extern Bool is_transform[2];
+extern PictTransform *transform[2];
+
+struct blendinfo {
+ Bool dst_alpha;
+ Bool src_alpha;
+ CARD32 src_blend;
+ CARD32 dst_blend;
+};
+
+struct formatinfo {
+ int fmt;
+ CARD32 card_fmt;
+};
+
+// refer vol2, 3d rasterization 3.8.1
+
+/* XXX: bad!bad! broadwater has different blend factor definition */
+/* defined in brw_defines.h */
+static struct blendinfo I965BlendOp[] = {
+ /* Clear */
+ {0, 0, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_ZERO},
+ /* Src */
+ {0, 0, BRW_BLENDFACT_ONE, BRW_BLENDFACT_ZERO},
+ /* Dst */
+ {0, 0, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_ONE},
+ /* Over */
+ {0, 1, BRW_BLENDFACT_ONE, BRW_BLENDFACT_INV_SRC_ALPHA},
+ /* OverReverse */
+ {1, 0, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_ONE},
+ /* In */
+ {1, 0, BRW_BLENDFACT_DST_ALPHA, BRW_BLENDFACT_ZERO},
+ /* InReverse */
+ {0, 1, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_SRC_ALPHA},
+ /* Out */
+ {1, 0, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_ZERO},
+ /* OutReverse */
+ {0, 1, BRW_BLENDFACT_ZERO, BRW_BLENDFACT_INV_SRC_ALPHA},
+ /* Atop */
+ {1, 1, BRW_BLENDFACT_DST_ALPHA, BRW_BLENDFACT_INV_SRC_ALPHA},
+ /* AtopReverse */
+ {1, 1, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_SRC_ALPHA},
+ /* Xor */
+ {1, 1, BRW_BLENDFACT_INV_DST_ALPHA, BRW_BLENDFACT_INV_SRC_ALPHA},
+ /* Add */
+ {0, 0, BRW_BLENDFACT_ONE, BRW_BLENDFACT_ONE},
+};
+
+/* FIXME: surface format defined in brw_defines.h, shared Sampling engine 1.7.2*/
+static struct formatinfo I965TexFormats[] = {
+ {PICT_a8r8g8b8, BRW_SURFACEFORMAT_R8G8B8A8_UNORM },
+ {PICT_x8r8g8b8, BRW_SURFACEFORMAT_R8G8B8X8_UNORM },
+ {PICT_a8b8g8r8, BRW_SURFACEFORMAT_B8G8R8A8_UNORM },
+ {PICT_x8b8g8r8, BRW_SURFACEFORMAT_B8G8R8X8_UNORM },
+ {PICT_r5g6b5, BRW_SURFACEFORMAT_B5G6R5_UNORM },
+ {PICT_a1r5g5b5, BRW_SURFACEFORMAT_B5G6R5A1_UNORM },
+ {PICT_x1r5g5b5, BRW_SURFACEFORMAT_B5G6R5X1_UNORM },
+ {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM },
+};
+
+static void I965GetBlendCntl(int op, PicturePtr pMask, CARD32 dst_format,
+ CARD32 *sblend, CARD32 *dblend)
+{
+
+ *sblend = I965BlendOp[op].src_blend;
+ *dblend = I965BlendOp[op].dst_blend;
+
+ /* If there's no dst alpha channel, adjust the blend op so that we'll treat
+ * it as always 1.
+ */
+ if (PICT_FORMAT_A(dst_format) == 0 && I965BlendOp[op].dst_alpha) {
+ if (*sblend == BRW_BLENDFACT_DST_ALPHA)
+ *sblend = BRW_BLENDFACT_ONE;
+ else if (*sblend == BRW_BLENDFACT_INV_DST_ALPHA)
+ *sblend = BRW_BLENDFACT_ZERO;
+ }
+
+ /* If the source alpha is being used, then we should only be in a case where
+ * the source blend factor is 0, and the source blend value is the mask
+ * channels multiplied by the source picture's alpha.
+ */
+ if (pMask && pMask->componentAlpha && I965BlendOp[op].src_alpha) {
+ if (*dblend == BRW_BLENDFACT_SRC_ALPHA) {
+ *dblend = BRW_BLENDFACT_SRC_COLR;
+ } else if (*dblend == BRW_BLENDFACT_INV_SRC_ALPHA) {
+ *dblend = BRW_BLENDFACT_INV_SRC_COLR;
+ }
+ }
+
+}
+
+
+/* FIXME */
+static Bool I965GetDestFormat(PicturePtr pDstPicture, CARD32 *dst_format)
+{
+ switch (pDstPicture->format) {
+ case PICT_a8r8g8b8:
+ case PICT_x8r8g8b8:
+ *dst_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ break;
+ case PICT_r5g6b5:
+ *dst_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ break;
+ case PICT_a1r5g5b5:
+ *dst_format = BRW_SURFACEFORMAT_B5G6R5A1_UNORM;
+ break;
+ case PICT_x1r5g5b5:
+ *dst_format = BRW_SURFACEFORMAT_B5G6R5X1_UNORM;
+ break;
+ /* COLR_BUF_8BIT is special for YUV surfaces. While we may end up being
+ * able to use it depending on how the hardware implements it, disable it
+ * for now while we don't know what exactly it does (what channel does it
+ * read from?
+ */
+ /*
+ case PICT_a8:
+ *dst_format = COLR_BUF_8BIT;
+ break;
+ */
+ case PICT_a4r4g4b4:
+ case PICT_x4r4g4b4:
+ *dst_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+ break;
+ default:
+ I830FALLBACK("Unsupported dest format 0x%x\n",
+ (int)pDstPicture->format);
+ }
+
+ return TRUE;
+}
+
+static Bool I965CheckCompositeTexture(PicturePtr pPict, int unit)
+{
+ int w = pPict->pDrawable->width;
+ int h = pPict->pDrawable->height;
+ int i;
+
+ if ((w > 0x7ff) || (h > 0x7ff))
+ I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
+
+ for (i = 0; i < sizeof(I965TexFormats) / sizeof(I965TexFormats[0]); i++)
+ {
+ if (I965TexFormats[i].fmt == pPict->format)
+ break;
+ }
+ if (i == sizeof(I965TexFormats) / sizeof(I965TexFormats[0]))
+ I830FALLBACK("Unsupported picture format 0x%x\n",
+ (int)pPict->format);
+
+ /* XXX: fallback when repeat? */
+ if (pPict->repeat && pPict->repeatType != RepeatNormal)
+ I830FALLBACK("extended repeat (%d) not supported\n",
+ pPict->repeatType);
+
+ if (pPict->filter != PictFilterNearest &&
+ pPict->filter != PictFilterBilinear)
+ I830FALLBACK("Unsupported filter 0x%x\n", pPict->filter);
+
+ return TRUE;
+}
+
+Bool
+I965EXACheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
+ PicturePtr pDstPicture)
+{
+ /* check op*/
+ /* check op with mask's componentAlpha*/
+ /* check textures */
+ /* check dst buffer format */
+ CARD32 tmp1;
+
+ /* Check for unsupported compositing operations. */
+ if (op >= sizeof(I965BlendOp) / sizeof(I965BlendOp[0]))
+ I830FALLBACK("Unsupported Composite op 0x%x\n", op);
+
+ if (pMaskPicture != NULL && pMaskPicture->componentAlpha) {
+ /* Check if it's component alpha that relies on a source alpha and on
+ * the source value. We can only get one of those into the single
+ * source value that we get to blend with.
+ */
+ if (I965BlendOp[op].src_alpha &&
+ (I965BlendOp[op].src_blend != BRW_BLENDFACT_ZERO))
+ I830FALLBACK("Component alpha not supported with source "
+ "alpha and source value blending.\n");
+ /* XXX: fallback now for mask with componentAlpha */
+ I830FALLBACK("mask componentAlpha not ready.\n");
+ }
+
+ if (!I965CheckCompositeTexture(pSrcPicture, 0))
+ I830FALLBACK("Check Src picture texture\n");
+ if (pMaskPicture != NULL && !I965CheckCompositeTexture(pMaskPicture, 1))
+ I830FALLBACK("Check Mask picture texture\n");
+
+ if (!I965GetDestFormat(pDstPicture, &tmp1))
+ I830FALLBACK("Get Color buffer format\n");
+
+ return TRUE;
+
+}
+
+#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+
+int urb_vs_start, urb_vs_size;
+int urb_gs_start, urb_gs_size;
+int urb_clip_start, urb_clip_size;
+int urb_sf_start, urb_sf_size;
+int urb_cs_start, urb_cs_size;
+
+struct brw_surface_state *dest_surf_state;
+struct brw_surface_state *src_surf_state;
+struct brw_surface_state *mask_surf_state;
+struct brw_sampler_state *src_sampler_state;
+struct brw_sampler_state *mask_sampler_state; // could just use one sampler?
+
+struct brw_vs_unit_state *vs_state;
+struct brw_sf_unit_state *sf_state;
+struct brw_wm_unit_state *wm_state;
+struct brw_cc_unit_state *cc_state;
+struct brw_cc_viewport *cc_viewport;
+
+struct brw_instruction *sf_kernel;
+struct brw_instruction *ps_kernel;
+struct brw_instruction *sip_kernel;
+
+CARD32 *binding_table;
+int binding_table_entries;
+
+int dest_surf_offset, src_surf_offset, src_sampler_offset, vs_offset;
+int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
+int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
+int binding_table_offset;
+int next_offset, total_state_size;
+char *state_base;
+int state_base_offset;
+float *vb;
+int vb_size = 4 * 4 ; /* 4 DWORDS per vertex, 4 vertices for TRIFAN*/
+
+int src_blend, dst_blend;
+
+/*
+ * System kernel: copied into the state area by I965EXAPrepareComposite and
+ * referenced by the BRW_STATE_SIP command there.  Per the disassembly
+ * comments below it is a single wait followed by nop padding.
+ */
+static const CARD32 sip_kernel_static[][4] = {
+/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */
+ { 0x00000030, 0x20000108, 0x00001220, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+};
+
+/*
+ * This program computes dA/dx and dA/dy for the texture coordinates along
+ * with the base texture coordinate. It was extracted from the Mesa driver.
+ */
+
+/* Feeds sf_state->thread0.grf_reg_count in I965EXAPrepareComposite. */
+#define SF_KERNEL_NUM_GRF 10
+/* Not referenced in the code visible here. */
+#define SF_KERNEL_NUM_URB 8
+/* Feeds sf_state->thread4.max_threads (programmed as this value minus 1). */
+#define SF_MAX_THREADS 4
+
+/* SF kernel binary; each row is one instruction, disassembly given above it. */
+static const CARD32 sf_kernel_static[][4] = {
+/* send 0 (1) g6<1>F g1.12<0,1,0>F math mlen 1 rlen 1 { align1 + } */
+ { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
+/* send 0 (1) g6.4<1>F g1.20<0,1,0>F math mlen 1 rlen 1 { align1 + } */
+ { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
+/* add (8) g7<1>F g4<8,8,1>F g3<8,8,1>F { align1 + } */
+ { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
+/* mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 + } */
+ { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
+/* mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 + } */
+ { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
+/* mov (8) m1<1>F g7<0,1,0>F { align1 + } */
+ { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
+/* mov (8) m2<1>F g7.4<0,1,0>F { align1 + } */
+ { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
+/* mov (8) m3<1>F g3<8,8,1>F { align1 + } */
+ { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
+/* send 0 (8) a0<1>F g0<8,8,1>F urb mlen 4 rlen 0 write +0 transpose used complete EOT{ align1 + } */
+ { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+/* nop (4) g0<1>UD { align1 + } */
+ { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 },
+};
+
+/*
+ * PS kernels.  I965EXAPrepareComposite copies exactly one of these into the
+ * state area, chosen by whether a mask is present and whether the mask
+ * picture has componentAlpha set.  The instruction words come from the
+ * included headers.
+ */
+/* 1: no mask */
+static const CARD32 ps_kernel_static_nomask [][4] = {
+ #include "i965_composite_ps_nomask.h"
+};
+
+/* 2: mask with componentAlpha, src * mask color, XXX: later */
+static const CARD32 ps_kernel_static_maskca [][4] = {
+ #include "i965_composite_ps_maskca.h"
+};
+
+/* 3: mask without componentAlpha, src * mask alpha */
+static const CARD32 ps_kernel_static_masknoca [][4] = {
+ #include "i965_composite_ps_masknoca.h"
+};
+
+/*
+ * I965EXAPrepareComposite - lay out and fill the 3D pipeline state for a
+ * composite operation, then emit the ring commands that point the hardware
+ * at that state.
+ *
+ * The state (unit states, kernels, sampler, vertex buffer, surface states and
+ * binding table) is written directly into framebuffer memory, starting
+ * BRW_LINEAR_EXTRA bytes below pI830->Offscreen.End.  Source, mask and
+ * destination dimensions are saved in scale_units[] for I965EXAComposite;
+ * a mask width/height of -1 marks "no mask".
+ *
+ * pMask may be NULL; pMaskPicture is only dereferenced when pMask is set.
+ * Always returns TRUE.
+ */
+Bool
+I965EXAPrepareComposite(int op, PicturePtr pSrcPicture,
+ PicturePtr pMaskPicture, PicturePtr pDstPicture,
+ PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+ ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
+ I830Ptr pI830 = I830PTR(pScrn);
+ CARD32 src_offset, src_pitch;
+ CARD32 mask_offset, mask_pitch;
+ CARD32 dst_format, dst_offset, dst_pitch;
+ CARD32 blendctl;
+
+ErrorF("i965 prepareComposite\n");
+
+// i965_3d_pipeline_setup(pScrn);
+// i965_surf_setup(pScrn, pSrcPicture, pMaskPicture, pDstPicture,
+// pSrc, pMask, pDst);
+ // then setup blend, and shader program
+
+ /* NOTE(review): dst_format is fetched here but not read again below; the
+ * destination surface format is chosen from bitsPerPixel instead. */
+ I965GetDestFormat(pDstPicture, &dst_format);
+ src_offset = exaGetPixmapOffset(pSrc);
+ src_pitch = exaGetPixmapPitch(pSrc);
+ dst_offset = exaGetPixmapOffset(pDst);
+ dst_pitch = exaGetPixmapPitch(pDst);
+ if (pMask) {
+ mask_offset = exaGetPixmapOffset(pMask);
+ mask_pitch = exaGetPixmapPitch(pMask);
+ }
+ /* Index 0 = source, 1 = mask, 2 = destination. */
+ scale_units[0][0] = pSrc->drawable.width;
+ scale_units[0][1] = pSrc->drawable.height;
+ scale_units[2][0] = pDst->drawable.width;
+ scale_units[2][1] = pDst->drawable.height;
+
+ if (!pMask) {
+ is_transform[1] = FALSE;
+ /* -1 is the "no mask" marker tested by I965EXAComposite. */
+ scale_units[1][0] = -1;
+ scale_units[1][1] = -1;
+ } else {
+ scale_units[1][0] = pMask->drawable.width;
+ scale_units[1][1] = pMask->drawable.height;
+ }
+
+/* FIXME */
+ /* setup 3d pipeline state */
+
+ binding_table_entries = 2; /* default no mask */
+
+ /* Set up our layout of state in framebuffer. First the general state: */
+ next_offset = 0;
+ vs_offset = ALIGN(next_offset, 64);
+ next_offset = vs_offset + sizeof(*vs_state);
+
+ sf_offset = ALIGN(next_offset, 32);
+ next_offset = sf_offset + sizeof(*sf_state);
+
+ wm_offset = ALIGN(next_offset, 32);
+ next_offset = wm_offset + sizeof(*wm_state);
+
+ cc_offset = ALIGN(next_offset, 32);
+ next_offset = cc_offset + sizeof(*cc_state);
+
+// fixup sf_kernel_static, is sf_kernel needed? or not? why?
+// -> just keep current sf_kernel, which will send one setup urb entry to
+// PS kernel
+ sf_kernel_offset = ALIGN(next_offset, 64);
+ next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
+
+ //XXX: ps_kernel may be separated, fix with offset
+ ps_kernel_offset = ALIGN(next_offset, 64);
+ if (pMask) {
+ if (pMaskPicture->componentAlpha)
+ next_offset = ps_kernel_offset + sizeof(ps_kernel_static_maskca);
+ else
+ next_offset = ps_kernel_offset + sizeof(ps_kernel_static_masknoca);
+ } else
+ next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask);
+
+ sip_kernel_offset = ALIGN(next_offset, 64);
+ next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
+
+ // needed?
+ cc_viewport_offset = ALIGN(next_offset, 32);
+ next_offset = cc_viewport_offset + sizeof(*cc_viewport);
+
+ // : fix for texture sampler
+ // XXX: -> use only one sampler
+ src_sampler_offset = ALIGN(next_offset, 32);
+ next_offset = src_sampler_offset + sizeof(*src_sampler_state);
+
+ /* Align VB to native size of elements, for safety */
+ /* vb_size is a byte count and must cover everything I965EXAComposite writes
+ * through vb: 4 vertices of 4 floats each, or 6 floats each with a mask. */
+ vb_offset = ALIGN(next_offset, 8);
+ next_offset = vb_offset + vb_size;
+
+ /* And then the surface state: */
+ //XXX: fix for texture map and target surface
+ dest_surf_offset = ALIGN(next_offset, 32);
+ next_offset = dest_surf_offset + sizeof(*dest_surf_state);
+
+ src_surf_offset = ALIGN(next_offset, 32);
+ next_offset = src_surf_offset + sizeof(*src_surf_state);
+
+ if (pMask) {
+ /* NOTE(review): mask_surf_offset is not among the offsets declared just
+ * above this function; confirm it is declared elsewhere in the file. */
+ mask_surf_offset = ALIGN(next_offset, 32);
+ next_offset = mask_surf_offset + sizeof(*mask_surf_state);
+ binding_table_entries = 3;
+ }
+
+ binding_table_offset = ALIGN(next_offset, 32);
+ next_offset = binding_table_offset + (binding_table_entries * 4);
+
+ total_state_size = next_offset;
+
+ /*
+ * XXX: Use the extra space allocated at the end of the exa offscreen buffer?
+ */
+#define BRW_LINEAR_EXTRA (32*1024)
+
+ state_base_offset = (pI830->Offscreen.End -
+ BRW_LINEAR_EXTRA);
+
+ state_base_offset = ALIGN(state_base_offset, 64);
+ state_base = (char *)(pI830->FbBase + state_base_offset);
+ /* Set up our pointers to state structures in framebuffer. It would probably
+ * be a good idea to fill these structures out in system memory and then dump
+ * them there, instead.
+ */
+ vs_state = (void *)(state_base + vs_offset);
+ sf_state = (void *)(state_base + sf_offset);
+ wm_state = (void *)(state_base + wm_offset);
+ cc_state = (void *)(state_base + cc_offset);
+ sf_kernel = (void *)(state_base + sf_kernel_offset);
+ ps_kernel = (void *)(state_base + ps_kernel_offset);
+ sip_kernel = (void *)(state_base + sip_kernel_offset);
+
+ cc_viewport = (void *)(state_base + cc_viewport_offset);
+
+ dest_surf_state = (void *)(state_base + dest_surf_offset);
+ src_surf_state = (void *)(state_base + src_surf_offset);
+ if (pMask)
+ mask_surf_state = (void *)(state_base + mask_surf_offset);
+
+ src_sampler_state = (void *)(state_base + src_sampler_offset);
+ binding_table = (void *)(state_base + binding_table_offset);
+
+ vb = (void *)(state_base + vb_offset);
+
+ /* Set up a default static partitioning of the URB, which is supposed to
+ * allow anything we would want to do, at potentially lower performance.
+ */
+#define URB_CS_ENTRY_SIZE 0
+#define URB_CS_ENTRIES 0
+
+#define URB_VS_ENTRY_SIZE 1 // XXX: VUE row num? double check, 1 row is enough
+#define URB_VS_ENTRIES 8
+
+#define URB_GS_ENTRY_SIZE 0
+#define URB_GS_ENTRIES 0
+
+#define URB_CLIP_ENTRY_SIZE 0
+#define URB_CLIP_ENTRIES 0
+
+#define URB_SF_ENTRY_SIZE 4
+#define URB_SF_ENTRIES 8
+
+ /* Sections are packed back to back: VS, GS, CLIP, SF, CS. */
+ urb_vs_start = 0;
+ urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
+ urb_gs_start = urb_vs_start + urb_vs_size;
+ urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
+ urb_clip_start = urb_gs_start + urb_gs_size;
+ urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
+ urb_sf_start = urb_clip_start + urb_clip_size;
+ urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
+ urb_cs_start = urb_sf_start + urb_sf_size;
+ urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
+
+ /* We'll be poking the state buffers that could be in use by the 3d hardware
+ * here, but we should have synced the 3D engine already in I830PutImage.
+ */
+
+// needed?
+ memset (cc_viewport, 0, sizeof (*cc_viewport));
+ cc_viewport->min_depth = -1.e35;
+ cc_viewport->max_depth = 1.e35;
+
+ /* Color calculator state */
+ memset(cc_state, 0, sizeof(*cc_state));
+ cc_state->cc0.stencil_enable = 0; /* disable stencil */
+ cc_state->cc2.depth_test = 0; /* disable depth test */
+ cc_state->cc2.logicop_enable = 0; /* disable logic op */
+ cc_state->cc3.ia_blend_enable = 0; /* blend alpha just like colors */
+ cc_state->cc3.blend_enable = 1; /* enable color blend */
+ cc_state->cc3.alpha_test = 0; /* disable alpha test */
+ // XXX:cc_viewport needed?
+ cc_state->cc4.cc_viewport_state_offset = (state_base_offset + cc_viewport_offset) >> 5;
+ cc_state->cc5.dither_enable = 0; /* disable dither */
+// cc_state->cc5.logicop_func = 0xc; /* COPY */
+// cc_state->cc5.statistics_enable = 1;
+// cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
+// cc_state->cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE;
+// cc_state->cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE;
+ cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
+ /* NOTE(review): pMask here is the mask PixmapPtr; if I965GetBlendCntl expects
+ * the mask picture, this argument should be pMaskPicture -- confirm against
+ * its prototype. */
+ I965GetBlendCntl(op, pMask, pDstPicture->format,
+ &src_blend, &dst_blend);
+ cc_state->cc6.src_blend_factor = src_blend;
+ cc_state->cc6.dest_blend_factor = dst_blend;
+
+ /* Upload system kernel */
+ memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
+
+ /* Set up the state buffer for the destination surface */
+ memset(dest_surf_state, 0, sizeof(*dest_surf_state));
+ dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+ dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
+ // XXX: should compare with picture's cpp?...8 bit surf?
+ if (pDst->drawable.bitsPerPixel == 16) {
+ dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ } else {
+ dest_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ }
+ dest_surf_state->ss0.writedisable_alpha = 0;
+ dest_surf_state->ss0.writedisable_red = 0;
+ dest_surf_state->ss0.writedisable_green = 0;
+ dest_surf_state->ss0.writedisable_blue = 0;
+ dest_surf_state->ss0.color_blend = 1;
+ dest_surf_state->ss0.vert_line_stride = 0;
+ dest_surf_state->ss0.vert_line_stride_ofs = 0;
+ dest_surf_state->ss0.mipmap_layout_mode = 0;
+ dest_surf_state->ss0.render_cache_read_mode = 0;
+
+ // XXX: fix to picture address & size
+ /* Width, height and pitch are all programmed as (value - 1). */
+ dest_surf_state->ss1.base_addr = dst_offset;
+ dest_surf_state->ss2.height = pDst->drawable.height - 1;
+ dest_surf_state->ss2.width = pDst->drawable.width - 1;
+ dest_surf_state->ss2.mip_count = 0;
+ dest_surf_state->ss2.render_target_rotation = 0;
+ dest_surf_state->ss3.pitch = dst_pitch - 1;
+
+ /* Set up the source surface state buffer */
+ memset(src_surf_state, 0, sizeof(*src_surf_state));
+ src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+ if (pSrc->drawable.bitsPerPixel == 8)
+ src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_A8_UNORM; //XXX?
+ else if (pSrc->drawable.bitsPerPixel == 16)
+ src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ else
+ src_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ src_surf_state->ss0.writedisable_alpha = 0;
+ src_surf_state->ss0.writedisable_red = 0;
+ src_surf_state->ss0.writedisable_green = 0;
+ src_surf_state->ss0.writedisable_blue = 0;
+ src_surf_state->ss0.color_blend = 1;
+ src_surf_state->ss0.vert_line_stride = 0;
+ src_surf_state->ss0.vert_line_stride_ofs = 0;
+ src_surf_state->ss0.mipmap_layout_mode = 0;
+ src_surf_state->ss0.render_cache_read_mode = 0;
+
+ src_surf_state->ss1.base_addr = src_offset;
+ src_surf_state->ss2.width = pSrc->drawable.width - 1;
+ src_surf_state->ss2.height = pSrc->drawable.height - 1;
+ src_surf_state->ss2.mip_count = 0;
+ src_surf_state->ss2.render_target_rotation = 0;
+ src_surf_state->ss3.pitch = src_pitch - 1;
+
+ /* setup mask surface */
+ if (pMask) {
+ memset(mask_surf_state, 0, sizeof(*mask_surf_state));
+ mask_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+ if (pMask->drawable.bitsPerPixel == 8)
+ mask_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_A8_UNORM; //XXX?
+ else if (pMask->drawable.bitsPerPixel == 16)
+ mask_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+ else
+ mask_surf_state->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+ mask_surf_state->ss0.writedisable_alpha = 0;
+ mask_surf_state->ss0.writedisable_red = 0;
+ mask_surf_state->ss0.writedisable_green = 0;
+ mask_surf_state->ss0.writedisable_blue = 0;
+ mask_surf_state->ss0.color_blend = 1;
+ mask_surf_state->ss0.vert_line_stride = 0;
+ mask_surf_state->ss0.vert_line_stride_ofs = 0;
+ mask_surf_state->ss0.mipmap_layout_mode = 0;
+ mask_surf_state->ss0.render_cache_read_mode = 0;
+
+ mask_surf_state->ss1.base_addr = mask_offset;
+ mask_surf_state->ss2.width = pMask->drawable.width - 1;
+ mask_surf_state->ss2.height = pMask->drawable.height - 1;
+ mask_surf_state->ss2.mip_count = 0;
+ mask_surf_state->ss2.render_target_rotation = 0;
+ mask_surf_state->ss3.pitch = mask_pitch - 1;
+ }
+
+ /* Set up a binding table for our surfaces. Only the PS will use it */
+ /* Entry 0 = destination, 1 = source, 2 = mask (only when present). */
+ binding_table[0] = state_base_offset + dest_surf_offset;
+ binding_table[1] = state_base_offset + src_surf_offset;
+ if (pMask)
+ binding_table[2] = state_base_offset + mask_surf_offset;
+
+ /* PS kernel use this sampler */
+ memset(src_sampler_state, 0, sizeof(*src_sampler_state));
+ src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+ src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+
+ /* XXX: fix for repeat */
+ if (!pSrcPicture->repeat) {
+ src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; // XXX: clamp_border and set border to 0?
+ src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+ } else {
+ src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; // XXX: clamp_border and set border to 0?
+ src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+ }
+
+ /* Set up the vertex shader to be disabled (passthrough) */
+ memset(vs_state, 0, sizeof(*vs_state));
+ // XXX: vs URB should be defined for VF vertex URB store. done already?
+ vs_state->vs6.vs_enable = 0;
+
+ // XXX: sf_kernel? keep it as now
+ /* Set up the SF kernel to do coord interp: for each attribute,
+ * calculate dA/dx and dA/dy. Hand these interpolation coefficients
+ * back to SF which then hands pixels off to WM.
+ */
+ memcpy (sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
+
+ memset(sf_state, 0, sizeof(*sf_state));
+ sf_state->thread0.kernel_start_pointer =
+ (state_base_offset + sf_kernel_offset) >> 6;
+ sf_state->thread0.grf_reg_count = ((SF_KERNEL_NUM_GRF & ~15) / 16);
+ sf_state->sf1.single_program_flow = 1;
+ sf_state->sf1.binding_table_entry_count = 0;
+ sf_state->sf1.thread_priority = 0;
+ sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
+ sf_state->sf1.illegal_op_exception_enable = 1;
+ sf_state->sf1.mask_stack_exception_enable = 1;
+ sf_state->sf1.sw_exception_enable = 1;
+ sf_state->thread2.per_thread_scratch_space = 0;
+ sf_state->thread2.scratch_space_base_pointer = 0; /* not used in our kernel */
+ sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+ sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+ sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+ sf_state->thread3.urb_entry_read_offset = 0;
+ sf_state->thread3.dispatch_grf_start_reg = 3;
+ sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+ sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+ sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+ sf_state->thread4.stats_enable = 1;
+ sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+ sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
+ sf_state->sf6.scissor = 0;
+ sf_state->sf7.trifan_pv = 2;
+ sf_state->sf6.dest_org_vbias = 0x8;
+ sf_state->sf6.dest_org_hbias = 0x8;
+
+ /* Set up the PS kernel (dispatched by WM)
+ */
+
+ // XXX: replace to texture blend shader, and different cases
+ /* Same three-way choice as the size computation for ps_kernel_offset above. */
+ if (pMask) {
+ if (pMaskPicture->componentAlpha)
+ memcpy (ps_kernel, ps_kernel_static_maskca, sizeof (ps_kernel_static_maskca));
+ else
+ memcpy (ps_kernel, ps_kernel_static_masknoca, sizeof (ps_kernel_static_masknoca));
+ } else
+ memcpy (ps_kernel, ps_kernel_static_nomask, sizeof (ps_kernel_static_nomask));
+
+ memset (wm_state, 0, sizeof (*wm_state));
+ wm_state->thread0.kernel_start_pointer =
+ (state_base_offset + ps_kernel_offset) >> 6;
+ wm_state->thread0.grf_reg_count = ((PS_KERNEL_NUM_GRF & ~15) / 16);
+ wm_state->thread1.single_program_flow = 1;
+ if (!pMask)
+ wm_state->thread1.binding_table_entry_count = 2; /* tex and fb */
+ else
+ wm_state->thread1.binding_table_entry_count = 3; /* tex, mask and fb */
+
+ wm_state->thread2.scratch_space_base_pointer = 0;
+ wm_state->thread2.per_thread_scratch_space = 0;
+ // XXX: urb allocation
+ wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
+ // wm kernel use urb from 3, see wm_program in compiler module
+ wm_state->thread3.urb_entry_read_length = 1; /* one per pair of attrib */
+ wm_state->thread3.const_urb_entry_read_length = 0;
+ wm_state->thread3.const_urb_entry_read_offset = 0;
+ wm_state->thread3.urb_entry_read_offset = 0;
+
+ wm_state->wm4.stats_enable = 1;
+ wm_state->wm4.sampler_state_pointer = (state_base_offset + src_sampler_offset) >> 5;
+ wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
+ wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
+ wm_state->wm5.thread_dispatch_enable = 1;
+ //just use 16-pixel dispatch, don't need to change kernel start point
+ wm_state->wm5.enable_16_pix = 1;
+ wm_state->wm5.enable_8_pix = 0;
+ wm_state->wm5.early_depth_test = 1;
+
+ /* Begin the long sequence of commands needed to set up the 3D
+ * rendering pipe
+ */
+ {
+
+ /* 46 dwords are emitted below without a mask; the mask adds one more
+ * vertex element (2 dwords) for 48. */
+ BEGIN_LP_RING((pMask?48:46));
+ // MI_FLUSH prior to PIPELINE_SELECT
+ OUT_RING(MI_FLUSH |
+ MI_STATE_INSTRUCTION_CACHE_FLUSH |
+ BRW_MI_GLOBAL_SNAPSHOT_RESET);
+
+ /* Match Mesa driver setup */
+ OUT_RING(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+ /* Zero out the two base address registers so all offsets are absolute */
+ // XXX: zero out...
+ OUT_RING(BRW_STATE_BASE_ADDRESS | 4);
+ // why this's not state_base_offset? -> because later we'll always add on
+ // state_base_offset to offset params. see SIP
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* Surface state base address */
+ OUT_RING(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* general state max addr, disabled */
+ OUT_RING(0x10000000 | BASE_ADDRESS_MODIFY); /* media object state max addr, disabled */
+
+ /* Set system instruction pointer */
+ OUT_RING(BRW_STATE_SIP | 0);
+ OUT_RING(state_base_offset + sip_kernel_offset); /* system instruction pointer */
+
+ /* Pipe control */
+ // XXX: pipe control write cache before enabling color blending
+ // vol2, geometry pipeline 1.8.4
+ OUT_RING(BRW_PIPE_CONTROL |
+ BRW_PIPE_CONTROL_NOWRITE |
+ BRW_PIPE_CONTROL_IS_FLUSH |
+ 2);
+ OUT_RING(0); /* Destination address */
+ OUT_RING(0); /* Immediate data low DW */
+ OUT_RING(0); /* Immediate data high DW */
+
+ /* Binding table pointers */
+ OUT_RING(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4);
+ OUT_RING(0); /* vs */
+ OUT_RING(0); /* gs */
+ OUT_RING(0); /* clip */
+ OUT_RING(0); /* sf */
+ /* Only the PS uses the binding table */
+ OUT_RING(state_base_offset + binding_table_offset); /* ps */
+
+ //ring 20
+
+ /* The drawing rectangle clipping is always on. Set it to values that
+ * shouldn't do any clipping.
+ */
+ //XXX: fix for picture size
+ OUT_RING(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */
+ OUT_RING(0x00000000); /* ymin, xmin */
+ OUT_RING((pScrn->virtualX - 1) |
+ (pScrn->virtualY - 1) << 16); /* ymax, xmax */
+ OUT_RING(0x00000000); /* yorigin, xorigin */
+
+ /* skip the depth buffer */
+ /* skip the polygon stipple */
+ /* skip the polygon stipple offset */
+ /* skip the line stipple */
+
+ /* Set the pointers to the 3d pipeline state */
+ OUT_RING(BRW_3DSTATE_PIPELINED_POINTERS | 5);
+ OUT_RING(state_base_offset + vs_offset); /* 32 byte aligned */
+ OUT_RING(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */
+ OUT_RING(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
+ OUT_RING(state_base_offset + sf_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + wm_offset); /* 32 byte aligned */
+ OUT_RING(state_base_offset + cc_offset); /* 64 byte aligned */
+
+ /* URB fence */
+ // XXX: CS for const URB needed? if not, cs_fence should be equal to sf_fence
+ OUT_RING(BRW_URB_FENCE |
+ UF0_CS_REALLOC |
+ UF0_SF_REALLOC |
+ UF0_CLIP_REALLOC |
+ UF0_GS_REALLOC |
+ UF0_VS_REALLOC |
+ 1);
+ OUT_RING(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
+ ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
+ ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
+ OUT_RING(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
+ ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
+
+ /* Constant buffer state */
+ // XXX: needed? seems no usage, as we don't have CONSTANT_BUFFER definition
+ OUT_RING(BRW_CS_URB_STATE | 0);
+ OUT_RING(((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
+ (URB_CS_ENTRIES << 0)); /* Number of URB Entries */
+
+ /* Set up the pointer to our vertex buffer */
+ // XXX: double check
+ // int vb_pitch = 4 * 4; // XXX: pitch should include mask's coords? possible
+ // all three coords on one row?
+ /* Number of 2-float elements per vertex: position, source coords, and
+ * mask coords when a mask is present. */
+ int nelem = pMask ? 3: 2;
+ OUT_RING(BRW_3DSTATE_VERTEX_BUFFERS | 3); //should be 4n-1 -> 3
+ OUT_RING((0 << VB0_BUFFER_INDEX_SHIFT) |
+ VB0_VERTEXDATA |
+ ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
+ // pitch includes all vertex data, 4bytes for 1 dword, each
+ // element has 2 coords (x,y)(s0,t0), nelem to reflect possible
+ // mask
+ OUT_RING(state_base_offset + vb_offset);
+ OUT_RING(4 * nelem); // max index, prim has 4 coords
+ OUT_RING(0); // ignore for VERTEXDATA, but still there
+
+ /* Set up our vertex elements, sourced from the single vertex buffer. */
+ OUT_RING(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1)); // XXX: 2n-1, (x,y) + (s0,t0) +
+ // possible (s1, t1)
+ /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (0 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (8 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+
+ if (pMask) {
+ /* offset 16: S1, T1 -> {S1, T1, 1.0, 1.0} */
+ OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+ VE0_VALID |
+ (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+ (16 << VE0_OFFSET_SHIFT));
+ OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+ (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
+ (8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
+ //XXX: is this has alignment issue? and thread access problem?
+
+ }
+
+ ADVANCE_LP_RING();
+
+ }
+
+ {
+ /* cc states */
+ /* dest buffer */
+ /* urbs */
+ /* binding tables */
+ /* clipping */
+ /* color blend (color calculator, dataport shared function)
+ COLOR_CALC_STATE/SURFACE_STATE(rendertarget's color blend enable
+ bit)
+ Errata!!!: brw-a/b, rendertarget 'local' color blending always
+ enabled! only control by global enable bit.
+ surface format for blend, "Surface format table in Sampling Engine"
+ XXX: if surface format not support, we should fallback.
+ */
+ /*
+ render target should be defined in SURFACE_STATE
+ o render target SURFTYPE_BUFFER? 2D? Keith has 2D set.
+ o depth buffer SURFTYPE_NULL?
+ color blend:
+ o Errata!!: must issue PIPE_CONTROL with Write Cache Flush
+ enable set, before transitioning to read-write color buffer.
+ o disable pre/post-blending clamping
+ o enable color buffer blending enable in COLOR_CALC_STATE,(vol2, 3d rasterization 3.8)
+ enable color blending enable in SURFACE_STATE.(shared,
+ sampling engine 1.7)
+ disable depth test
+ o (we don't use BLENDFACT_SRC_ALPHA_SATURATE, so don't care
+ the Errata for independent alpha blending, just use color
+ blending factor for all) disable independent alpha blending
+ in COLOR_CALC_STATE
+ o set src/dst blend factor in COLOR_CALC_STATE
+
+ */
+ }
+
+ /* shader program
+ o use sampler shared function for texture data
+ o submit result to dataport for later color blending */
+ {
+ /* PS program:
+ o declare sampler and variables??
+ o 'send' cmd to Sampling Engine to load 'src' picture
+ o if (!pMask) then 'send' 'src' texture value to DataPort
+ target render cache
+ o else
+ - 'send' cmd to SE to load 'mask' picture
+ - if no alpha, force to 1 (move 1 to W element of mask)
+ - if (mask->componentAlpha) then mul 'src' & 'mask', 'send'
+ output to DataPort render cache
+ - else mul 'src' & 'mask''s W element(alpha), 'send' output
+ to Dataport render cache
+ */
+
+ }
+
+#ifdef I830DEBUG
+ ErrorF("try to sync to show any errors...");
+ I830Sync(pScrn);
+#endif
+ return TRUE;
+}
+
+/*
+ * I965EXAComposite - emit one composited rectangle.
+ *
+ * Fills the vertex buffer set up by I965EXAPrepareComposite with the four
+ * corners of the destination rectangle (dstX, dstY, w, h) and the matching
+ * source (and, if present, mask) texture coordinates, then queues a 4-vertex
+ * TRIFAN 3DPRIMITIVE on the ring.
+ *
+ * Texture coordinates are the (optionally transformed) pixel coordinates
+ * divided by the surface size saved in scale_units[]; a mask size of -1
+ * there means no mask, in which case maskX/maskY are ignored for output.
+ * Only the top-left and bottom-right corners are run through the picture
+ * transform; the other two corners are built from their components.
+ */
+void
+I965EXAComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
+                 int dstX, int dstY, int w, int h)
+{
+    /*
+     * Screen and driver records, derived from the destination pixmap.
+     * pScrn is required by the I830Sync() call in the I830DEBUG path, and
+     * pI830 is declared the same way I965EXAPrepareComposite does before
+     * using the LP ring macros; neither was previously in scope here.
+     */
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    I830Ptr pI830 = I830PTR(pScrn);
+    int srcXend, srcYend, maskXend, maskYend;
+    PictVector v;
+    int has_mask = 1, i = 0;
+
+    DPRINTF(PFX, "Composite: srcX %d, srcY %d\n\t maskX %d, maskY %d\n\t"
+            "dstX %d, dstY %d\n\twidth %d, height %d\n\t"
+            "src_scale_x %f, src_scale_y %f, "
+            "mask_scale_x %f, mask_scale_y %f\n",
+            srcX, srcY, maskX, maskY, dstX, dstY, w, h,
+            scale_units[0][0], scale_units[0][1],
+            scale_units[1][0], scale_units[1][1]);
+
+    /* I965EXAPrepareComposite stores -1 as the mask size when there is no mask. */
+    if (scale_units[1][0] == -1 || scale_units[1][1] == -1) {
+        has_mask = 0;
+    }
+
+    srcXend = srcX + w;
+    srcYend = srcY + h;
+    maskXend = maskX + w;
+    maskYend = maskY + h;
+    if (is_transform[0]) {
+        /* Transform the source top-left corner... */
+        v.vector[0] = IntToxFixed(srcX);
+        v.vector[1] = IntToxFixed(srcY);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(transform[0], &v);
+        srcX = xFixedToInt(v.vector[0]);
+        srcY = xFixedToInt(v.vector[1]);
+        /* ...and the bottom-right corner. */
+        v.vector[0] = IntToxFixed(srcXend);
+        v.vector[1] = IntToxFixed(srcYend);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(transform[0], &v);
+        srcXend = xFixedToInt(v.vector[0]);
+        srcYend = xFixedToInt(v.vector[1]);
+    }
+    if (is_transform[1]) {
+        /* Same two corners for the mask. */
+        v.vector[0] = IntToxFixed(maskX);
+        v.vector[1] = IntToxFixed(maskY);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(transform[1], &v);
+        maskX = xFixedToInt(v.vector[0]);
+        maskY = xFixedToInt(v.vector[1]);
+        v.vector[0] = IntToxFixed(maskXend);
+        v.vector[1] = IntToxFixed(maskYend);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(transform[1], &v);
+        maskXend = xFixedToInt(v.vector[0]);
+        maskYend = xFixedToInt(v.vector[1]);
+    }
+
+    DPRINTF(PFX, "After transform: srcX %d, srcY %d,srcXend %d, srcYend %d\n\t"
+            "maskX %d, maskY %d, maskXend %d, maskYend %d\n\t"
+            "dstX %d, dstY %d\n", srcX, srcY, srcXend, srcYend,
+            maskX, maskY, maskXend, maskYend, dstX, dstY);
+
+    /*
+     * Vertex layout: {dst x, dst y, src s, src t[, mask s, mask t]}.
+     * With a mask this writes 24 floats (96 bytes) through vb, otherwise 16.
+     */
+
+    /* Vertex 0: top-left */
+    vb[i++] = (float)dstX;
+    vb[i++] = (float)dstY;
+    vb[i++] = (float)srcX / scale_units[0][0];
+    vb[i++] = (float)srcY / scale_units[0][1];
+    if (has_mask) {
+        vb[i++] = (float)maskX / scale_units[1][0];
+        vb[i++] = (float)maskY / scale_units[1][1];
+    }
+
+    /* Vertex 1: bottom-left */
+    vb[i++] = (float)dstX;
+    vb[i++] = (float)(dstY + h);
+    vb[i++] = (float)srcX / scale_units[0][0];
+    vb[i++] = (float)srcYend / scale_units[0][1];
+    if (has_mask) {
+        vb[i++] = (float)maskX / scale_units[1][0];
+        vb[i++] = (float)maskYend / scale_units[1][1];
+    }
+
+    /* Vertex 2: bottom-right */
+    vb[i++] = (float)(dstX + w);
+    vb[i++] = (float)(dstY + h);
+    vb[i++] = (float)srcXend / scale_units[0][0];
+    vb[i++] = (float)srcYend / scale_units[0][1];
+    if (has_mask) {
+        vb[i++] = (float)maskXend / scale_units[1][0];
+        vb[i++] = (float)maskYend / scale_units[1][1];
+    }
+
+    /* Vertex 3: top-right */
+    vb[i++] = (float)(dstX + w);
+    vb[i++] = (float)dstY;
+    vb[i++] = (float)srcXend / scale_units[0][0];
+    vb[i++] = (float)srcY / scale_units[0][1];
+    if (has_mask) {
+        vb[i++] = (float)maskXend / scale_units[1][0];
+        vb[i++] = (float)maskY / scale_units[1][1];
+    }
+
+    {
+        BEGIN_LP_RING(6);
+        OUT_RING(BRW_3DPRIMITIVE |
+                 BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL |
+                 (_3DPRIM_TRIFAN << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) |
+                 (0 << 9) | /* CTG - indirect vertex count */
+                 4);
+        OUT_RING(4); /* vertex count per instance */
+        OUT_RING(0); /* start vertex offset */
+        OUT_RING(1); /* single instance */
+        OUT_RING(0); /* start instance location */
+        OUT_RING(0); /* index buffer offset, ignored */
+        ADVANCE_LP_RING();
+    }
+#ifdef I830DEBUG
+    ErrorF("sync after 3dprimitive");
+    I830Sync(pScrn);
+#endif
+}
More information about the xorg-commit
mailing list