From cf33bcf0b246f75094673beaa989034fa27c4b9f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 23 Jan 2007 16:07:12 +1100 Subject: nouveau: allow for card-specific shader infos to be kept NV30/40 fragprog: build FP_CONTROL per-shader, still some hardcoded bits for this reg.. It looks like it has to do with the number of temps used, but needs more looking at. NV40 vtxprog : build VP_IN_REG/VP_OUT_REG during shader compile --- src/mesa/drivers/dri/nouveau/nouveau_shader.h | 74 ++++++++------- src/mesa/drivers/dri/nouveau/nouveau_shader_0.c | 1 + src/mesa/drivers/dri/nouveau/nouveau_shader_2.c | 4 - src/mesa/drivers/dri/nouveau/nv10_swtcl.c | 3 - src/mesa/drivers/dri/nouveau/nv30_fragprog.c | 4 + src/mesa/drivers/dri/nouveau/nv30_state.c | 3 - src/mesa/drivers/dri/nouveau/nv30_vertprog.c | 4 + src/mesa/drivers/dri/nouveau/nv40_vertprog.c | 118 +++++++++++++++++++----- 8 files changed, 149 insertions(+), 62 deletions(-) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index a3ab027142..8b4be9dfe7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -24,6 +24,16 @@ typedef struct _nvs_fragment_header { } type; } nvsFragmentHeader; +typedef union { + struct { + uint32_t fp_control; + } NV30FP; + struct { + uint32_t vp_in_reg; + uint32_t vp_out_reg; + } NV30VP; +} nvsCardPriv; + typedef struct _nouveauShader { union { struct gl_vertex_program vp; @@ -41,10 +51,10 @@ typedef struct _nouveauShader { unsigned int program_start_id; unsigned int program_current; struct gl_buffer_object *program_buffer; - unsigned int inputs_read; - unsigned int outputs_written; int inst_count; + nvsCardPriv card_priv; + struct { GLfloat *source_val; /* NULL if invariant */ float val[4]; @@ -113,35 +123,35 @@ typedef enum { } nvsSwzComp; typedef enum { - NVS_FR_POSITION, - NVS_FR_WEIGHT, - NVS_FR_NORMAL, - NVS_FR_COL0, - NVS_FR_COL1, - NVS_FR_BFC0, - NVS_FR_BFC1, - NVS_FR_FOGCOORD, - NVS_FR_POINTSZ, - NVS_FR_TEXCOORD0, - NVS_FR_TEXCOORD1, - NVS_FR_TEXCOORD2, - NVS_FR_TEXCOORD3, - NVS_FR_TEXCOORD4, - NVS_FR_TEXCOORD5, - NVS_FR_TEXCOORD6, - NVS_FR_TEXCOORD7, - NVS_FR_FRAGDATA0, - NVS_FR_FRAGDATA1, - NVS_FR_FRAGDATA2, - NVS_FR_FRAGDATA3, - NVS_FR_CLIP0, - NVS_FR_CLIP1, - NVS_FR_CLIP2, - NVS_FR_CLIP3, - NVS_FR_CLIP4, - NVS_FR_CLIP5, - NVS_FR_CLIP6, - NVS_FR_FACING, + NVS_FR_POSITION = 0, + NVS_FR_WEIGHT = 1, + NVS_FR_NORMAL = 2, + NVS_FR_COL0 = 3, + NVS_FR_COL1 = 4, + NVS_FR_FOGCOORD = 5, + NVS_FR_TEXCOORD0 = 8, + NVS_FR_TEXCOORD1 = 9, + NVS_FR_TEXCOORD2 = 10, + NVS_FR_TEXCOORD3 = 11, + NVS_FR_TEXCOORD4 = 12, + NVS_FR_TEXCOORD5 = 13, + NVS_FR_TEXCOORD6 = 14, + NVS_FR_TEXCOORD7 = 15, + NVS_FR_BFC0 = 16, + NVS_FR_BFC1 = 17, + NVS_FR_POINTSZ = 18, + NVS_FR_FRAGDATA0 = 19, + NVS_FR_FRAGDATA1 = 20, + NVS_FR_FRAGDATA2 = 21, + NVS_FR_FRAGDATA3 = 22, + NVS_FR_CLIP0 = 23, + NVS_FR_CLIP1 = 24, + NVS_FR_CLIP2 = 25, + NVS_FR_CLIP3 = 26, + NVS_FR_CLIP4 = 27, + NVS_FR_CLIP5 = 28, + NVS_FR_CLIP6 = 29, + NVS_FR_FACING = 30, NVS_FR_UNKNOWN } nvsFixedReg; @@ -279,6 +289,8 @@ extern nvsSwzComp NV20VP_TX_SWIZZLE[4]; #define SCAP_SRC_ABS (1<<0) struct _nvsFunc { + nvsCardPriv *card_priv; + unsigned int MaxInst; unsigned int MaxAttrib; unsigned int MaxTemp; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c index 5845d4f63a..3e542ea9c0 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_0.c @@ -805,6 +805,7 @@ nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs) fprintf(stderr, "Unknown program type %d", prog->Target); return GL_FALSE; } + nvs->func->card_priv = &nvs->card_priv; rec = CALLOC_STRUCT(pass0_rec); if (rec) { diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index 6fb36c1daf..c106fd2d94 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -118,8 +118,6 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, if (op->srcpos[i] != -1) { reg = pass2_mangle_reg(nvs, inst, inst->src[i]); - if (reg.file == NVS_FILE_ATTRIB) - nvs->inputs_read |= (1 << reg.index); shader->SetSource(shader, ®, op->srcpos[i]); if (reg.file == NVS_FILE_CONST && @@ -136,8 +134,6 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, } reg = pass2_mangle_reg(nvs, inst, inst->dest); - if (reg.file == NVS_FILE_RESULT) - nvs->outputs_written |= (1 << reg.index); shader->SetResult(shader, ®, inst->mask, slot); } diff --git a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c index f916912c7e..c9bfac8c4a 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c +++ b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c @@ -476,9 +476,6 @@ static void nv10ChooseVertexState( GLcontext *ctx ) * is up to date */ nvsUpdateShader(ctx, nmesa->passthrough_vp); - BEGIN_RING_CACHE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_IN_REG, 2); - OUT_RING_CACHE (0xff09); /*IN : POS, COL, TC0-7 */ - OUT_RING_CACHE (0x3fc001); /*OUT: COL, TC0-7, POS implied */ /* Update texenv shader / user fragprog */ nvsUpdateShader(ctx, (nouveauShader*)ctx->FragmentProgram._Current); diff --git a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c index cd7c955c9e..3c7501dd62 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c @@ -46,6 +46,8 @@ NV30FPUploadToHW(GLcontext *ctx, nouveauShader *nvs) */ BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1); OUT_RING (offset | 1); + BEGIN_RING_SIZE(NvSub3D, 0x1d60, 1); + OUT_RING (nvs->card_priv.NV30FP.fp_control | 0x03000000); } static void @@ -92,6 +94,8 @@ NV30FPSupportsOpcode(nvsFunc *shader, nvsOpcode op) static void NV30FPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot) { + if (opcode == NV30_FP_OP_OPCODE_KIL) + shader->card_priv->NV30FP.fp_control |= (1<<7); shader->inst[0] &= ~NV30_FP_OP_OPCODE_MASK; shader->inst[0] |= (opcode << NV30_FP_OP_OPCODE_SHIFT); } diff --git a/src/mesa/drivers/dri/nouveau/nv30_state.c b/src/mesa/drivers/dri/nouveau/nv30_state.c index 9bb4f14909..9b0d7425c8 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_state.c +++ b/src/mesa/drivers/dri/nouveau/nv30_state.c @@ -862,9 +862,6 @@ static GLboolean nv40InitCard(nouveauContextPtr nmesa) BEGIN_RING_SIZE(NvSub3D, 0x1e94, 1); OUT_RING(0x00000001); - BEGIN_RING_SIZE(NvSub3D, 0x1d60, 1); - OUT_RING(0x03008000); - return GL_TRUE; } diff --git a/src/mesa/drivers/dri/nouveau/nv30_vertprog.c b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c index 0b7678f55d..afcacf36c2 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_vertprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_vertprog.c @@ -29,6 +29,10 @@ NV30VPUploadToHW(GLcontext *ctx, nouveauShader *nvs) } BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_PROGRAM_START_ID, 1); OUT_RING(0); + + BEGIN_RING_SIZE(NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_IN_REG, 2); + OUT_RING(nvs->card_priv.NV30VP.vp_in_reg); + OUT_RING(nvs->card_priv.NV30VP.vp_out_reg); } static void diff --git a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c index 1ba1cfd155..6cb7e1cfd6 100644 --- a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c +++ b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c @@ -66,6 +66,96 @@ NV40VPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, shader->inst[0] |= (swizzle[NVS_SWZ_W] << NV40_VP_INST_COND_SWZ_W_SHIFT); } +/* these just exist here until nouveau_reg.h has them. */ +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL0 (1<<0) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL1 (1<<1) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC0 (1<<2) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC1 (1<<3) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_FOGC (1<<4) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_PSZ (1<<5) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP0 (1<<6) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP1 (1<<7) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP2 (1<<8) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP3 (1<<9) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP4 (1<<10) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP5 (1<<11) +#define NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_TEX0 (1<<14) + +static unsigned int +NV40VPTranslateResultReg(nvsFunc *shader, nvsFixedReg result, + unsigned int *mask_ret) +{ + unsigned int *out_reg = &shader->card_priv->NV30VP.vp_out_reg; + + *mask_ret = 0xf; + + switch (result) { + case NVS_FR_POSITION: + /* out_reg POS implied */ + return NV40_VP_INST_DEST_POS; + case NVS_FR_COL0: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL0; + return NV40_VP_INST_DEST_COL0; + case NVS_FR_COL1: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_COL1; + return NV40_VP_INST_DEST_COL1; + case NVS_FR_BFC0: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC0; + return NV40_VP_INST_DEST_BFC0; + case NVS_FR_BFC1: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_BFC1; + return NV40_VP_INST_DEST_BFC1; + case NVS_FR_FOGCOORD: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_FOGC; + *mask_ret = 0x8; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_CLIP0: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP0; + *mask_ret = 0x4; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_CLIP1: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP1; + *mask_ret = 0x2; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_CLIP2: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP2; + *mask_ret = 0x1; + return NV40_VP_INST_DEST_FOGC; + case NVS_FR_POINTSZ: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_PSZ; + *mask_ret = 0x8; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_CLIP3: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP3; + *mask_ret = 0x4; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_CLIP4: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP4; + *mask_ret = 0x2; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_CLIP5: + (*out_reg) |= NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_CLP5; + *mask_ret = 0x1; + return NV40_VP_INST_DEST_PSZ; + case NVS_FR_TEXCOORD0: + case NVS_FR_TEXCOORD1: + case NVS_FR_TEXCOORD2: + case NVS_FR_TEXCOORD3: + case NVS_FR_TEXCOORD4: + case NVS_FR_TEXCOORD5: + case NVS_FR_TEXCOORD6: + case NVS_FR_TEXCOORD7: + { + int unit = result - NVS_FR_TEXCOORD0; + (*out_reg) |= (NV30_TCL_PRIMITIVE_3D_VP_OUT_REG_TEX0 << unit); + return NV40_VP_INST_DEST_TC(unit); + } + default: + WARN_ONCE("unknown vp output %d\n", result); + return NV40_VP_INST_DEST_POS; + } +} + static void NV40VPSetResult(nvsFunc *shader, nvsRegister * dest, unsigned int mask, int slot) @@ -78,29 +168,14 @@ NV40VPSetResult(nvsFunc *shader, nvsRegister * dest, unsigned int mask, if (mask & SMASK_W) hwmask |= (1 << 0); if (dest->file == NVS_FILE_RESULT) { + unsigned int valid_mask; int hwidx; - switch (dest->index) { - case NVS_FR_POSITION : hwidx = NV40_VP_INST_DEST_POS; break; - case NVS_FR_COL0 : hwidx = NV40_VP_INST_DEST_COL0; break; - case NVS_FR_COL1 : hwidx = NV40_VP_INST_DEST_COL1; break; - case NVS_FR_BFC0 : hwidx = NV40_VP_INST_DEST_BFC0; break; - case NVS_FR_BFC1 : hwidx = NV40_VP_INST_DEST_BFC1; break; - case NVS_FR_FOGCOORD : hwidx = NV40_VP_INST_DEST_FOGC; break; - case NVS_FR_POINTSZ : hwidx = NV40_VP_INST_DEST_PSZ; break; - case NVS_FR_TEXCOORD0: hwidx = NV40_VP_INST_DEST_TC(0); break; - case NVS_FR_TEXCOORD1: hwidx = NV40_VP_INST_DEST_TC(1); break; - case NVS_FR_TEXCOORD2: hwidx = NV40_VP_INST_DEST_TC(2); break; - case NVS_FR_TEXCOORD3: hwidx = NV40_VP_INST_DEST_TC(3); break; - case NVS_FR_TEXCOORD4: hwidx = NV40_VP_INST_DEST_TC(4); break; - case NVS_FR_TEXCOORD5: hwidx = NV40_VP_INST_DEST_TC(5); break; - case NVS_FR_TEXCOORD6: hwidx = NV40_VP_INST_DEST_TC(6); break; - case NVS_FR_TEXCOORD7: hwidx = NV40_VP_INST_DEST_TC(7); break; - default: - WARN_ONCE("unknown vtxprog output %d\n", dest->index); - hwidx = 0; - break; - } + hwidx = NV40VPTranslateResultReg(shader, dest->index, &valid_mask); + if (hwmask & ~valid_mask) + WARN_ONCE("writing invalid components of result reg\n"); + hwmask &= valid_mask; + shader->inst[3] &= ~NV40_VP_INST_DEST_MASK; shader->inst[3] |= (hwidx << NV40_VP_INST_DEST_SHIFT); @@ -174,6 +249,7 @@ NV40VPSetSource(nvsFunc *shader, nvsRegister * src, int pos) shader->inst[1] &= ~NV40_VP_INST_INPUT_SRC_MASK; shader->inst[1] |= (src->index << NV40_VP_INST_INPUT_SRC_SHIFT); + shader->card_priv->NV30VP.vp_in_reg |= (1 << src->index); if (src->indexed) { shader->inst[0] |= NV40_VP_INST_INDEX_INPUT; if (src->addr_reg) -- cgit v1.2.3