diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler')
37 files changed, 4627 insertions, 2537 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index d973844192..d83888d90a 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -8,10 +8,17 @@ LIBNAME = r300compiler C_SOURCES = \ radeon_code.c \ radeon_compiler.c \ - radeon_nqssadce.c \ radeon_program.c \ + radeon_program_print.c \ + radeon_opcodes.c \ radeon_program_alu.c \ radeon_program_pair.c \ + radeon_pair_translate.c \ + radeon_pair_schedule.c \ + radeon_pair_regalloc.c \ + radeon_dataflow.c \ + radeon_dataflow_deadcode.c \ + radeon_dataflow_swizzles.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript new file mode 100755 index 0000000000..46075a8aee --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -0,0 +1,37 @@ +Import('*') + +env = env.Clone() +env.Append(CPPPATH = '#/include') +env.Append(CPPPATH = '#/src/mesa') + +# temporary fix +env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '') + +r300compiler = env.ConvenienceLibrary( + target = 'r300compiler', + source = [ + 'radeon_code.c', + 'radeon_compiler.c', + 'radeon_program.c', + 'radeon_program_print.c', + 'radeon_opcodes.c', + 'radeon_program_alu.c', + 'radeon_program_pair.c', + 'radeon_pair_translate.c', + 'radeon_pair_schedule.c', + 'radeon_pair_regalloc.c', + 'radeon_dataflow.c', + 'radeon_dataflow_deadcode.c', + 'radeon_dataflow_swizzles.c', + 'r3xx_fragprog.c', + 'r300_fragprog.c', + 'r300_fragprog_swizzle.c', + 'r300_fragprog_emit.c', + 'r500_fragprog.c', + 'r500_fragprog_emit.c', + 'r3xx_vertprog.c', + 'r3xx_vertprog_dump.c', + 'memory_pool.c', + ]) + +Return('r300compiler') diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 6c9fba4914..aa69b0fc72 100644 --- 
a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -27,17 +27,17 @@ #include "r300_fragprog.h" -#include "shader/prog_parameter.h" +#include <stdio.h> #include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - struct prog_src_register reg = { 0, }; + struct rc_src_register reg = { 0, }; - reg.File = PROGRAM_STATE_VAR; + reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = SWIZZLE_WWWW; + reg.Swizzle = RC_SWIZZLE_WWWW; return reg; } @@ -47,7 +47,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t * - extract operand swizzles * - introduce a temporary register when write masks are needed */ -GLboolean r300_transform_TEX( +int r300_transform_TEX( struct radeon_compiler * c, struct rc_instruction* inst, void* data) @@ -55,77 +55,77 @@ GLboolean r300_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != OPCODE_TEX && - inst->I.Opcode != OPCODE_TXB && - inst->I.Opcode != OPCODE_TXP && - inst->I.Opcode != OPCODE_KIL) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + if (inst->U.I.Opcode != RC_OPCODE_KIL && + c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) 
{ - inst->I.Opcode = OPCODE_MOV; + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; - if (comparefunc == GL_ALWAYS) { - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { - inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); } - return GL_TRUE; + return 1; } else { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; - inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; - inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; - - inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; - - inst_mad->I.Opcode = OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mad->I.DstReg.Index = rc_find_free_temporary(c); - inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - 
inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = 
inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = OPCODE_CMP; + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; } else { @@ -133,9 +133,9 @@ GLboolean r300_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; - inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; - inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); } } @@ -143,52 +143,52 @@ GLboolean r300_transform_TEX( * instead of [0..Width]x[0..Height]. * Add a scaling instruction. 
*/ - if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) { struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev); - inst_mul->I.Opcode = OPCODE_MUL; - inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mul->I.DstReg.Index = rc_find_free_temporary(c); - inst_mul->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR; - inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit); + inst_mul->U.I.Opcode = RC_OPCODE_MUL; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit); - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index; } /* Cannot write texture to output registers or with masks */ - if (inst->I.Opcode != OPCODE_KIL && - (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && + (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + 
inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } - return GL_TRUE; + return 1; } /* just some random things... 
*/ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h index 0ac46dbd9c..418df36c93 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h @@ -33,9 +33,6 @@ #ifndef __R300_FRAGPROG_H_ #define __R300_FRAGPROG_H_ -#include "shader/program.h" -#include "shader/prog_instruction.h" - #include "radeon_compiler.h" #include "radeon_program.h" @@ -44,6 +41,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c); -extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); +extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index c7227bbd15..375838d98e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -56,7 +56,6 @@ struct r300_emit_state { }; #define PROG_CODE \ - struct r300_emit_state * emit = (struct r300_emit_state*)data; \ struct r300_fragment_program_compiler *c = emit->compiler; \ struct r300_fragment_program_code *code = &c->code->code.r300 @@ -69,64 +68,76 @@ struct r300_emit_state { /** * Mark a temporary register as used. 
*/ -static void use_temporary(struct r300_fragment_program_code *code, GLuint index) +static void use_temporary(struct r300_fragment_program_code *code, unsigned int index) { if (index > code->pixsize) code->pixsize = index; } +static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src) +{ + if (src.File == RC_FILE_CONSTANT) { + return src.Index | (1 << 5); + } else if (src.File == RC_FILE_TEMPORARY) { + use_temporary(code, src.Index); + return src.Index; + } + + return 0; +} + -static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTC_CMP; - case OPCODE_DP3: return R300_ALU_OUTC_DP3; - case OPCODE_DP4: return R300_ALU_OUTC_DP4; - case OPCODE_FRC: return R300_ALU_OUTC_FRC; + case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3; + case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4; + case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC; default: error("translate_rgb_opcode(%i): Unknown opcode", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTC_MAD; - case OPCODE_MAX: return R300_ALU_OUTC_MAX; - case OPCODE_MIN: return R300_ALU_OUTC_MIN; - case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; + case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN; + case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA; } } -static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode) +static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R300_ALU_OUTA_CMP; - case OPCODE_DP3: return R300_ALU_OUTA_DP4; - case 
OPCODE_DP4: return R300_ALU_OUTA_DP4; - case OPCODE_EX2: return R300_ALU_OUTA_EX2; - case OPCODE_FRC: return R300_ALU_OUTA_FRC; - case OPCODE_LG2: return R300_ALU_OUTA_LG2; + case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP; + case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4; + case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4; + case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2; + case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC; + case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2; default: error("translate_rgb_opcode(%i): Unknown opcode", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R300_ALU_OUTA_MAD; - case OPCODE_MAX: return R300_ALU_OUTA_MAX; - case OPCODE_MIN: return R300_ALU_OUTA_MIN; - case OPCODE_RCP: return R300_ALU_OUTA_RCP; - case OPCODE_RSQ: return R300_ALU_OUTA_RSQ; + case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD; + case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX; + case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN; + case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP; + case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ; } } /** * Emit one paired ALU instruction. 
*/ -static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) +static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst) { PROG_CODE; if (code->alu.length >= R300_PFS_MAX_ALU_INST) { error("Too many ALU instructions"); - return GL_FALSE; + return 0; } int ip = code->alu.length++; @@ -136,17 +147,13 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { - GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); - if (!inst->RGB.Src[j].Constant) - use_temporary(code, inst->RGB.Src[j].Index); + unsigned int src = use_source(code, inst->RGB.Src[j]); code->alu.inst[ip].rgb_addr |= src << (6*j); - src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); - if (!inst->Alpha.Src[j].Constant) - use_temporary(code, inst->Alpha.Src[j].Index); + src = use_source(code, inst->Alpha.Src[j]); code->alu.inst[ip].alpha_addr |= src << (6*j); - GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); + unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; code->alu.inst[ip].rgb_inst |= arg << (7*j); @@ -186,27 +193,27 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) if (inst->Alpha.DepthWriteMask) { code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; emit->node_flags |= R300_W_OUT; - c->code->writes_depth = GL_TRUE; + c->code->writes_depth = 1; } - return GL_TRUE; + return 1; } /** * Finish the current node without advancing to the next one. 
*/ -static GLboolean finish_node(struct r300_emit_state * emit) +static int finish_node(struct r300_emit_state * emit) { struct r300_fragment_program_compiler * c = emit->compiler; struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ - struct radeon_pair_instruction inst; - _mesa_bzero(&inst, sizeof(inst)); + struct rc_pair_instruction inst; + memset(&inst, 0, sizeof(inst)); if (!emit_alu(emit, &inst)) - return GL_FALSE; + return 0; } unsigned alu_offset = emit->node_first_alu; @@ -217,7 +224,7 @@ static GLboolean finish_node(struct r300_emit_state * emit) if (code->tex.length == emit->node_first_tex) { if (emit->current_node > 0) { error("Node %i has no TEX instructions", emit->current_node); - return GL_FALSE; + return 0; } tex_end = 0; @@ -240,7 +247,7 @@ static GLboolean finish_node(struct r300_emit_state * emit) (tex_end << R300_TEX_SIZE_SHIFT) | emit->node_flags; - return GL_TRUE; + return 1; } @@ -248,79 +255,72 @@ static GLboolean finish_node(struct r300_emit_state * emit) * Begin a block of texture instructions. * Create the necessary indirection. 
*/ -static GLboolean begin_tex(void* data) +static int begin_tex(struct r300_emit_state * emit) { PROG_CODE; if (code->alu.length == emit->node_first_alu && code->tex.length == emit->node_first_tex) { - return GL_TRUE; + return 1; } if (emit->current_node == 3) { error("Too many texture indirections"); - return GL_FALSE; + return 0; } if (!finish_node(emit)) - return GL_FALSE; + return 0; emit->current_node++; emit->node_first_tex = code->tex.length; emit->node_first_alu = code->alu.length; emit->node_flags = 0; - return GL_TRUE; + return 1; } -static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst) +static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) { PROG_CODE; if (code->tex.length >= R300_PFS_MAX_TEX_INST) { error("Too many TEX instructions"); - return GL_FALSE; + return 0; } - GLuint unit = inst->TexSrcUnit; - GLuint dest = inst->DestIndex; - GLuint opcode; + unsigned int unit = inst->U.I.TexSrcUnit; + unsigned int dest = inst->U.I.DstReg.Index; + unsigned int opcode; - switch(inst->Opcode) { - case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; - case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; - case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; - case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; + switch(inst->U.I.Opcode) { + case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break; + case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break; + case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break; + case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break; default: - error("Unknown texture opcode %i", inst->Opcode); - return GL_FALSE; + error("Unknown texture opcode %i", inst->U.I.Opcode); + return 0; } - if (inst->Opcode == RADEON_OPCODE_KIL) { + if (inst->U.I.Opcode == RC_OPCODE_KIL) { unit = 0; dest = 0; } else { use_temporary(code, dest); } - use_temporary(code, inst->SrcIndex); + use_temporary(code, inst->U.I.SrcReg[0].Index); code->tex.inst[code->tex.length++] = - (inst->SrcIndex << 
R300_SRC_ADDR_SHIFT) | + (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | (dest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); - return GL_TRUE; + return 1; } -static const struct radeon_pair_handler pair_handler = { - .EmitPaired = &emit_alu, - .EmitTex = &emit_tex, - .BeginTexBlock = &begin_tex, - .MaxHwTemps = R300_PFS_NUM_TEMP_REGS -}; - /** * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. @@ -333,9 +333,26 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; - _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); + memset(code, 0, sizeof(struct r300_fragment_program_code)); + + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + begin_tex(&emit); + continue; + } + + emit_tex(&emit, inst); + } else { + emit_alu(&emit, &inst->U.P); + } + } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS) + rc_error(&compiler->Base, "Too many hardware temporaries used.\n"); - radeonPairProgram(compiler, &pair_handler, &emit); if (compiler->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 1b14cc3888..cfa48a59e3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -33,16 +33,17 @@ #include "r300_fragprog_swizzle.h" +#include <stdio.h> + #include "../r300_reg.h" -#include "radeon_nqssadce.h" #include "radeon_compiler.h" -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO)) +#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, 
RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) struct swizzle_data { - GLuint hash; /**< swizzle value this matches */ - GLuint base; /**< base value for hw swizzle */ - GLuint stride; /**< difference in base between arg0/1/2 */ + unsigned int hash; /**< swizzle value this matches */ + unsigned int base; /**< base value for hw swizzle */ + unsigned int stride; /**< difference in base between arg0/1/2 */ }; static const struct swizzle_data native_swizzles[] = { @@ -65,15 +66,15 @@ static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swi * Find a native RGB swizzle that matches the given swizzle. * Returns 0 if none found. */ -static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) +static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) { int i, comp; for(i = 0; i < num_native_swizzles; ++i) { const struct swizzle_data* sd = &native_swizzles[i]; for(comp = 0; comp < 3; ++comp) { - GLuint swz = GET_SWZ(swizzle, comp); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz != GET_SWZ(sd->hash, comp)) break; @@ -90,71 +91,72 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle) * Check whether the given instruction supports the swizzle and negate * combinations in the given source register. 
*/ -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { if (reg.Abs) - reg.Negate = NEGATE_NONE; + reg.Negate = RC_MASK_NONE; - if (opcode == OPCODE_KIL || - opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP) { + if (opcode == RC_OPCODE_KIL || + opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP) { int j; if (reg.Abs || reg.Negate) - return GL_FALSE; + return 0; for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(reg.Swizzle, j); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(reg.Swizzle, j); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz != j) - return GL_FALSE; + return 0; } - return GL_TRUE; + return 1; } - GLuint relevant = 0; + unsigned int relevant = 0; int j; for(j = 0; j < 3; ++j) - if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL) + if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED) relevant |= 1 << j; if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; + return 0; if (!lookup_native_swizzle(reg.Swizzle)) - return GL_FALSE; + return 0; - return GL_TRUE; + return 1; } -/** - * Generate MOV dst, src using only native swizzles. 
- */ -void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) { if (src.Abs) - src.Negate = NEGATE_NONE; + src.Negate = RC_MASK_NONE; + + split->NumPhases = 0; - while(dst.WriteMask) { + while(mask) { const struct swizzle_data *best_swizzle = 0; - GLuint best_matchcount = 0; - GLuint best_matchmask = 0; + unsigned int best_matchcount = 0; + unsigned int best_matchmask = 0; int i, comp; for(i = 0; i < num_native_swizzles; ++i) { const struct swizzle_data *sd = &native_swizzles[i]; - GLuint matchcount = 0; - GLuint matchmask = 0; + unsigned int matchcount = 0; + unsigned int matchmask = 0; for(comp = 0; comp < 3; ++comp) { - if (!GET_BIT(dst.WriteMask, comp)) + if (!GET_BIT(mask, comp)) continue; - GLuint swz = GET_SWZ(src.Swizzle, comp); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(src.Swizzle, comp); + if (swz == RC_SWIZZLE_UNUSED) continue; if (swz == GET_SWZ(sd->hash, comp)) { /* check if the negate bit of current component @@ -170,34 +172,35 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, best_swizzle = sd; best_matchcount = matchcount; best_matchmask = matchmask; - if (matchmask == (dst.WriteMask & WRITEMASK_XYZ)) + if (matchmask == (mask & RC_MASK_XYZ)) break; } } - struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg = dst; - inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W); - inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? 
NEGATE_XYZW : NEGATE_NONE; - /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; - dst.WriteMask &= ~inst->I.DstReg.WriteMask; + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; } } +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + /** * Translate an RGB (XYZ) swizzle into the hardware code for the given * instruction source. */ -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) { const struct swizzle_data* sd = lookup_native_swizzle(swizzle); if (!sd) { - _mesa_printf("Not a native swizzle: %08x\n", swizzle); + fprintf(stderr, "Not a native swizzle: %08x\n", swizzle); return 0; } @@ -209,15 +212,15 @@ GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle) * Translate an Alpha (W) swizzle into the hardware code for the given * instruction source. 
*/ -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle) +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) { if (swizzle < 3) return swizzle + 3*src; switch(swizzle) { - case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; - case SWIZZLE_ONE: return R300_ALU_ARGA_ONE; - case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; + case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; + case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; + case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; default: return R300_ALU_ARGA_ONE; } } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h index 231bf4eef5..118476af13 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -28,15 +28,11 @@ #ifndef __R300_FRAGPROG_SWIZZLE_H_ #define __R300_FRAGPROG_SWIZZLE_H_ -#include "main/glheader.h" -#include "shader/prog_instruction.h" +#include "radeon_swizzle.h" -struct nqssadce_state; +extern struct rc_swizzle_caps r300_swizzle_caps; -GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); -void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); - -GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle); -GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle); +unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); +unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); #endif /* __R300_FRAGPROG_SWIZZLE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 76c3a7ecfd..5581f25352 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -22,22 +22,21 @@ #include "radeon_compiler.h" -#include "shader/prog_parameter.h" -#include 
"shader/prog_print.h" -#include "shader/prog_statevars.h" +#include <stdio.h> -#include "radeon_nqssadce.h" +#include "radeon_dataflow.h" #include "radeon_program_alu.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" -static void nqssadce_init(struct nqssadce_state* s) +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) { - struct r300_fragment_program_compiler * c = s->UserData; - s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW; - s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W; + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor, RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); } static void rewrite_depth_out(struct r300_fragment_program_compiler * c) @@ -45,35 +44,35 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c) struct rc_instruction *rci; for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) { - struct prog_instruction * inst = &rci->I; + struct rc_sub_instruction * inst = &rci->U.I; - if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth) + if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth) continue; - if (inst->DstReg.WriteMask & WRITEMASK_Z) { - inst->DstReg.WriteMask = WRITEMASK_W; + if (inst->DstReg.WriteMask & RC_MASK_Z) { + inst->DstReg.WriteMask = RC_MASK_W; } else { inst->DstReg.WriteMask = 0; continue; } switch (inst->Opcode) { - case OPCODE_FRC: - case OPCODE_MOV: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); + case RC_OPCODE_FRC: + case RC_OPCODE_MOV: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); + case 
RC_OPCODE_ADD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MUL: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); break; - case OPCODE_CMP: - case OPCODE_MAD: - inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); - inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); - inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); + case RC_OPCODE_CMP: + case RC_OPCODE_MAD: + inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]); + inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]); + inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]); break; default: // Scalar instructions needn't be reswizzled @@ -89,11 +88,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) if (c->is_r500) { struct radeon_program_transformation transformations[] = { { &r500_transform_TEX, c }, + { &r500_transform_IF, 0 }, { &radeonTransformALU, 0 }, { &radeonTransformDeriv, 0 }, { &radeonTransformTrigScale, 0 } }; - radeonLocalTransform(&c->Base, 4, transformations); + radeonLocalTransform(&c->Base, 5, transformations); + + c->Base.SwizzleCaps = &r500_swizzle_caps; } else { struct radeon_program_transformation transformations[] = { { &r300_transform_TEX, c }, @@ -101,32 +103,66 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { &radeonTransformTrigSimple, 0 } }; radeonLocalTransform(&c->Base, 3, transformations); + + c->Base.SwizzleCaps = &r300_swizzle_caps; } if (c->Base.Debug) { - _mesa_printf("Fragment Program: After native rewrite:\n"); + fprintf(stderr, "Fragment Program: After native rewrite:\n"); rc_print_program(&c->Base.Program); fflush(stderr); } - if (c->is_r500) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(&c->Base, &nqssadce, c); - 
} else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(&c->Base, &nqssadce, c); + rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Fragment Program: After deadcode:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + rc_dataflow_swizzles(&c->Base); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Compiler: after dataflow passes:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + rc_pair_translate(c); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Compiler: after pair translate:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); } + rc_pair_schedule(c); + if (c->Base.Error) + return; + + if (c->Base.Debug) { + fprintf(stderr, "Compiler: after pair scheduling:\n"); + rc_print_program(&c->Base.Program); + fflush(stderr); + } + + if (c->is_r500) + rc_pair_regalloc(c, 128); + else + rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS); + + if (c->Base.Error) + return; + if (c->Base.Debug) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); + fprintf(stderr, "Compiler: after pair register allocation:\n"); rc_print_program(&c->Base.Program); fflush(stderr); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index dad27fc98e..1b2cb8dde7 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -22,13 +22,13 @@ #include "radeon_compiler.h" +#include <stdio.h> + #include "../r300_reg.h" -#include "radeon_nqssadce.h" -#include "radeon_program.h" +#include "radeon_dataflow.h" #include "radeon_program_alu.h" - -#include "shader/prog_print.h" +#include "radeon_swizzle.h" /* @@ -42,104 +42,83 @@ t_swizzle(y), \ t_swizzle(y), \ 
t_src_class(vpi->SrcReg[x].File), \ - NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4)) + RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4)) -static unsigned long t_dst_mask(GLuint mask) +static unsigned long t_dst_mask(unsigned int mask) { - /* WRITEMASK_* is equivalent to VSF_FLAG_* */ - return mask & WRITEMASK_XYZW; + /* RC_MASK_* is equivalent to VSF_FLAG_* */ + return mask & RC_MASK_XYZW; } -static unsigned long t_dst_class(gl_register_file file) +static unsigned long t_dst_class(rc_register_file file) { - switch (file) { - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_TEMPORARY: return PVS_DST_REG_TEMPORARY; - case PROGRAM_OUTPUT: + case RC_FILE_OUTPUT: return PVS_DST_REG_OUT; - case PROGRAM_ADDRESS: + case RC_FILE_ADDRESS: return PVS_DST_REG_A0; - /* - case PROGRAM_INPUT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_STATE_VAR: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } static unsigned long t_dst_index(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT) + if (dst->File == RC_FILE_OUTPUT) return vp->outputs[dst->Index]; return dst->Index; } -static unsigned long t_src_class(gl_register_file file) +static unsigned long t_src_class(rc_register_file file) { switch (file) { - case PROGRAM_BUILTIN: - case PROGRAM_TEMPORARY: + default: + fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file); + /* fall-through */ + case RC_FILE_NONE: + case RC_FILE_TEMPORARY: return PVS_SRC_REG_TEMPORARY; - case PROGRAM_INPUT: + case RC_FILE_INPUT: return PVS_SRC_REG_INPUT; - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - case PROGRAM_STATE_VAR: + case RC_FILE_CONSTANT: return 
PVS_SRC_REG_CONSTANT; - /* - case PROGRAM_OUTPUT: - case PROGRAM_WRITE_ONLY: - case PROGRAM_ADDRESS: - */ - default: - fprintf(stderr, "problem in %s", __FUNCTION__); - _mesa_exit(-1); - return -1; } } -static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b) +static int t_src_conflict(struct rc_src_register a, struct rc_src_register b) { unsigned long aclass = t_src_class(a.File); unsigned long bclass = t_src_class(b.File); if (aclass != bclass) - return GL_FALSE; + return 0; if (aclass == PVS_SRC_REG_TEMPORARY) - return GL_FALSE; + return 0; if (a.RelAddr || b.RelAddr) - return GL_TRUE; + return 1; if (a.Index != b.Index) - return GL_TRUE; + return 1; - return GL_FALSE; + return 0; } -static INLINE unsigned long t_swizzle(GLubyte swizzle) +static inline unsigned long t_swizzle(unsigned int swizzle) { - /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ + /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */ return swizzle; } static unsigned long t_src_index(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - if (src->File == PROGRAM_INPUT) { + if (src->File == RC_FILE_INPUT) { assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { @@ -155,9 +134,9 @@ static unsigned long t_src_index(struct r300_vertex_program_code *vp, /* these two functions should probably be merged... */ static unsigned long t_src(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. 
*/ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -170,9 +149,9 @@ static unsigned long t_src(struct r300_vertex_program_code *vp, } static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, - struct prog_src_register *src) + struct rc_src_register *src) { - /* src->Negate uses the NEGATE_ flags from program_instruction.h, + /* src->Negate uses the RC_MASK_ flags from program_instruction.h, * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. */ return PVS_SRC_OPERAND(t_src_index(vp, src), @@ -181,79 +160,79 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp, t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_src_class(src->File), - src->Negate ? NEGATE_XYZW : NEGATE_NONE) | + src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (src->RelAddr << 4); } -static GLboolean valid_dst(struct r300_vertex_program_code *vp, - struct prog_dst_register *dst) +static int valid_dst(struct r300_vertex_program_code *vp, + struct rc_dst_register *dst) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { + if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) { + return 0; + } else if (dst->File == RC_FILE_ADDRESS) { assert(dst->Index == 0); } - return GL_TRUE; + return 1; } static void ei_vector1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_vector2(struct r300_vertex_program_code *vp, - GLuint 
hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src(vp, &vpi->SrcReg[0]); inst[2] = t_src(vp, &vpi->SrcReg[1]); - inst[3] = __CONST(1, SWIZZLE_ZERO); + inst[3] = __CONST(1, RC_SWIZZLE_ZERO); } static void ei_math1(struct r300_vertex_program_code *vp, - GLuint hw_opcode, - struct prog_instruction *vpi, - GLuint * inst) + unsigned int hw_opcode, + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(hw_opcode, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); + inst[3] = __CONST(0, RC_SWIZZLE_ZERO); } static void ei_lit(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -263,27 +242,27 @@ static void ei_lit(struct r300_vertex_program_code *vp, PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? 
RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X PVS_SRC_SELECT_FORCE_0, // Z t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W t_src_class(vpi->SrcReg[0].File), - vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) | + vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) | (vpi->SrcReg[0].RelAddr << 4); } static void ei_mad(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { /* Remarks about hardware limitations of MAD * (please preserve this comment, as this information is _NOT_ @@ -311,22 +290,22 @@ static void ei_mad(struct r300_vertex_program_code *vp, * according to AMD docs, this should improve performance by one clock * as a nice side bonus. 
*/ - if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY && - vpi->SrcReg[1].File == PROGRAM_TEMPORARY && - vpi->SrcReg[2].File == PROGRAM_TEMPORARY && + if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY && + vpi->SrcReg[1].File == RC_FILE_TEMPORARY && + vpi->SrcReg[2].File == RC_FILE_TEMPORARY && vpi->SrcReg[0].Index != vpi->SrcReg[1].Index && vpi->SrcReg[0].Index != vpi->SrcReg[2].Index && vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) { inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD, - GL_FALSE, - GL_TRUE, + 0, + 1, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); } else { inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD, - GL_FALSE, - GL_FALSE, + 0, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); @@ -337,17 +316,17 @@ static void ei_mad(struct r300_vertex_program_code *vp, } static void ei_pow(struct r300_vertex_program_code *vp, - struct prog_instruction *vpi, - GLuint * inst) + struct rc_sub_instruction *vpi, + unsigned int * inst) { inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF, - GL_TRUE, - GL_FALSE, + 1, + 0, t_dst_index(vp, &vpi->DstReg), t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File)); inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]); - inst[2] = __CONST(0, SWIZZLE_ZERO); + inst[2] = __CONST(0, RC_SWIZZLE_ZERO); inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } @@ -362,8 +341,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi compiler->SetHwInputOutput(compiler); for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) { - struct prog_instruction *vpi = &rci->I; - GLuint *inst = compiler->code->body.d + compiler->code->length; + struct rc_sub_instruction *vpi = &rci->U.I; + unsigned int *inst = compiler->code->body.d + compiler->code->length; /* Skip instructions writing to non-existing destination */ if (!valid_dst(compiler->code, &vpi->DstReg)) @@ -375,26 
+354,26 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } switch (vpi->Opcode) { - case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; - case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; - case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; - case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; - case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; - case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; - case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; - case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; - case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; - case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; - case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; - case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; - case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; - case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; - case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; - case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; - case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; - case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break; + case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; + case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + 
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; + case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; + case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; + case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; + case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break; + case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break; + case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break; + case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break; + case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break; + case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break; + case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break; + case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break; + case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; + case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; default: rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode); return; @@ -408,38 +387,37 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi } struct temporary_allocation { - GLuint Allocated:1; - GLuint HwTemp:15; + unsigned int Allocated:1; + unsigned int HwTemp:15; struct rc_instruction * LastRead; }; static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; - GLuint num_orig_temps = 0; - GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS]; + unsigned int num_orig_temps = 0; + char hwtemps[VSF_MAX_FRAGMENT_TEMPS]; struct temporary_allocation * ta; - GLuint i, j; + unsigned int i, j; compiler->code->num_temporaries = 0; 
memset(hwtemps, 0, sizeof(hwtemps)); /* Pass 1: Count original temporaries and allocate structures */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { - if (inst->I.SrcReg[i].Index >= num_orig_temps) - num_orig_temps = inst->I.SrcReg[i].Index + 1; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + if (inst->U.I.SrcReg[i].Index >= num_orig_temps) + num_orig_temps = inst->U.I.SrcReg[i].Index + 1; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { - if (inst->I.DstReg.Index >= num_orig_temps) - num_orig_temps = inst->I.DstReg.Index + 1; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + if (inst->U.I.DstReg.Index >= num_orig_temps) + num_orig_temps = inst->U.I.DstReg.Index + 1; } } } @@ -450,32 +428,31 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) - ta[inst->I.SrcReg[i].Index].LastRead = inst; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) + ta[inst->U.I.SrcReg[i].Index].LastRead = inst; } } /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { 
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); - GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - for (i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.SrcReg[i].Index; - inst->I.SrcReg[i].Index = ta[orig].HwTemp; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = GL_FALSE; + hwtemps[ta[orig].HwTemp] = 0; } } - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY) { - GLuint orig = inst->I.DstReg.Index; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.DstReg.Index; if (!ta[orig].Allocated) { for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) { @@ -485,16 +462,16 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c if (j >= VSF_MAX_FRAGMENT_TEMPS) { fprintf(stderr, "Out of hw temporaries\n"); } else { - ta[orig].Allocated = GL_TRUE; + ta[orig].Allocated = 1; ta[orig].HwTemp = j; - hwtemps[j] = GL_TRUE; + hwtemps[j] = 1; if (j >= compiler->code->num_temporaries) compiler->code->num_temporaries = j + 1; } } - inst->I.DstReg.Index = ta[orig].HwTemp; + inst->U.I.DstReg.Index = ta[orig].HwTemp; } } } @@ -505,45 +482,45 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c * Vertex engine cannot read two inputs or two constants at the same time. * Introduce intermediate MOVs to temporary registers to account for this. 
*/ -static GLboolean transform_source_conflicts( +static int transform_source_conflicts( struct radeon_compiler *c, struct rc_instruction* inst, void* unused) { - GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (num_operands == 3) { - if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) - || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { + if (opcode->NumSrcRegs == 3) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]) + || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = tmpreg; - inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; - - reset_srcreg(&inst->I.SrcReg[2]); - inst->I.SrcReg[2].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[2].Index = tmpreg; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; + + reset_srcreg(&inst->U.I.SrcReg[2]); + inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[2].Index = tmpreg; } } - if (num_operands >= 2) { - if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { + if (opcode->NumSrcRegs >= 2) { + if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) { int tmpreg = rc_find_free_temporary(c); struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = tmpreg; - inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; - - reset_srcreg(&inst->I.SrcReg[1]); - inst->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[1].Index = tmpreg; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; 
+ inst_mov->U.I.DstReg.Index = tmpreg; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; + + reset_srcreg(&inst->U.I.SrcReg[1]); + inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[1].Index = tmpreg; } } - return GL_TRUE; + return 1; } static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) @@ -554,44 +531,52 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) if ((compiler->RequiredOutputs & (1 << i)) && !(compiler->Base.Program.OutputsWritten & (1 << i))) { struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; - inst->I.DstReg.Index = i; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = i; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - inst->I.SrcReg[0].File = PROGRAM_CONSTANT; - inst->I.SrcReg[0].Index = 0; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT; + inst->U.I.SrcReg[0].Index = 0; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; compiler->Base.Program.OutputsWritten |= 1 << i; } } } -static void nqssadceInit(struct nqssadce_state* s) +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) { - struct r300_vertex_program_compiler * compiler = s->UserData; + struct r300_vertex_program_compiler * c = userdata; int i; - for(i = 0; i < VERT_RESULT_MAX; ++i) { - if (compiler->RequiredOutputs & (1 << i)) - s->Outputs[i].Sourced = WRITEMASK_XYZW; + for(i = 0; i < 32; ++i) { + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); } } -static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg) +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { (void) opcode; (void) reg; - return GL_TRUE; + return 
1; } +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + addArtificialOutputs(compiler); { @@ -624,22 +609,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) fflush(stderr); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(&compiler->Base, &nqssadce, compiler); + rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after deadcode:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); + } - /* We need this step for reusing temporary registers */ - allocate_temporary_registers(compiler); + rc_dataflow_swizzles(&compiler->Base); - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + allocate_temporary_registers(compiler); + + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after dataflow:\n"); + rc_print_program(&compiler->Base.Program); + fflush(stderr); } translate_vertex_program(compiler); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c index 980ef3eaea..66f9b0529f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -146,7 +146,7 @@ static void r300_vs_op_dump(uint32_t op) static void r300_vs_src_dump(uint32_t src) { fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n", - (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3], + (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3], src & (1 << 25) ? 
"-" : " ", r300_vs_swiz_debug[(src >> 13) & 0x7], src & (1 << 26) ? "-" : " ", diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 7e2faed690..d87acecdab 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -27,15 +27,17 @@ #include "r500_fragprog.h" +#include <stdio.h> + #include "../r300_reg.h" -static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu) +static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu) { - struct prog_src_register reg = { 0, }; + struct rc_src_register reg = { 0, }; - reg.File = PROGRAM_STATE_VAR; + reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu); - reg.Swizzle = SWIZZLE_WWWW; + reg.Swizzle = RC_SWIZZLE_WWWW; return reg; } @@ -44,7 +46,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t * - implement texture compare (shadow extensions) * - extract non-native source / destination operands */ -GLboolean r500_transform_TEX( +int r500_transform_TEX( struct radeon_compiler * c, struct rc_instruction * inst, void* data) @@ -52,77 +54,77 @@ GLboolean r500_transform_TEX( struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)data; - if (inst->I.Opcode != OPCODE_TEX && - inst->I.Opcode != OPCODE_TXB && - inst->I.Opcode != OPCODE_TXP && - inst->I.Opcode != OPCODE_KIL) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_TEX && + inst->U.I.Opcode != RC_OPCODE_TXB && + inst->U.I.Opcode != RC_OPCODE_TXP && + inst->U.I.Opcode != RC_OPCODE_KIL) + return 0; /* ARB_shadow & EXT_shadow_funcs */ - if (inst->I.Opcode != OPCODE_KIL && - c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; + if (inst->U.I.Opcode != RC_OPCODE_KIL && + 
c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) { + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; - if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - inst->I.Opcode = OPCODE_MOV; + if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.Opcode = RC_OPCODE_MOV; - if (comparefunc == GL_ALWAYS) { - inst->I.SrcReg[0].File = PROGRAM_BUILTIN; - inst->I.SrcReg[0].Swizzle = SWIZZLE_1111; + if (comparefunc == RC_COMPARE_FUNC_ALWAYS) { + inst->U.I.SrcReg[0].File = RC_FILE_NONE; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; } else { - inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit); + inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit); } - return GL_TRUE; + return 1; } else { - GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func; - GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode; + rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; + unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode; struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst); struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp); struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad); int pass, fail; - inst_rcp->I.Opcode = OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; - inst_rcp->I.DstReg.Index = rc_find_free_temporary(c); - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; - inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; - - inst_cmp->I.DstReg = inst->I.DstReg; - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = rc_find_free_temporary(c); - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; - - inst_mad->I.Opcode = OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mad->I.DstReg.Index = 
rc_find_free_temporary(c); - inst_mad->I.SrcReg[0] = inst->I.SrcReg[0]; - inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ; - inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index; - inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; - inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; + inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + + inst_cmp->U.I.DstReg = inst->U.I.DstReg; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = rc_find_free_temporary(c); + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + + inst_mad->U.I.Opcode = RC_OPCODE_MAD; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ; + inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index; + inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index; if (depthmode == 0) /* GL_LUMINANCE */ - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ - inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW; + inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 * r >= tex <=> not (-tex+r < 0 */ - if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) - inst_mad->I.SrcReg[2].Negate = 
inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW; + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) + inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW; else - inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW; + inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; - inst_cmp->I.Opcode = OPCODE_CMP; + inst_cmp->U.I.Opcode = RC_OPCODE_CMP; /* DstReg has been filled out above */ - inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index; + inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index; - if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { pass = 1; fail = 2; } else { @@ -130,131 +132,161 @@ GLboolean r500_transform_TEX( fail = 1; } - inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN; - inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111; - inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit); + inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; + inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; + inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit); } } /* Cannot write texture to output registers */ - if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) { + if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg = inst->I.DstReg; - inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg = inst->U.I.DstReg; + inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); - inst->I.DstReg.File = 
PROGRAM_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } /* Cannot read texture coordinate from constants file */ - if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) { + if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); - inst_mov->I.Opcode = OPCODE_MOV; - inst_mov->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mov->I.DstReg.Index = rc_find_free_temporary(c); - inst_mov->I.SrcReg[0] = inst->I.SrcReg[0]; + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c); + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - reset_srcreg(&inst->I.SrcReg[0]); - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index; + reset_srcreg(&inst->U.I.SrcReg[0]); + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index; } - return GL_TRUE; + return 1; +} + +/** + * Rewrite IF instructions to use the ALU result special register. 
+ */ +int r500_transform_IF( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + if (inst->U.I.Opcode != RC_OPCODE_IF) + return 0; + + struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); + inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.DstReg.WriteMask = 0; + inst_mov->U.I.WriteALUResult = RC_ALURESULT_W; + inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL; + inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle, + RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X); + + inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL; + inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[0].Negate = 0; + + return 1; } -GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { - GLuint relevant; + unsigned int relevant; int i; - if (opcode == OPCODE_TEX || - opcode == OPCODE_TXB || - opcode == OPCODE_TXP || - opcode == OPCODE_KIL) { + if (opcode == RC_OPCODE_TEX || + opcode == RC_OPCODE_TXB || + opcode == RC_OPCODE_TXP || + opcode == RC_OPCODE_KIL) { if (reg.Abs) - return GL_FALSE; + return 0; - if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) - return GL_FALSE; + if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) + return 0; if (reg.Negate) - reg.Negate ^= NEGATE_XYZW; + reg.Negate ^= RC_MASK_XYZW; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz == SWIZZLE_NIL) { + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED) { reg.Negate &= ~(1 << i); continue; } if (swz >= 4) - return GL_FALSE; + return 0; } if (reg.Negate) - return GL_FALSE; + return 0; - return GL_TRUE; - } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) { 
+ return 1; + } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) { /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles; * if it doesn't fit perfectly into a .xyzw case... */ - if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate) - return GL_TRUE; + if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) + return 1; - return GL_FALSE; + return 0; } else { /* ALU instructions support almost everything */ if (reg.Abs) - return GL_TRUE; + return 1; relevant = 0; for(i = 0; i < 3; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO) + unsigned int swz = GET_SWZ(reg.Swizzle, i); + if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO) relevant |= 1 << i; } if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) - return GL_FALSE; + return 0; - return GL_TRUE; + return 1; } } /** - * Implement a MOV with a potentially non-native swizzle. + * Split source register access. * * The only thing we *cannot* do in an ALU instruction is per-component - * negation. Therefore, we split the MOV into two instructions when necessary. + * negation. 
*/ -void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) { - GLuint negatebase[2] = { 0, 0 }; + unsigned int negatebase[2] = { 0, 0 }; int i; for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(src.Swizzle, i); - if (swz == SWIZZLE_NIL) + unsigned int swz = GET_SWZ(src.Swizzle, i); + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) continue; negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } + split->NumPhases = 0; + for(i = 0; i <= 1; ++i) { if (!negatebase[i]) continue; - struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg = dst; - inst->I.DstReg.WriteMask = negatebase[i]; - inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; + split->Phase[split->NumPhases++] = negatebase[i]; } } +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; static char *toswiz(int swiz_val) { switch(swiz_val) { diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 9091f65cd2..0918cdf518 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -33,21 +33,21 @@ #ifndef __R500_FRAGPROG_H_ #define __R500_FRAGPROG_H_ -#include "shader/prog_parameter.h" -#include "shader/prog_instruction.h" - #include "radeon_compiler.h" -#include "radeon_nqssadce.h" +#include "radeon_swizzle.h" extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); -extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); +extern struct rc_swizzle_caps r500_swizzle_caps; -extern void 
r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); +extern int r500_transform_TEX( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data); -extern GLboolean r500_transform_TEX( +extern int r500_transform_IF( struct radeon_compiler * c, struct rc_instruction * inst, void* data); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index d694725c9b..b1b14394b6 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -37,10 +37,6 @@ * * \author Corbin Simpson <MostAwesomeDude@gmail.com> * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * */ #include "r500_fragprog.h" @@ -51,7 +47,6 @@ #define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ struct r500_fragment_program_code *code = &c->code->code.r500 #define error(fmt, args...) 
do { \ @@ -60,63 +55,80 @@ } while(0) -static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + unsigned int MaxBranchDepth; +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; default: error("translate_rgb_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; } } -static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALPHA_OP_CMP; - case OPCODE_COS: return R500_ALPHA_OP_COS; - 
case OPCODE_DDX: return R500_ALPHA_OP_MDH; - case OPCODE_DDY: return R500_ALPHA_OP_MDV; - case OPCODE_DP3: return R500_ALPHA_OP_DP; - case OPCODE_DP4: return R500_ALPHA_OP_DP; - case OPCODE_EX2: return R500_ALPHA_OP_EX2; - case OPCODE_FRC: return R500_ALPHA_OP_FRC; - case OPCODE_LG2: return R500_ALPHA_OP_LN2; + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; default: error("translate_alpha_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALPHA_OP_MAD; - case OPCODE_MAX: return R500_ALPHA_OP_MAX; - case OPCODE_MIN: return R500_ALPHA_OP_MIN; - case OPCODE_RCP: return R500_ALPHA_OP_RCP; - case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case OPCODE_SIN: return R500_ALPHA_OP_SIN; + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; } } -static GLuint fix_hw_swizzle(GLuint swz) +static unsigned int fix_hw_swizzle(unsigned int swz) { if (swz == 5) swz = 6; - if (swz == SWIZZLE_NIL) swz = 4; + if (swz == RC_SWIZZLE_UNUSED) swz = 4; return swz; } -static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) { - GLuint t = inst->RGB.Arg[arg].Source; + unsigned int t = inst->RGB.Arg[arg].Source; int comp; t |= inst->RGB.Arg[arg].Negate << 11; t |= inst->RGB.Arg[arg].Abs << 12; @@ 
-127,39 +139,57 @@ static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) return t; } -static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) { - GLuint t = inst->Alpha.Arg[i].Source; + unsigned int t = inst->Alpha.Arg[i].Source; t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; t |= inst->Alpha.Arg[i].Negate << 5; t |= inst->Alpha.Arg[i].Abs << 6; return t; } -static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} + +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) { if (index > code->max_temp_idx) code->max_temp_idx = index; } -static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) { - if (!src.Constant) + if (src.File == RC_FILE_CONSTANT) { + return src.Index | 0x100; + } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); - return src.Index | src.Constant << 8; + return src.Index; + } + + return 0; } /** * Emit a paired ALU instruction. 
*/ -static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_alu: Too many instructions"); - return GL_FALSE; + return; } int ip = ++code->inst_end; @@ -167,17 +197,22 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); - if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { code->inst[ip].inst0 = R500_INST_TYPE_OUT; - else + if (inst->WriteALUResult) { + error("%s: cannot write output and ALU result at the same time"); + return; + } + } else { code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->code->writes_depth = GL_TRUE; + c->code->writes_depth = 1; } code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); @@ -206,12 +241,21 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - return GL_TRUE; + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, 
inst->ALUResultCompare); + } } -static GLuint translate_strq_swizzle(GLuint swizzle) +static unsigned int translate_strq_swizzle(unsigned int swizzle) { - GLuint swiz = 0; + unsigned int swiz = 0; int i; for (i = 0; i < 4; i++) swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; @@ -221,67 +265,193 @@ static GLuint translate_strq_swizzle(GLuint swizzle) /** * Emit a single TEX instruction */ -static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_tex: Too many instructions"); - return GL_FALSE; + return 0; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->WriteMask << 11) + | (inst->DstReg.WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) - code->inst[ip].inst1 |= R500_TEX_UNSCALED; + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; switch (inst->Opcode) { - case RADEON_OPCODE_KIL: + case RC_OPCODE_KIL: code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; break; - case RADEON_OPCODE_TEX: + case RC_OPCODE_TEX: code->inst[ip].inst1 |= R500_TEX_INST_LD; break; - case RADEON_OPCODE_TXB: + case RC_OPCODE_TXB: code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; break; - case RADEON_OPCODE_TXP: + case RC_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: error("emit_tex can't handle opcode %x\n", inst->Opcode); } - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) - | (translate_strq_swizzle(inst->SrcSwizzle) << 8) - | R500_TEX_DST_ADDR(inst->DestIndex) + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | 
(translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - return GL_TRUE; + return 1; } -static const struct radeon_pair_handler pair_handler = { - .EmitPaired = emit_paired, - .EmitTex = emit_tex, - .MaxHwTemps = 128 -}; +static void grow_branches(struct emit_state * s) +{ + unsigned int newreserved = s->BranchesReserved * 2; + struct branch_info * newbranches; + + if (!newreserved) + newreserved = 4; + + newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info)); + memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info)); + + s->Branches = newbranches; + s->BranchesReserved = newreserved; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + if (s->Code->inst_end >= 511) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + unsigned int newip = ++s->Code->inst_end; + + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + if (inst->U.I.Opcode == RC_OPCODE_IF) { + if (s->CurrentBranchDepth >= 32) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + + if (s->CurrentBranchDepth >= s->BranchesReserved) + grow_branches(s); + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) { + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + if 
(!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + + s->CurrentBranchDepth--; + } else { + rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode); + } +} void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { + struct emit_state s; struct r500_fragment_program_code *code = &compiler->code->code.r500; - _mesa_bzero(code, sizeof(*code)); + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + 
s.Code = code; + + memset(code, 0, sizeof(*code)); code->max_temp_idx = 1; code->inst_end = -1; - radeonPairProgram(compiler, &pair_handler, compiler); + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= 128) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + if (compiler->Base.Error) return; @@ -296,4 +466,11 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } + + if (s.MaxBranchDepth >= 4) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c index c7923004df..1a3d8bb641 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c @@ -25,11 +25,13 @@ * */ -#include "main/mtypes.h" -#include "shader/prog_instruction.h" - #include "radeon_code.h" +#include <stdlib.h> +#include <string.h> + +#include "radeon_program.h" + void rc_constants_init(struct rc_constant_list * c) { memset(c, 0, sizeof(*c)); @@ -143,7 +145,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) { for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) { if (c->Constants[index].u.Immediate[comp] == data) { - *swizzle = 
MAKE_SWIZZLE4(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); return index; } } @@ -156,7 +158,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da if (free_index >= 0) { unsigned comp = c->Constants[free_index].Size++; c->Constants[free_index].u.Immediate[comp] = data; - *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp); + *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp); return free_index; } @@ -164,7 +166,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da constant.Type = RC_CONSTANT_IMMEDIATE; constant.Size = 1; constant.u.Immediate[0] = data; - *swizzle = SWIZZLE_XXXX; + *swizzle = RC_SWIZZLE_XXXX; return rc_constants_add(c, &constant); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 3e88554ba1..902b7cfa53 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -89,6 +89,23 @@ unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const floa unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle); /** + * Compare functions. + * + * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you + * the correct GL compare function. + */ +typedef enum { + RC_COMPARE_FUNC_NEVER = 0, + RC_COMPARE_FUNC_LESS, + RC_COMPARE_FUNC_EQUAL, + RC_COMPARE_FUNC_LEQUAL, + RC_COMPARE_FUNC_GREATER, + RC_COMPARE_FUNC_NOTEQUAL, + RC_COMPARE_FUNC_GEQUAL, + RC_COMPARE_FUNC_ALWAYS +} rc_compare_func; + +/** * Stores state that influences the compilation of a fragment program. */ struct r300_fragment_program_external_state { @@ -105,10 +122,12 @@ struct r300_fragment_program_external_state { /** * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. 
if compare function is GL_LEQUAL, this field is 3] + * this field specifies the compare function. + * + * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0). * * Otherwise, this field is 0. + * \sa rc_compare_func */ unsigned texture_compare_func : 3; } unit[16]; @@ -163,6 +182,8 @@ struct r500_fragment_program_code { int inst_end; /* Number of instructions - 1; also, last instruction to be executed */ int max_temp_idx; + + uint32_t us_fc_ctrl; }; struct rX00_fragment_program_code { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index da950d5289..c0e7a7f7a0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -23,6 +23,8 @@ #include "radeon_compiler.h" #include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> #include "radeon_program.h" @@ -34,7 +36,7 @@ void rc_init(struct radeon_compiler * c) memory_pool_init(&c->Pool); c->Program.Instructions.Prev = &c->Program.Instructions; c->Program.Instructions.Next = &c->Program.Instructions; - c->Program.Instructions.I.Opcode = OPCODE_END; + c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; } void rc_destroy(struct radeon_compiler * c) @@ -60,7 +62,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) { va_list ap; - c->Error = GL_TRUE; + c->Error = 1; if (!c->ErrorMsg) { /* Only remember the first error */ @@ -91,28 +93,63 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) } } +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. 
+ */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; + } + } +} + /** * Rewrite the program such that everything that source the given input * register will source new_input instead. */ -void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input) +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) { struct rc_instruction * inst; c->Program.InputsRead &= ~(1 << input); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; - for(i = 0; i < numsrcs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) { - inst->I.SrcReg[i].File = new_input.File; - inst->I.SrcReg[i].Index = new_input.Index; - inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle); - if (!inst->I.SrcReg[i].Abs) { - inst->I.SrcReg[i].Negate ^= new_input.Negate; - inst->I.SrcReg[i].Abs = new_input.Abs; + for(i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { + inst->U.I.SrcReg[i].File = new_input.File; + inst->U.I.SrcReg[i].Index = new_input.Index; + inst->U.I.SrcReg[i].Swizzle = 
combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); + if (!inst->U.I.SrcReg[i].Abs) { + inst->U.I.SrcReg[i].Negate ^= new_input.Negate; + inst->U.I.SrcReg[i].Abs = new_input.Abs; } c->Program.InputsRead |= 1 << new_input.Index; @@ -134,12 +171,12 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou c->Program.OutputsWritten &= ~(1 << output); for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { - inst->I.DstReg.Index = new_output; - inst->I.DstReg.WriteMask &= writemask; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.Index = new_output; + inst->U.I.DstReg.WriteMask &= writemask; c->Program.OutputsWritten |= 1 << new_output; } @@ -157,33 +194,33 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou struct rc_instruction * inst; for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (numdsts) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) { - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = tempreg; + if (opcode->HasDstReg) { + if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = tempreg; } } } inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; - inst->I.DstReg.Index = output; + inst->U.I.Opcode = 
RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = output; - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - inst->I.Opcode = OPCODE_MOV; - inst->I.DstReg.File = PROGRAM_OUTPUT; - inst->I.DstReg.Index = dup_output; + inst->U.I.Opcode = RC_OPCODE_MOV; + inst->U.I.DstReg.File = RC_FILE_OUTPUT; + inst->U.I.DstReg.Index = dup_output; - inst->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[0].Index = tempreg; - inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW; + inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[0].Index = tempreg; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; c->Program.OutputsWritten |= 1 << dup_output; } @@ -201,60 +238,60 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig /* perspective divide */ struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); - inst_rcp->I.Opcode = OPCODE_RCP; + inst_rcp->U.I.Opcode = RC_OPCODE_RCP; - inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY; - inst_rcp->I.DstReg.Index = tempregi; - inst_rcp->I.DstReg.WriteMask = WRITEMASK_W; + inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_rcp->U.I.DstReg.Index = tempregi; + inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; - inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT; - inst_rcp->I.SrcReg[0].Index = new_input; - inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW; + inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_rcp->U.I.SrcReg[0].Index = new_input; + inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp); - inst_mul->I.Opcode = OPCODE_MUL; + inst_mul->U.I.Opcode = RC_OPCODE_MUL; - inst_mul->I.DstReg.File = PROGRAM_TEMPORARY; 
- inst_mul->I.DstReg.Index = tempregi; - inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tempregi; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mul->I.SrcReg[0].File = PROGRAM_INPUT; - inst_mul->I.SrcReg[0].Index = new_input; + inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; + inst_mul->U.I.SrcReg[0].Index = new_input; - inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY; - inst_mul->I.SrcReg[1].Index = tempregi; - inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tempregi; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; /* viewport transformation */ struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul); - inst_mad->I.Opcode = OPCODE_MAD; + inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->I.DstReg.File = PROGRAM_TEMPORARY; - inst_mad->I.DstReg.Index = tempregi; - inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ; + inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mad->U.I.DstReg.Index = tempregi; + inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; - inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY; - inst_mad->I.SrcReg[0].Index = tempregi; - inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_mad->U.I.SrcReg[0].Index = tempregi; + inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR; - inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); - inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); + 
inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); - inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR; - inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index; - inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; + inst_mad->U.I.SrcReg[2].Index = inst_mad->U.I.SrcReg[1].Index; + inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO); struct rc_instruction * inst; for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { - const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode); + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); unsigned i; - for(i = 0; i < numsrcs; i++) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT && - inst->I.SrcReg[i].Index == wpos) { - inst->I.SrcReg[i].File = PROGRAM_TEMPORARY; - inst->I.SrcReg[i].Index = tempregi; + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == wpos) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; } } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e63ab8840a..87a732cd90 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -23,35 +23,11 @@ #ifndef RADEON_COMPILER_H #define RADEON_COMPILER_H -#include "main/mtypes.h" -#include "shader/prog_instruction.h" - #include "memory_pool.h" #include "radeon_code.h" +#include "radeon_program.h" - -struct rc_instruction { - struct rc_instruction * Prev; - struct rc_instruction * Next; - struct prog_instruction I; -}; - -struct rc_program { - /** - * Instructions.Next points to the first instruction, - * Instructions.Prev points to the last 
instruction. - */ - struct rc_instruction Instructions; - - /* Long term, we should probably remove InputsRead & OutputsWritten, - * since updating dependent state can be fragile, and they aren't - * actually used very often. */ - uint32_t InputsRead; - uint32_t OutputsWritten; - uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ - - struct rc_constant_list Constants; -}; +struct rc_swizzle_caps; struct radeon_compiler { struct memory_pool Pool; @@ -59,6 +35,14 @@ struct radeon_compiler { unsigned Debug:1; unsigned Error:1; char * ErrorMsg; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ }; void rc_init(struct radeon_compiler * c); @@ -67,11 +51,26 @@ void rc_destroy(struct radeon_compiler * c); void rc_debug(struct radeon_compiler * c, const char * fmt, ...); void rc_error(struct radeon_compiler * c, const char * fmt, ...); -void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program); +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. 
+ * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) void rc_calculate_inputs_outputs(struct radeon_compiler * c); -void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input); +void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask); void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input); @@ -97,7 +96,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c); struct r300_vertex_program_compiler { struct radeon_compiler Base; struct r300_vertex_program_code *code; - GLbitfield RequiredOutputs; + uint32_t RequiredOutputs; void * UserData; void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c new file mode 100644 index 0000000000..cce9166e64 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_dataflow.h" + +#include "radeon_program.h" + + +static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + + if (inst->SrcReg[src].File == RC_FILE_NONE) + return; + + for(unsigned int chan = 0; chan < 4; ++chan) + refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan); + + refmask &= RC_MASK_XYZW; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(refmask, chan)) { + cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan); + } + } + + if (refmask && inst->SrcReg[src].RelAddr) + cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + unsigned int refmasks[3] = { 0, 0, 0 }; + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + for(unsigned int chan = 0; chan < 3; ++chan) { + unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan); + if (swz < 4) + refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz; + } + } + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + if (inst->Alpha.Arg[arg].Swizzle < 4) + refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle; + } + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + for(unsigned int chan = 0; chan < 3; ++chan) { + if (GET_BIT(refmasks[src], chan)) + cb(userdata, fullinst, inst->RGB.Src[src].File, 
inst->RGB.Src[src].Index, chan); + } + } + + if (inst->Alpha.Src[src].Used) { + if (GET_BIT(refmasks[src], 3)) + cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3); + } + } +} + +/** + * Calls a callback function for all sourced register channels. + * + * This is conservative, i.e. channels may be called multiple times, + * and the writemask of the instruction is not taken into account. + */ +void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + reads_normal(inst, cb, userdata); + } else { + reads_pair(inst, cb, userdata); + } +} + + + +static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_sub_instruction * inst = &fullinst->U.I; + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(inst->DstReg.WriteMask, chan)) + cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan); + } + } + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +} + +static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + + for(unsigned int chan = 0; chan < 3; ++chan) { + if (GET_BIT(inst->RGB.WriteMask, chan)) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan); + } + + if (inst->Alpha.WriteMask) + cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3); + + if (inst->WriteALUResult) + cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0); +} + +/** + * Calls a callback function for all written register channels. + * + * \warning Does not report output registers for paired instructions! 
+ */ +void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + writes_normal(inst, cb, userdata); + } else { + writes_pair(inst, cb, userdata); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h new file mode 100644 index 0000000000..5aa4cb64f3 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; + + +/** + * Help analyze the register accesses of instructions. 
+ */ +/*@{*/ +typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan); +void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata); +/*@}*/ + + +/** + * Compiler passes based on dataflow analysis. + */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata); +void rc_dataflow_swizzles(struct radeon_compiler * c); +/*@}*/ + +#endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c new file mode 100644 index 0000000000..e0c66c4aeb --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -0,0 +1,295 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct updatemask_state { + unsigned char Output[RC_REGISTER_MAX_INDEX]; + unsigned char Temporary[RC_REGISTER_MAX_INDEX]; + unsigned char Address; + unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; +}; + +struct instruction_state { + unsigned char WriteMask:4; + unsigned char WriteALUResult:1; + unsigned char SrcReg[3]; +}; + +struct branchinfo { + unsigned int HaveElse:1; + + struct updatemask_state StoreEndif; + struct updatemask_state StoreElse; +}; + +struct deadcode_state { + struct radeon_compiler * C; + struct instruction_state * Instructions; + + struct updatemask_state R; + + struct branchinfo * BranchStack; + unsigned int BranchStackSize; + unsigned int BranchStackReserved; +}; + + +static void or_updatemasks( + struct updatemask_state * dst, + struct updatemask_state * a, + struct updatemask_state * b) +{ + for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { + dst->Output[i] = a->Output[i] | b->Output[i]; + dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; + } + + for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) + dst->Special[i] = a->Special[i] | b->Special[i]; + + dst->Address = a->Address | b->Address; +} + +static void push_branch(struct deadcode_state * s) +{ + if (s->BranchStackSize >= s->BranchStackReserved) { + unsigned int new_reserve = 2 * s->BranchStackReserved; + struct branchinfo * new_stack; + + if (!new_reserve) + new_reserve = 4; + + new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo)); + memcpy(new_stack, s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo)); + + s->BranchStack = new_stack; + s->BranchStackReserved = 
new_reserve; + } + + struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++]; + branch->HaveElse = 0; + memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); +} + +static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); + return 0; + } + + if (file == RC_FILE_OUTPUT) + return &s->R.Output[index]; + else + return &s->R.Temporary[index]; + } else if (file == RC_FILE_ADDRESS) { + return &s->R.Address; + } else if (file == RC_FILE_SPECIAL) { + if (index >= RC_NUM_SPECIAL_REGISTERS) { + rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->R.Special[index]; + } + + return 0; +} + +static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned char * pused = get_used_ptr(s, file, index); + if (pused) + *pused |= mask; +} + +static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + struct instruction_state * insts = &s->Instructions[inst->IP]; + unsigned int usedmask = 0; + + if (opcode->HasDstReg) { + unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); + if (pused) { + usedmask = *pused & inst->U.I.DstReg.WriteMask; + *pused &= ~usedmask; + } + } + + insts->WriteMask |= usedmask; + + if (inst->U.I.WriteALUResult) { + unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); + if (pused && *pused) { + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usedmask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usedmask |= RC_MASK_W; + + *pused = 0; + insts->WriteALUResult = 1; + } + } + + unsigned int srcmasks[3]; + 
rc_compute_sources_for_writemask(opcode, usedmask, srcmasks); + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + unsigned int refmask = 0; + unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; + insts->SrcReg[src] |= newsrcmask; + + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_BIT(newsrcmask, chan)) + refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + continue; + + mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); + + if (inst->U.I.SrcReg[src].RelAddr) + mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct deadcode_state * s = data; + + mark_used(s, RC_FILE_OUTPUT, index, mask); +} + +void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata) +{ + struct deadcode_state s; + unsigned int nr_instructions; + + memset(&s, 0, sizeof(s)); + s.C = c; + + nr_instructions = rc_recompute_ips(c); + s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); + memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); + + dce(userdata, &s, &mark_output_use); + + for(struct rc_instruction * inst = c->Program.Instructions.Prev; + inst != &c->Program.Instructions; + inst = inst->Prev) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + if (opcode->Opcode == RC_OPCODE_ENDIF) { + push_branch(&s); + } else { + if (s.BranchStackSize) { + struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; + + if (opcode->Opcode == RC_OPCODE_IF) { + or_updatemasks(&s.R, + &s.R, + branch->HaveElse ? 
&branch->StoreElse : &branch->StoreEndif); + + s.BranchStackSize--; + } else if (opcode->Opcode == RC_OPCODE_ELSE) { + if (branch->HaveElse) { + rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); + } else { + memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); + memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); + branch->HaveElse = 1; + } + } else { + rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); + } + } else { + rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__); + } + } + } + + update_instruction(&s, inst); + } + + unsigned int ip = 0; + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next, ++ip) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);\ + int dead = 1; + + if (!opcode->HasDstReg) { + dead = 0; + } else { + inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; + if (s.Instructions[ip].WriteMask) + dead = 0; + + if (s.Instructions[ip].WriteALUResult) + dead = 0; + else + inst->U.I.WriteALUResult = RC_ALURESULT_NONE; + } + + if (dead) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + + unsigned int srcmasks[3]; + unsigned int usemask = s.Instructions[ip].WriteMask; + + if (inst->U.I.WriteALUResult == RC_ALURESULT_X) + usemask |= RC_MASK_X; + else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) + usemask |= RC_MASK_W; + + rc_compute_sources_for_writemask(opcode, usemask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + } + } + + rc_calculate_inputs_outputs(c); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 0000000000..33acbd30f4 --- 
/dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; + + usemask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); + + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; + unsigned int masked_negate; + + mov->U.I.Opcode = RC_OPCODE_MOV; + mov->U.I.DstReg.File = RC_FILE_TEMPORARY; + mov->U.I.DstReg.Index = tempreg; + mov->U.I.DstReg.WriteMask = split.Phase[phase]; + mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + + phase_refmask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->U.I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; + + } + + inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[src].Index = tempreg; + inst->U.I.SrcReg[src].Swizzle = 0; + inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; + inst->U.I.SrcReg[src].Abs = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? 
chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c deleted file mode 100644 index aaaa50ad1f..0000000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c +++ /dev/null @@ -1,294 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -/** - * @file - * - * "Not-quite SSA" and Dead-Code Elimination. - * - * @note This code uses SWIZZLE_NIL in a source register to indicate that - * the corresponding component is ignored by the corresponding instruction. - */ - -#include "radeon_nqssadce.h" - -#include "radeon_compiler.h" - - -/** - * Return the @ref register_state for the given register (or 0 for untracked - * registers, i.e. constants). - */ -static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_OUTPUT: return &s->Outputs[index]; - case PROGRAM_ADDRESS: return &s->Address; - default: return 0; - } -} - - -/** - * Left multiplication of a register with a swizzle - * - * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. - */ -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) -{ - struct prog_src_register tmp = srcreg; - int i; - tmp.Swizzle = 0; - tmp.Negate = NEGATE_NONE; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(swizzle, i); - if (swz < 4) { - tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); - tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; - } else { - tmp.Swizzle |= swz << (i*3); - } - } - return tmp; -} - - -static void track_used_srcreg(struct nqssadce_state* s, - GLint src, GLuint sourced) -{ - struct prog_instruction * inst = &s->IP->I; - int i; - GLuint deswz_source = 0; - - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) { - GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); - deswz_source |= 1 << swz; - } else { - inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - } - - if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { - struct prog_dst_register dstreg = inst->DstReg; - dstreg.File = PROGRAM_TEMPORARY; - dstreg.Index = rc_find_free_temporary(s->Compiler); - dstreg.WriteMask = sourced; - - 
s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - - inst->SrcReg[src].File = PROGRAM_TEMPORARY; - inst->SrcReg[src].Index = dstreg.Index; - inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].Negate = NEGATE_NONE; - inst->SrcReg[src].Abs = 0; - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) - inst->SrcReg[src].Swizzle |= i << (3*i); - else - inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); - } - deswz_source = sourced; - } - - struct register_state *regstate; - - if (inst->SrcReg[src].RelAddr) { - regstate = get_reg_state(s, PROGRAM_ADDRESS, 0); - if (regstate) - regstate->Sourced |= WRITEMASK_X; - } else { - regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - } -} - -static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex) -{ - int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode); - int i; - for(i = 0; i < nsrc; ++i) - if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) - inst->I.SrcReg[i].Index = newindex; -} - -static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) -{ - GLuint newindex = rc_find_free_temporary(s->Compiler); - struct rc_instruction * inst; - for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { - if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex) - inst->I.DstReg.Index = newindex; - unalias_srcregs(inst, oldindex, newindex); - } - unalias_srcregs(s->IP, oldindex, newindex); -} - - -/** - * Handle one instruction. 
- */ -static void process_instruction(struct nqssadce_state* s) -{ - struct prog_instruction *inst = &s->IP->I; - GLuint WriteMask; - - if (inst->Opcode == OPCODE_END) - return; - - if (inst->Opcode != OPCODE_KIL) { - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); - if (!regstate) { - rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", - inst->DstReg.File, inst->DstReg.Index); - return; - } - - inst->DstReg.WriteMask &= regstate->Sourced; - regstate->Sourced &= ~inst->DstReg.WriteMask; - - if (inst->DstReg.WriteMask == 0) { - struct rc_instruction * inst_remove = s->IP; - s->IP = s->IP->Prev; - rc_remove_instruction(inst_remove); - return; - } - - if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) - unalias_temporary(s, inst->DstReg.Index); - } - - WriteMask = inst->DstReg.WriteMask; - - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MOV: - track_used_srcreg(s, 0, WriteMask); - break; - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: - case OPCODE_SGE: - case OPCODE_SLT: - track_used_srcreg(s, 0, WriteMask); - track_used_srcreg(s, 1, WriteMask); - break; - case OPCODE_CMP: - case OPCODE_MAD: - track_used_srcreg(s, 0, WriteMask); - track_used_srcreg(s, 1, WriteMask); - track_used_srcreg(s, 2, WriteMask); - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - track_used_srcreg(s, 0, 0x1); - break; - case OPCODE_DP3: - track_used_srcreg(s, 0, 0x7); - track_used_srcreg(s, 1, 0x7); - break; - case OPCODE_DP4: - track_used_srcreg(s, 0, 0xf); - track_used_srcreg(s, 1, 0xf); - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - track_used_srcreg(s, 0, 0xf); - break; - case OPCODE_DST: - track_used_srcreg(s, 0, 0x6); - track_used_srcreg(s, 1, 0xa); - break; - case OPCODE_EXP: - case OPCODE_LOG: - case 
OPCODE_POW: - track_used_srcreg(s, 0, 0x3); - break; - case OPCODE_LIT: - track_used_srcreg(s, 0, 0xb); - break; - default: - rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode); - return; - } - - s->IP = s->IP->Prev; -} - -void rc_calculate_inputs_outputs(struct radeon_compiler * c) -{ - struct rc_instruction *inst; - - c->Program.InputsRead = 0; - c->Program.OutputsWritten = 0; - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) - { - int i; - int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode); - - for (i = 0; i < num_src_regs; ++i) { - if (inst->I.SrcReg[i].File == PROGRAM_INPUT) - c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; - } - - if (_mesa_num_inst_dst_regs(inst->I.Opcode)) { - if (inst->I.DstReg.File == PROGRAM_OUTPUT) - c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; - } - } -} - -void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data) -{ - struct nqssadce_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Compiler = c; - s.Descr = descr; - s.UserData = data; - s.Descr->Init(&s); - s.IP = c->Program.Instructions.Prev; - - while(s.IP != &c->Program.Instructions && !c->Error) - process_instruction(&s); - - rc_calculate_inputs_outputs(c); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h deleted file mode 100644 index b3fc77a35a..0000000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_NQSSADCE_H_ -#define __RADEON_PROGRAM_NQSSADCE_H_ - -#include "radeon_program.h" - -struct register_state { - /** - * Bitmask indicating which components of the register are sourced - * by later instructions. - */ - GLuint Sourced : 4; -}; - -/** - * Maintain state such as which registers are used, which registers are - * read from, etc. - */ -struct nqssadce_state { - struct radeon_compiler *Compiler; - struct radeon_nqssadce_descr *Descr; - - /** - * All instructions after this instruction pointer have been dealt with. - */ - struct rc_instruction * IP; - - /** - * Which registers are read by subsequent instructions? 
- */ - struct register_state Temps[MAX_PROGRAM_TEMPS]; - struct register_state Outputs[VERT_RESULT_MAX]; - struct register_state Address; - - void * UserData; -}; - - -/** - * This structure contains a description of the hardware in-so-far as - * it is required for the NqSSA-DCE pass. - */ -struct radeon_nqssadce_descr { - /** - * Fill in which outputs - */ - void (*Init)(struct nqssadce_state *); - - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. - */ - GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg); - - /** - * Emit (at the current IP) the instruction MOV dst, src; - * The transformation will work recursively on the emitted instruction(s). - */ - void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src); -}; - -void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); -struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg); - -#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c new file mode 100644 index 0000000000..c1c0181fac --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -0,0 +1,429 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "radeon_opcodes.h" + +#include "radeon_program_constants.h" + +struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { + { + .Opcode = RC_OPCODE_NOP, + .Name = "NOP" + }, + { + .Opcode = RC_OPCODE_ILLEGAL_OPCODE, + .Name = "ILLEGAL OPCODE" + }, + { + .Opcode = RC_OPCODE_ABS, + .Name = "ABS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ADD, + .Name = "ADD", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_ARL, + .Name = "ARL", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_CMP, + .Name = "CMP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_COS, + .Name = "COS", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_DDX, + .Name = "DDX", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DDY, + .Name = "DDY", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_DP3, + .Name = "DP3", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DP4, + .Name = "DP4", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DPH, + .Name = "DPH", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_DST, + .Name = "DST", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_EX2, + .Name = "EX2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_EXP, + .Name = "EXP", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_FLR, + .Name = "FLR", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_FRC, + .Name = "FRC", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_KIL, + .Name = "KIL", + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_LG2, + .Name = "LG2", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, 
+ { + .Opcode = RC_OPCODE_LIT, + .Name = "LIT", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LOG, + .Name = "LOG", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_LRP, + .Name = "LRP", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAD, + .Name = "MAD", + .NumSrcRegs = 3, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MAX, + .Name = "MAX", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MIN, + .Name = "MIN", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MOV, + .Name = "MOV", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_MUL, + .Name = "MUL", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_POW, + .Name = "POW", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RCP, + .Name = "RCP", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_RSQ, + .Name = "RSQ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SCS, + .Name = "SCS", + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_SEQ, + .Name = "SEQ", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SFL, + .Name = "SFL", + .NumSrcRegs = 0, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGE, + .Name = "SGE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SGT, + .Name = "SGT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SIN, + .Name = "SIN", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsStandardScalar = 1 + }, + { + .Opcode = RC_OPCODE_SLE, + .Name = "SLE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SLT, + 
.Name = "SLT", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SNE, + .Name = "SNE", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SUB, + .Name = "SUB", + .NumSrcRegs = 2, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_SWZ, + .Name = "SWZ", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { + .Opcode = RC_OPCODE_XPD, + .Name = "XPD", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TEX, + .Name = "TEX", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXB, + .Name = "TXB", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXD, + .Name = "TXD", + .HasTexture = 1, + .NumSrcRegs = 3, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXL, + .Name = "TXL", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_TXP, + .Name = "TXP", + .HasTexture = 1, + .NumSrcRegs = 1, + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_IF, + .Name = "IF", + .IsFlowControl = 1, + .NumSrcRegs = 1 + }, + { + .Opcode = RC_OPCODE_ELSE, + .Name = "ELSE", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_ENDIF, + .Name = "ENDIF", + .IsFlowControl = 1, + .NumSrcRegs = 0 + }, + { + .Opcode = RC_OPCODE_REPL_ALPHA, + .Name = "REPL_ALPHA", + .HasDstReg = 1 + }, + { + .Opcode = RC_OPCODE_BEGIN_TEX, + .Name = "BEGIN_TEX" + } +}; + +void rc_compute_sources_for_writemask( + const struct rc_opcode_info * opcode, + unsigned int writemask, + unsigned int *srcmasks) +{ + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (opcode->Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + else if (opcode->Opcode == RC_OPCODE_IF) + srcmasks[0] |= RC_MASK_X; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if 
(opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(opcode->Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP3: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TEX: + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + srcmasks[0] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DST: + srcmasks[0] |= 0x6; + srcmasks[1] |= 0xa; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= 0xb; + break; + default: + break; + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h new file mode 100644 index 0000000000..a3c5b86954 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_OPCODES_H +#define RADEON_OPCODES_H + +#include <assert.h> + +/** + * Opcodes understood by the Radeon compiler. + */ +typedef enum { + RC_OPCODE_NOP = 0, + RC_OPCODE_ILLEGAL_OPCODE, + + /** vec4 instruction: dst.c = abs(src0.c); */ + RC_OPCODE_ABS, + + /** vec4 instruction: dst.c = src0.c + src1.c; */ + RC_OPCODE_ADD, + + /** special instruction: load address register + * dst.x = floor(src.x), where dst must be an address register */ + RC_OPCODE_ARL, + + /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */ + RC_OPCODE_CMP, + + /** scalar instruction: dst = cos(src0.x) */ + RC_OPCODE_COS, + + /** special instruction: take vec4 partial derivative in X direction + * dst.c = d src0.c / dx */ + RC_OPCODE_DDX, + + /** special instruction: take vec4 partial derivative in Y direction + * dst.c = d src0.c / dy */ + RC_OPCODE_DDY, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ + RC_OPCODE_DP3, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */ + RC_OPCODE_DP4, + + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */ + RC_OPCODE_DPH, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_DST, + + /** scalar instruction: dst = 2**src0.x */ + RC_OPCODE_EX2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_EXP, + + /** vec4 instruction: dst.c = floor(src0.c) */ + RC_OPCODE_FLR, + + /** vec4 instruction: dst.c = src0.c - floor(src0.c) */ + RC_OPCODE_FRC, + + /** special instruction: stop execution if any component of src0 is negative */ + RC_OPCODE_KIL, + + /** scalar instruction: dst = log_2(src0.x) */ + 
RC_OPCODE_LG2, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LIT, + + /** special instruction, see ARB_vertex_program */ + RC_OPCODE_LOG, + + /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */ + RC_OPCODE_LRP, + + /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */ + RC_OPCODE_MAD, + + /** vec4 instruction: dst.c = max(src0.c, src1.c) */ + RC_OPCODE_MAX, + + /** vec4 instruction: dst.c = min(src0.c, src1.c) */ + RC_OPCODE_MIN, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_MOV, + + /** vec4 instruction: dst.c = src0.c*src1.c */ + RC_OPCODE_MUL, + + /** scalar instruction: dst = src0.x ** src1.x */ + RC_OPCODE_POW, + + /** scalar instruction: dst = 1 / src0.x */ + RC_OPCODE_RCP, + + /** scalar instruction: dst = 1 / sqrt(src0.x) */ + RC_OPCODE_RSQ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_SCS, + + /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SEQ, + + /** vec4 instruction: dst.c = 0.0 */ + RC_OPCODE_SFL, + + /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGE, + + /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SGT, + + /** scalar instruction: dst = sin(src0.x) */ + RC_OPCODE_SIN, + + /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLE, + + /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SLT, + + /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ + RC_OPCODE_SNE, + + /** vec4 instruction: dst.c = src0.c - src1.c */ + RC_OPCODE_SUB, + + /** vec4 instruction: dst.c = src0.c */ + RC_OPCODE_SWZ, + + /** special instruction, see ARB_fragment_program */ + RC_OPCODE_XPD, + + RC_OPCODE_TEX, + RC_OPCODE_TXB, + RC_OPCODE_TXD, + RC_OPCODE_TXL, + RC_OPCODE_TXP, + + /** branch instruction: + * If src0.x != 0.0, continue with the next instruction; + * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF. 
+ */ + RC_OPCODE_IF, + + /** branch instruction: jump to matching RC_OPCODE_ENDIF */ + RC_OPCODE_ELSE, + + /** branch instruction: has no effect */ + RC_OPCODE_ENDIF, + + /** special instruction, used in R300-R500 fragment program pair instructions + * indicates that the result of the alpha operation shall be replicated + * across all other channels */ + RC_OPCODE_REPL_ALPHA, + + /** special instruction, used in R300-R500 fragment programs + * to indicate the start of a block of texture instructions that + * can run simultaneously. */ + RC_OPCODE_BEGIN_TEX, + + MAX_RC_OPCODE +} rc_opcode; + + +struct rc_opcode_info { + rc_opcode Opcode; + const char * Name; + + /** true if the instruction reads from a texture. + * + * \note This is false for the KIL instruction, even though KIL is + * a texture instruction from a hardware point of view. */ + unsigned int HasTexture:1; + + unsigned int NumSrcRegs:2; + unsigned int HasDstReg:1; + + /** true if this instruction affects control flow */ + unsigned int IsFlowControl:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; +}; + +extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; + +static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode) +{ + assert((unsigned int)opcode < MAX_RC_OPCODE); + assert(rc_opcodes[opcode].Opcode == opcode); + + return &rc_opcodes[opcode]; +} + +void rc_compute_sources_for_writemask( + const struct rc_opcode_info * opcode, + unsigned int writemask, + unsigned int *srcmasks); + +#endif /* RADEON_OPCODES_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c new file mode 100644 index 
0000000000..828d0c8e28 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) 
do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + + +struct live_intervals { + int Start; + int End; + struct live_intervals * Next; +}; + +struct register_info { + struct live_intervals Live; + + unsigned int Used:1; + unsigned int Allocated:1; + rc_register_file File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; +}; + +struct hardware_register { + struct live_intervals * Used; +}; + +struct regalloc_state { + struct radeon_compiler * C; + + struct register_info Input[RC_REGISTER_MAX_INDEX]; + struct register_info Temporary[RC_REGISTER_MAX_INDEX]; + + struct hardware_register * HwTemporary; + unsigned int NumHwTemporaries; +}; + +static void print_live_intervals(struct live_intervals * src) +{ + if (!src) { + DBG("(null)"); + return; + } + + while(src) { + DBG("(%i,%i)", src->Start, src->End); + src = src->Next; + } +} + +static void add_live_intervals(struct regalloc_state * s, + struct live_intervals ** dst, struct live_intervals * src) +{ + struct live_intervals ** dst_backup = dst; + + if (VERBOSE) { + DBG("add_live_intervals: "); + print_live_intervals(*dst); + DBG(" to "); + print_live_intervals(src); + DBG("\n"); + } + + while(src) { + if (*dst && (*dst)->End < src->Start) { + dst = &(*dst)->Next; + } else if (!*dst || (*dst)->Start > src->End) { + struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); + li->Start = src->Start; + li->End = src->End; + li->Next = *dst; + *dst = li; + src = src->Next; + } else { + if (src->End > (*dst)->End) + (*dst)->End = src->End; + if (src->Start < (*dst)->Start) + (*dst)->Start = src->Start; + src = src->Next; + } + } + + if (VERBOSE) { + DBG(" result: "); + print_live_intervals(*dst_backup); + DBG("\n"); + } +} + +static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src) +{ + if (VERBOSE) { + DBG("overlap_live_intervals: "); + print_live_intervals(dst); + DBG(" to "); + print_live_intervals(src); + DBG("\n"); + } + + while(src && dst) { + if (dst->End <= 
src->Start) { + dst = dst->Next; + } else if (dst->End <= src->End) { + DBG(" overlap\n"); + return 1; + } else if (dst->Start < src->End) { + DBG(" overlap\n"); + return 1; + } else { + src = src->Next; + } + } + + DBG(" no overlap\n"); + + return 0; +} + +static int try_add_live_intervals(struct regalloc_state * s, + struct live_intervals ** dst, struct live_intervals * src) +{ + if (overlap_live_intervals(*dst, src)) + return 0; + + add_live_intervals(s, dst, src); + return 1; +} + +static void scan_callback(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct regalloc_state * s = data; + struct register_info * reg; + + if (file == RC_FILE_TEMPORARY) + reg = &s->Temporary[index]; + else if (file == RC_FILE_INPUT) + reg = &s->Input[index]; + else + return; + + if (!reg->Used) { + reg->Used = 1; + if (file == RC_FILE_INPUT) + reg->Live.Start = -1; + else + reg->Live.Start = inst->IP; + reg->Live.End = inst->IP; + } else { + if (inst->IP > reg->Live.End) + reg->Live.End = inst->IP; + } +} + +static void compute_live_intervals(struct regalloc_state * s) +{ + rc_recompute_ips(s->C); + + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads(inst, scan_callback, s); + rc_for_all_writes(inst, scan_callback, s); + } +} + +static void rewrite_register(struct regalloc_state * s, + rc_register_file * file, unsigned int * index) +{ + const struct register_info * reg; + + if (*file == RC_FILE_TEMPORARY) + reg = &s->Temporary[*index]; + else if (*file == RC_FILE_INPUT) + reg = &s->Input[*index]; + else + return; + + if (reg->Allocated) { + *file = reg->File; + *index = reg->Index; + } +} + +static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + + if (opcode->HasDstReg) { + rc_register_file file = 
inst->DstReg.File; + unsigned int index = inst->DstReg.Index; + + rewrite_register(s, &file, &index); + + inst->DstReg.File = file; + inst->DstReg.Index = index; + } + + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { + rc_register_file file = inst->SrcReg[src].File; + unsigned int index = inst->SrcReg[src].Index; + + rewrite_register(s, &file, &index); + + inst->SrcReg[src].File = file; + inst->SrcReg[src].Index = index; + } +} + +static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst) +{ + if (inst->RGB.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->RGB.DestIndex; + + rewrite_register(s, &file, &index); + + inst->RGB.DestIndex = index; + } + + if (inst->Alpha.WriteMask) { + rc_register_file file = RC_FILE_TEMPORARY; + unsigned int index = inst->Alpha.DestIndex; + + rewrite_register(s, &file, &index); + + inst->Alpha.DestIndex = index; + } + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + rc_register_file file = inst->RGB.Src[src].File; + unsigned int index = inst->RGB.Src[src].Index; + + rewrite_register(s, &file, &index); + + inst->RGB.Src[src].File = file; + inst->RGB.Src[src].Index = index; + } + + if (inst->Alpha.Src[src].Used) { + rc_register_file file = inst->Alpha.Src[src].File; + unsigned int index = inst->Alpha.Src[src].Index; + + rewrite_register(s, &file, &index); + + inst->Alpha.Src[src].File = file; + inst->Alpha.Src[src].Index = index; + } + } +} + +static void do_regalloc(struct regalloc_state * s) +{ + /* Simple and stupid greedy register allocation */ + for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { + struct register_info * reg = &s->Temporary[index]; + + if (!reg->Used) + continue; + + for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { + if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, ®->Live)) { + reg->Allocated = 1; + reg->File = RC_FILE_TEMPORARY; + reg->Index = 
hwreg; + goto success; + } + } + + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + + success:; + } + + /* Rewrite all instructions based on the translation table we built */ + for(struct rc_instruction * inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) + rewrite_normal_instruction(s, &inst->U.I); + else + rewrite_pair_instruction(s, &inst->U.P); + } +} + +static void alloc_input(void * data, unsigned int input, unsigned int hwreg) +{ + struct regalloc_state * s = data; + + if (!s->Input[input].Used) + return; + + add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live); + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; + +} + +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps) +{ + struct regalloc_state s; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + s.NumHwTemporaries = maxtemps; + s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register)); + memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register)); + + compute_live_intervals(&s); + + c->AllocateHwInputs(c, &alloc_input, &s); + + do_regalloc(&s); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c new file mode 100644 index 0000000000..df67aafe02 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -0,0 +1,501 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include <stdio.h> + +#include "radeon_compiler.h" +#include "radeon_dataflow.h" + + +#define VERBOSE 0 + +#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) + +struct schedule_instruction { + struct rc_instruction * Instruction; + + /** Next instruction in the linked list of ready instructions. */ + struct schedule_instruction *NextReady; + + /** Values that this instruction reads and writes */ + struct reg_value * WriteValues[4]; + struct reg_value * ReadValues[12]; + unsigned int NumWriteValues:3; + unsigned int NumReadValues:4; + + /** + * Number of (read and write) dependencies that must be resolved before + * this instruction can be scheduled. + */ + unsigned int NumDependencies:5; +}; + + +/** + * Used to keep track of which instructions read a value. 
+ */ +struct reg_value_reader { + struct schedule_instruction *Reader; + struct reg_value_reader *Next; +}; + +/** + * Used to keep track of which values are stored in each component of a + * RC_FILE_TEMPORARY. + */ +struct reg_value { + struct schedule_instruction * Writer; + + /** + * Unordered linked list of instructions that read from this value. + * When this value becomes available, we decrease all readers' + * dependency count. + */ + struct reg_value_reader *Readers; + + /** + * Number of readers of this value. This is decremented each time + * a reader of the value is committed. + * When the reader count reaches zero, the dependency count + * of the instruction writing \ref Next is decremented. + */ + unsigned int NumReaders; + + struct reg_value *Next; /**< Pointer to the next value to be written to the same register */ +}; + +struct register_state { + struct reg_value * Values[4]; +}; + +struct schedule_state { + struct radeon_compiler * C; + struct schedule_instruction * Current; + + struct register_state Temporary[RC_REGISTER_MAX_INDEX]; + + /** + * Linked lists of instructions that can be scheduled right now, + * based on which ALU/TEX resources they require. 
+ */ + /*@{*/ + struct schedule_instruction *ReadyFullALU; + struct schedule_instruction *ReadyRGB; + struct schedule_instruction *ReadyAlpha; + struct schedule_instruction *ReadyTEX; + /*@}*/ +}; + +static struct reg_value ** get_reg_valuep(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + if (file != RC_FILE_TEMPORARY) + return 0; + + if (index >= RC_REGISTER_MAX_INDEX) { + rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index); + return 0; + } + + return &s->Temporary[index].Values[chan]; +} + +static struct reg_value * get_reg_value(struct schedule_state * s, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + if (!pv) + return 0; + return *pv; +} + +static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst) +{ + inst->NextReady = *list; + *list = inst; +} + +static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i is now ready\n", sinst->Instruction->IP); + + if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) + add_inst_to_list(&s->ReadyTEX, sinst); + else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyRGB, sinst); + else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP) + add_inst_to_list(&s->ReadyAlpha, sinst); + else + add_inst_to_list(&s->ReadyFullALU, sinst); +} + +static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst) +{ + assert(sinst->NumDependencies > 0); + sinst->NumDependencies--; + if (!sinst->NumDependencies) + instruction_ready(s, sinst); +} + +static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst) +{ + DBG("%i: commit\n", sinst->Instruction->IP); + + for(unsigned int i = 0; i < sinst->NumReadValues; ++i) { + struct reg_value * v = sinst->ReadValues[i]; + assert(v->NumReaders > 0); + 
v->NumReaders--; + if (!v->NumReaders) { + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } + + for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) { + struct reg_value * v = sinst->WriteValues[i]; + if (v->NumReaders) { + for(struct reg_value_reader * r = v->Readers; r; r = r->Next) { + decrease_dependencies(s, r->Reader); + } + } else { + /* This happens in instruction sequences of the type + * OP r.x, ...; + * OP r.x, r.x, ...; + * See also the subtlety in how instructions that both + * read and write the same register are scanned. + */ + if (v->Next) + decrease_dependencies(s, v->Next->Writer); + } + } +} + +/** + * Emit all ready texture instructions in a single block. + * + * Emit as a single block to (hopefully) sample many textures in parallel, + * and to avoid hardware indirections on R300. + */ +static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before) +{ + struct schedule_instruction *readytex; + + assert(s->ReadyTEX); + + /* Don't let the ready list change under us! 
*/ + readytex = s->ReadyTEX; + s->ReadyTEX = 0; + + /* Node marker for R300 */ + struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev); + inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX; + + /* Link texture instructions back in */ + while(readytex) { + struct schedule_instruction * tex = readytex; + readytex = readytex->NextReady; + + rc_insert_instruction(before->Prev, tex->Instruction); + commit_instruction(s, tex); + } +} + + +static int destructive_merge_instructions( + struct rc_pair_instruction * rgb, + struct rc_pair_instruction * alpha) +{ + assert(rgb->Alpha.Opcode == RC_OPCODE_NOP); + assert(alpha->RGB.Opcode == RC_OPCODE_NOP); + + /* Copy alpha args into rgb */ + const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int oldsrc = alpha->Alpha.Arg[arg].Source; + rc_register_file file = 0; + unsigned int index = 0; + + if (alpha->Alpha.Arg[arg].Swizzle < 3) { + srcrgb = 1; + file = alpha->RGB.Src[oldsrc].File; + index = alpha->RGB.Src[oldsrc].Index; + } else if (alpha->Alpha.Arg[arg].Swizzle < 4) { + srcalpha = 1; + file = alpha->Alpha.Src[oldsrc].File; + index = alpha->Alpha.Src[oldsrc].Index; + } + + int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index); + if (source < 0) + return 0; + + rgb->Alpha.Arg[arg].Source = source; + rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle; + rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs; + rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate; + } + + /* Copy alpha opcode into rgb */ + rgb->Alpha.Opcode = alpha->Alpha.Opcode; + rgb->Alpha.DestIndex = alpha->Alpha.DestIndex; + rgb->Alpha.WriteMask = alpha->Alpha.WriteMask; + rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask; + rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask; + rgb->Alpha.Saturate = alpha->Alpha.Saturate; + + /* Merge ALU 
result writing */ + if (alpha->WriteALUResult) { + if (rgb->WriteALUResult) + return 0; + + rgb->WriteALUResult = alpha->WriteALUResult; + rgb->ALUResultCompare = alpha->ALUResultCompare; + } + + return 1; +} + +/** + * Try to merge the given alpha instruction into the given rgb instruction. + * + * Return true on success; on failure, return false, and keep + * the instructions untouched. + */ +static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha) +{ + struct rc_pair_instruction backup; + + memcpy(&backup, rgb, sizeof(struct rc_pair_instruction)); + + if (destructive_merge_instructions(rgb, alpha)) + return 1; + + memcpy(rgb, &backup, sizeof(struct rc_pair_instruction)); + return 0; +} + + +/** + * Find a good ALU instruction or pair of ALU instructions and emit it. + * + * Prefer emitting full ALU instructions, so that when we reach a point + * where no full ALU instruction can be emitted, we have more candidates + * for RGB/Alpha pairing. + */ +static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before) +{ + struct schedule_instruction * sinst; + + if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { + if (s->ReadyFullALU) { + sinst = s->ReadyFullALU; + s->ReadyFullALU = s->ReadyFullALU->NextReady; + } else if (s->ReadyRGB) { + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + } else { + sinst = s->ReadyAlpha; + s->ReadyAlpha = s->ReadyAlpha->NextReady; + } + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_instruction(s, sinst); + } else { + struct schedule_instruction **prgb; + struct schedule_instruction **palpha; + + /* Some pairings might fail because they require too + * many source slots; try all possible pairings if necessary */ + for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { + for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { + struct schedule_instruction * psirgb = *prgb; + struct schedule_instruction * psialpha = *palpha; 
+ + if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P)) + continue; + + *prgb = (*prgb)->NextReady; + *palpha = (*palpha)->NextReady; + rc_insert_instruction(before->Prev, psirgb->Instruction); + commit_instruction(s, psirgb); + commit_instruction(s, psialpha); + goto success; + } + } + + /* No success in pairing; just take the first RGB instruction */ + sinst = s->ReadyRGB; + s->ReadyRGB = s->ReadyRGB->NextReady; + + rc_insert_instruction(before->Prev, sinst->Instruction); + commit_instruction(s, sinst); + success: ; + } +} + +static void scan_read(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value * v = get_reg_value(s, file, index, chan); + + if (!v) + return; + + if (v->Writer == s->Current) { + /* The instruction reads and writes to a register component. + * In this case, we only want to increment dependencies by one. + */ + return; + } + + DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader)); + reader->Reader = s->Current; + reader->Next = v->Readers; + v->Readers = reader; + v->NumReaders++; + + s->Current->NumDependencies++; + + if (s->Current->NumReadValues >= 12) { + rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__); + } else { + s->Current->ReadValues[s->Current->NumReadValues++] = v; + } +} + +static void scan_write(void * data, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int chan) +{ + struct schedule_state * s = data; + struct reg_value ** pv = get_reg_valuep(s, file, index, chan); + + if (!pv) + return; + + DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan); + + struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv)); + memset(newv, 0, sizeof(*newv)); + + newv->Writer = s->Current; + + if (*pv) { + 
(*pv)->Next = newv; + s->Current->NumDependencies++; + } + + *pv = newv; + + if (s->Current->NumWriteValues >= 4) { + rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__); + } else { + s->Current->WriteValues[s->Current->NumWriteValues++] = newv; + } +} + +static void schedule_block(struct r300_fragment_program_compiler * c, + struct rc_instruction * begin, struct rc_instruction * end) +{ + struct schedule_state s; + + memset(&s, 0, sizeof(s)); + s.C = &c->Base; + + /* Scan instructions for data dependencies */ + unsigned int ip = 0; + for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) { + s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current)); + memset(s.Current, 0, sizeof(struct schedule_instruction)); + + s.Current->Instruction = inst; + inst->IP = ip++; + + DBG("%i: Scanning\n", inst->IP); + + /* The order of things here is subtle and maybe slightly + * counter-intuitive, to account for the case where an + * instruction writes to the same register as it reads + * from. 
*/ + rc_for_all_writes(inst, &scan_write, &s); + rc_for_all_reads(inst, &scan_read, &s); + + DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies); + + if (!s.Current->NumDependencies) + instruction_ready(&s, s.Current); + } + + /* Temporarily unlink all instructions */ + begin->Prev->Next = end; + end->Prev = begin->Prev; + + /* Schedule instructions back */ + while(!s.C->Error && + (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { + if (s.ReadyTEX) + emit_all_tex(&s, end); + + while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)) + emit_one_alu(&s, end); + } +} + +static int is_controlflow(struct rc_instruction * inst) +{ + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + return opcode->IsFlowControl; + } + return 0; +} + +void rc_pair_schedule(struct r300_fragment_program_compiler *c) +{ + struct rc_instruction * inst = c->Base.Program.Instructions.Next; + while(inst != &c->Base.Program.Instructions) { + if (is_controlflow(inst)) { + inst = inst->Next; + continue; + } + + struct rc_instruction * first = inst; + + while(inst != &c->Base.Program.Instructions && !is_controlflow(inst)) + inst = inst->Next; + + DBG("Schedule one block\n"); + schedule_block(c, first, inst); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c new file mode 100644 index 0000000000..7211768272 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program_pair.h" + +#include "radeon_compiler.h" + + +/** + * Finally rewrite ADD, MOV, MUL as the appropriate native instruction + * and reverse the order of arguments for CMP. + */ +static void final_rewrite(struct rc_sub_instruction *inst) +{ + struct rc_src_register tmp; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + inst->SrcReg[2] = inst->SrcReg[1]; + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[1].Negate = RC_MASK_NONE; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_CMP: + tmp = inst->SrcReg[2]; + inst->SrcReg[2] = inst->SrcReg[0]; + inst->SrcReg[0] = tmp; + break; + case RC_OPCODE_MOV: + /* AMD say we should use CMP. 
+ * However, when we transform + * KIL -r0; + * into + * CMP tmp, -r0, -r0, 0; + * KIL tmp; + * we get incorrect behaviour on R500 when r0 == 0.0. + * It appears that the R500 KIL hardware treats -0.0 as less + * than zero. + */ + inst->SrcReg[1].File = RC_FILE_NONE; + inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + case RC_OPCODE_MUL: + inst->SrcReg[2].File = RC_FILE_NONE; + inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; + inst->Opcode = RC_OPCODE_MAD; + break; + default: + /* nothing to do */ + break; + } +} + + +/** + * Classify an instruction according to which ALUs etc. it needs + */ +static void classify_instruction(struct rc_sub_instruction * inst, + int * needrgb, int * needalpha, int * istranscendent) +{ + *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; + *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; + *istranscendent = 0; + + if (inst->WriteALUResult == RC_ALURESULT_X) + *needrgb = 1; + else if (inst->WriteALUResult == RC_ALURESULT_W) + *needalpha = 1; + + switch(inst->Opcode) { + case RC_OPCODE_ADD: + case RC_OPCODE_CMP: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + case RC_OPCODE_FRC: + case RC_OPCODE_MAD: + case RC_OPCODE_MAX: + case RC_OPCODE_MIN: + case RC_OPCODE_MOV: + case RC_OPCODE_MUL: + break; + case RC_OPCODE_COS: + case RC_OPCODE_EX2: + case RC_OPCODE_LG2: + case RC_OPCODE_RCP: + case RC_OPCODE_RSQ: + case RC_OPCODE_SIN: + *istranscendent = 1; + *needalpha = 1; + break; + case RC_OPCODE_DP4: + *needalpha = 1; + /* fall through */ + case RC_OPCODE_DP3: + *needrgb = 1; + break; + default: + break; + } +} + + +/** + * Fill the given ALU instruction's opcodes and source operands into the given pair, + * if possible. 
+ */ +static void set_pair_instruction(struct r300_fragment_program_compiler *c, + struct rc_pair_instruction * pair, + struct rc_sub_instruction * inst) +{ + memset(pair, 0, sizeof(struct rc_pair_instruction)); + + int needrgb, needalpha, istranscendent; + classify_instruction(inst, &needrgb, &needalpha, &istranscendent); + + if (needrgb) { + if (istranscendent) + pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; + else + pair->RGB.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->RGB.Saturate = 1; + } + if (needalpha) { + pair->Alpha.Opcode = inst->Opcode; + if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) + pair->Alpha.Saturate = 1; + } + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); + int nargs = opcode->NumSrcRegs; + int i; + + /* Special case for DDX/DDY (MDH/MDV). */ + if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) { + nargs++; + } + + for(i = 0; i < opcode->NumSrcRegs; ++i) { + int source; + if (needrgb && !istranscendent) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + int j; + for(j = 0; j < 3; ++j) { + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + } + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + pair->RGB.Arg[i].Source = source; + pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + } + if (needalpha) { + unsigned int srcrgb = 0; + unsigned int srcalpha = 0; + unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 
0 : 3); + if (swz < 3) + srcrgb = 1; + else if (swz < 4) + srcalpha = 1; + source = rc_pair_alloc_source(pair, srcrgb, srcalpha, + inst->SrcReg[i].File, inst->SrcReg[i].Index); + pair->Alpha.Arg[i].Source = source; + pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; + pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); + } + } + + /* Destination handling */ + if (inst->DstReg.File == RC_FILE_OUTPUT) { + if (inst->DstReg.Index == c->OutputColor) { + pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } else if (inst->DstReg.Index == c->OutputDepth) { + pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } else { + if (needrgb) { + pair->RGB.DestIndex = inst->DstReg.Index; + pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; + } + if (needalpha) { + pair->Alpha.DestIndex = inst->DstReg.Index; + pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); + } + } + + if (inst->WriteALUResult) { + pair->WriteALUResult = inst->WriteALUResult; + pair->ALUResultCompare = inst->ALUResultCompare; + } +} + + +/** + * Translate all ALU instructions into corresponding pair instructions, + * performing no other changes. 
+ */ +void rc_pair_translate(struct r300_fragment_program_compiler *c) +{ + for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; + inst != &c->Base.Program.Instructions; + inst = inst->Next) { + if (inst->Type != RC_INSTRUCTION_NORMAL) + continue; + + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) + continue; + + struct rc_sub_instruction copy = inst->U.I; + + final_rewrite(©); + inst->Type = RC_INSTRUCTION_PAIR; + set_pair_instruction(c, &inst->U.P, ©); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index b636f90a96..0dbc5380bb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -27,9 +27,9 @@ #include "radeon_program.h" +#include <stdio.h> + #include "radeon_compiler.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" /** @@ -69,37 +69,57 @@ void radeonLocalTransform( } } +/** + * Left multiplication of a register with a swizzle + */ +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg) +{ + struct rc_src_register tmp = srcreg; + int i; + tmp.Swizzle = 0; + tmp.Negate = 0; + for(i = 0; i < 4; ++i) { + rc_swizzle swz = GET_SWZ(swizzle, i); + if (swz < 4) { + tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); + tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i; + } else { + tmp.Swizzle |= swz << (i*3); + } + } + return tmp; +} -GLint rc_find_free_temporary(struct radeon_compiler * c) +unsigned int rc_find_free_temporary(struct radeon_compiler * c) { - GLboolean used[MAX_PROGRAM_TEMPS]; - GLuint i; + char used[RC_REGISTER_MAX_INDEX]; + unsigned int i; memset(used, 0, sizeof(used)); for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) { - const struct 
prog_instruction *inst = &rcinst->I; - const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode); - const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode); - GLuint k; - - for (k = 0; k < nsrc; k++) { - if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) - used[inst->SrcReg[k].Index] = GL_TRUE; + const struct rc_sub_instruction *inst = &rcinst->U.I; + const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode); + unsigned int k; + + for (k = 0; k < opcode->NumSrcRegs; k++) { + if (inst->SrcReg[k].File == RC_FILE_TEMPORARY) + used[inst->SrcReg[k].Index] = 1; } - if (ndst) { - if (inst->DstReg.File == PROGRAM_TEMPORARY) - used[inst->DstReg.Index] = GL_TRUE; + if (opcode->HasDstReg) { + if (inst->DstReg.File == RC_FILE_TEMPORARY) + used[inst->DstReg.Index] = 1; } } - for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) { if (!used[i]) return i; } - return -1; + rc_error(c, "Ran out of temporary registers\n"); + return 0; } @@ -107,24 +127,31 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c) { struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction)); - inst->Prev = 0; - inst->Next = 0; + memset(inst, 0, sizeof(struct rc_instruction)); - _mesa_init_instructions(&inst->I, 1); + inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; + inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW; + inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW; return inst; } - -struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after) +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst) { - struct rc_instruction * inst = rc_alloc_instruction(c); - inst->Prev = after; inst->Next = after->Next; inst->Prev->Next = inst; inst->Next->Prev = inst; +} + +struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct 
rc_instruction * after) +{ + struct rc_instruction * inst = rc_alloc_instruction(c); + + rc_insert_instruction(after, inst); return inst; } @@ -135,76 +162,20 @@ void rc_remove_instruction(struct rc_instruction * inst) inst->Next->Prev = inst->Prev; } - -void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program) -{ - struct prog_instruction *source; - unsigned int i; - - for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) { - struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev); - dest->I = *source; - } - - c->Program.ShadowSamplers = program->ShadowSamplers; - c->Program.InputsRead = program->InputsRead; - c->Program.OutputsWritten = program->OutputsWritten; - - int isNVProgram = 0; - - if (program->Target == GL_VERTEX_PROGRAM_ARB) { - struct gl_vertex_program * vp = (struct gl_vertex_program *) program; - isNVProgram = vp->IsNVProgram; - } - - if (isNVProgram) { - /* NV_vertex_program has a fixed-sized constant environment. - * This could be handled more efficiently for programs that - * do not use relative addressing. - */ - for(i = 0; i < 96; ++i) { - struct rc_constant constant; - - constant.Type = RC_CONSTANT_EXTERNAL; - constant.Size = 4; - constant.u.External = i; - - rc_constants_add(&c->Program.Constants, &constant); - } - } else { - for(i = 0; i < program->Parameters->NumParameters; ++i) { - struct rc_constant constant; - - constant.Type = RC_CONSTANT_EXTERNAL; - constant.Size = 4; - constant.u.External = i; - - rc_constants_add(&c->Program.Constants, &constant); - } - } -} - - /** - * Print program to stderr, default options. + * Return the number of instructions in the program. 
*/ -void rc_print_program(const struct rc_program *prog) +unsigned int rc_recompute_ips(struct radeon_compiler * c) { - GLuint indent = 0; - GLuint linenum = 1; - struct rc_instruction *inst; - - fprintf(stderr, "# Radeon Compiler Program\n"); + unsigned int ip = 0; - for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { - fprintf(stderr, "%3d: ", linenum); + for(struct rc_instruction * inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + inst->IP = ip++; + } - /* Massive hack: We rely on the fact that the printers do not actually - * use the gl_program argument (last argument) in debug mode */ - indent = _mesa_fprint_instruction_opt( - stderr, &inst->I, - indent, PROG_PRINT_DEBUG, 0); + c->Program.Instructions.IP = 0xcafedead; - linenum++; - } + return ip; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index 561958608c..33db3ea0ff 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -28,37 +28,144 @@ #ifndef __RADEON_PROGRAM_H_ #define __RADEON_PROGRAM_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" -#include "shader/prog_instruction.h" +#include <stdint.h> +#include <string.h> + +#include "radeon_opcodes.h" +#include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_program_pair.h" struct radeon_compiler; -struct rc_instruction; -struct rc_program; -enum { - PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +struct rc_src_register { + rc_register_file File:3; + + /** Negative values may be used for relative addressing. 
*/ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int Swizzle:12; + + /** Take the component-wise absolute value */ + unsigned int Abs:1; + + /** Post-Abs negation. */ + unsigned int Negate:4; +}; + +struct rc_dst_register { + rc_register_file File:3; + + /** Negative values may be used for relative addressing. */ + signed int Index:(RC_REGISTER_INDEX_BITS+1); + unsigned int RelAddr:1; + + unsigned int WriteMask:4; +}; + +/** + * Instructions are maintained by the compiler in a doubly linked list + * of these structures. + * + * This instruction format is intended to be expanded for hardware-specific + * trickery. At different stages of compilation, a different set of + * instruction types may be valid. + */ +struct rc_sub_instruction { + struct rc_src_register SrcReg[3]; + struct rc_dst_register DstReg; + + /** + * Opcode of this instruction, according to \ref rc_opcode enums. + */ + rc_opcode Opcode:8; + + /** + * Saturate each value of the result to the range [0,1] or [-1,1], + * according to \ref rc_saturate_mode enums. + */ + rc_saturate_mode SaturateMode:2; + + /** + * Writing to the special register RC_SPECIAL_ALU_RESULT + */ + /*@{*/ + rc_write_aluresult WriteALUResult:2; + rc_compare_func ALUResultCompare:3; + /*@}*/ + + /** + * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. + */ + /*@{*/ + /** Source texture unit. */ + unsigned int TexSrcUnit:5; + + /** Source texture target, one of the \ref rc_texture_target enums */ + rc_texture_target TexSrcTarget:3; + + /** True if tex instruction should do shadow comparison */ + unsigned int TexShadow:1; + /*@}*/ +}; + +typedef enum { + RC_INSTRUCTION_NORMAL = 0, + RC_INSTRUCTION_PAIR +} rc_instruction_type; + +struct rc_instruction { + struct rc_instruction * Prev; + struct rc_instruction * Next; + + rc_instruction_type Type; + union { + struct rc_sub_instruction I; + struct rc_pair_instruction P; + } U; + + /** + * Warning: IPs are not stable. 
If you want to use them, + * you need to recompute them at the beginning of each pass + * using \ref rc_recompute_ips + */ + unsigned int IP; +}; + +struct rc_program { + /** + * Instructions.Next points to the first instruction, + * Instructions.Prev points to the last instruction. + */ + struct rc_instruction Instructions; + + /* Long term, we should probably remove InputsRead & OutputsWritten, + * since updating dependent state can be fragile, and they aren't + * actually used very often. */ + uint32_t InputsRead; + uint32_t OutputsWritten; + uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */ + + struct rc_constant_list Constants; }; enum { - OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */ + OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */ }; -#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) -#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) -static inline GLuint get_swz(GLuint swz, GLuint idx) +static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) { if (idx & 0x4) return idx; return GET_SWZ(swz, idx); } -static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w) +static inline unsigned int combine_swizzles4(unsigned int src, + rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) { - GLuint ret = 0; + unsigned int ret = 0; ret |= get_swz(src, swz_x); ret |= get_swz(src, swz_y) << 3; @@ -68,22 +175,24 @@ static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, G return ret; } -static inline GLuint combine_swizzles(GLuint src, GLuint swz) +static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz) { - GLuint ret = 0; + unsigned int ret = 0; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X)); - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3; - ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6; - ret |= 
get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X)); + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6; + ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9; return ret; } -static INLINE void reset_srcreg(struct prog_src_register* reg) +struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); + +static inline void reset_srcreg(struct rc_src_register* reg) { - _mesa_bzero(reg, sizeof(*reg)); - reg->Swizzle = SWIZZLE_NOOP; + memset(reg, 0, sizeof(*reg)); /* size of the struct, not of the pointer */ + reg->Swizzle = RC_SWIZZLE_XYZW; } @@ -92,13 +201,13 @@ static INLINE void reset_srcreg(struct prog_src_register* reg) * * The function will be called once for each instruction. * It has to either emit the appropriate transformed code for the instruction - * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * and return true, or return false if it doesn't understand the * instruction. * * The function gets passed the userData as last parameter. 
*/ struct radeon_program_transformation { - GLboolean (*function)( + int (*function)( struct radeon_compiler*, struct rc_instruction*, void*); @@ -110,12 +219,15 @@ void radeonLocalTransform( int num_transformations, struct radeon_program_transformation* transformations); -GLint rc_find_free_temporary(struct radeon_compiler * c); +unsigned int rc_find_free_temporary(struct radeon_compiler * c); struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); +void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst); void rc_remove_instruction(struct rc_instruction * inst); +unsigned int rc_recompute_ips(struct radeon_compiler * c); + void rc_print_program(const struct rc_program *prog); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index f23ce301ca..0326d25233 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -40,175 +40,164 @@ static struct rc_instruction *emit1( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg; return fpi; } static struct rc_instruction *emit2( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register 
SrcReg0, struct prog_src_register SrcReg1) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg0; - fpi->I.SrcReg[1] = SrcReg1; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; return fpi; } static struct rc_instruction *emit3( struct radeon_compiler * c, struct rc_instruction * after, - gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg, - struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, - struct prog_src_register SrcReg2) + rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, + struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, + struct rc_src_register SrcReg2) { struct rc_instruction *fpi = rc_insert_new_instruction(c, after); - fpi->I.Opcode = Opcode; - fpi->I.SaturateMode = Saturate; - fpi->I.DstReg = DstReg; - fpi->I.SrcReg[0] = SrcReg0; - fpi->I.SrcReg[1] = SrcReg1; - fpi->I.SrcReg[2] = SrcReg2; + fpi->U.I.Opcode = Opcode; + fpi->U.I.SaturateMode = Saturate; + fpi->U.I.DstReg = DstReg; + fpi->U.I.SrcReg[0] = SrcReg0; + fpi->U.I.SrcReg[1] = SrcReg1; + fpi->U.I.SrcReg[2] = SrcReg2; return fpi; } -static struct prog_dst_register dstreg(int file, int index) +static struct rc_dst_register dstreg(int file, int index) { - struct prog_dst_register dst; + struct rc_dst_register dst; dst.File = file; dst.Index = index; - dst.WriteMask = WRITEMASK_XYZW; - dst.CondMask = COND_TR; + dst.WriteMask = RC_MASK_XYZW; dst.RelAddr = 0; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; return dst; } -static struct prog_dst_register dstregtmpmask(int index, int mask) +static struct rc_dst_register dstregtmpmask(int 
index, int mask) { - struct prog_dst_register dst = {0}; - dst.File = PROGRAM_TEMPORARY; + struct rc_dst_register dst = {0}; + dst.File = RC_FILE_TEMPORARY; dst.Index = index; dst.WriteMask = mask; dst.RelAddr = 0; - dst.CondMask = COND_TR; - dst.CondSwizzle = SWIZZLE_NOOP; - dst.CondSrc = 0; - dst.pad = 0; return dst; } -static const struct prog_src_register builtin_zero = { - .File = PROGRAM_BUILTIN, +static const struct rc_src_register builtin_zero = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_0000 + .Swizzle = RC_SWIZZLE_0000 }; -static const struct prog_src_register builtin_one = { - .File = PROGRAM_BUILTIN, +static const struct rc_src_register builtin_one = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_1111 + .Swizzle = RC_SWIZZLE_1111 }; -static const struct prog_src_register srcreg_undefined = { - .File = PROGRAM_UNDEFINED, +static const struct rc_src_register srcreg_undefined = { + .File = RC_FILE_NONE, .Index = 0, - .Swizzle = SWIZZLE_NOOP + .Swizzle = RC_SWIZZLE_XYZW }; -static struct prog_src_register srcreg(int file, int index) +static struct rc_src_register srcreg(int file, int index) { - struct prog_src_register src = srcreg_undefined; + struct rc_src_register src = srcreg_undefined; src.File = file; src.Index = index; return src; } -static struct prog_src_register srcregswz(int file, int index, int swz) +static struct rc_src_register srcregswz(int file, int index, int swz) { - struct prog_src_register src = srcreg_undefined; + struct rc_src_register src = srcreg_undefined; src.File = file; src.Index = index; src.Swizzle = swz; return src; } -static struct prog_src_register absolute(struct prog_src_register reg) +static struct rc_src_register absolute(struct rc_src_register reg) { - struct prog_src_register newreg = reg; + struct rc_src_register newreg = reg; newreg.Abs = 1; - newreg.Negate = NEGATE_NONE; + newreg.Negate = RC_MASK_NONE; return newreg; } -static struct prog_src_register negate(struct prog_src_register reg) 
+static struct rc_src_register negate(struct rc_src_register reg) { - struct prog_src_register newreg = reg; - newreg.Negate = newreg.Negate ^ NEGATE_XYZW; + struct rc_src_register newreg = reg; + newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; return newreg; } -static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +static struct rc_src_register swizzle(struct rc_src_register reg, + rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) { - struct prog_src_register swizzled = reg; - swizzled.Swizzle = MAKE_SWIZZLE4( - x >= 4 ? x : GET_SWZ(reg.Swizzle, x), - y >= 4 ? y : GET_SWZ(reg.Swizzle, y), - z >= 4 ? z : GET_SWZ(reg.Swizzle, z), - w >= 4 ? w : GET_SWZ(reg.Swizzle, w)); + struct rc_src_register swizzled = reg; + swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); return swizzled; } -static struct prog_src_register scalar(struct prog_src_register reg) +static struct rc_src_register scalar(struct rc_src_register reg) { - return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); + return swizzle(reg, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X); } static void transform_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src = inst->I.SrcReg[0]; + struct rc_src_register src = inst->U.I.SrcReg[0]; src.Abs = 1; - src.Negate = NEGATE_NONE; - emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src); + src.Negate = RC_MASK_NONE; + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); rc_remove_instruction(inst); } static void transform_DP3(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src0 = inst->I.SrcReg[0]; - struct prog_src_register src1 = inst->I.SrcReg[1]; - src0.Negate &= ~NEGATE_W; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); - 
src0.Swizzle |= SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~NEGATE_W; + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } static void transform_DPH(struct radeon_compiler* c, struct rc_instruction* inst) { - struct prog_src_register src0 = inst->I.SrcReg[0]; - src0.Negate &= ~NEGATE_W; + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + src0.Negate &= ~RC_MASK_W; src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= SWIZZLE_ONE << (3 * 3); - emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]); + src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); rc_remove_instruction(inst); } @@ -219,9 +208,9 @@ static void transform_DPH(struct radeon_compiler* c, static void transform_DST(struct radeon_compiler* c, struct rc_instruction* inst) { - emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE), - swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W)); + emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); rc_remove_instruction(inst); } @@ -229,9 +218,9 @@ static void transform_FLR(struct radeon_compiler* c, struct rc_instruction* inst) { int tempreg = rc_find_free_temporary(c); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), 
inst->I.SrcReg[0]); - emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg, - inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]); + emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, + inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); } @@ -256,64 +245,64 @@ static void transform_FLR(struct radeon_compiler* c, static void transform_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { - GLuint constant; - GLuint constant_swizzle; - GLuint temp; - struct prog_src_register srctemp; + unsigned int constant; + unsigned int constant_swizzle; + unsigned int temp; + struct rc_src_register srctemp; constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); - if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) { + if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { struct rc_instruction * inst_mov; inst_mov = emit1(c, inst, - OPCODE_MOV, 0, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c))); + RC_OPCODE_MOV, 0, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); - inst->I.DstReg.File = PROGRAM_TEMPORARY; - inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index; - inst->I.DstReg.WriteMask = WRITEMASK_XYZW; + inst->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; + inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; } - temp = inst->I.DstReg.Index; - srctemp = srcreg(PROGRAM_TEMPORARY, temp); + temp = inst->U.I.DstReg.Index; + srctemp = srcreg(RC_FILE_TEMPORARY, temp); // tmp.x = max(0.0, Src.x); // tmp.y = max(0.0, Src.y); // tmp.w = clamp(Src.z, -128+eps, 128-eps); - emit2(c, inst->Prev, OPCODE_MAX, 0, - dstregtmpmask(temp, WRITEMASK_XYW), - inst->I.SrcReg[0], - 
swizzle(srcreg(PROGRAM_CONSTANT, constant), - SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3)); - emit2(c, inst->Prev, OPCODE_MIN, 0, - dstregtmpmask(temp, WRITEMASK_Z), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle))); + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + dstregtmpmask(temp, RC_MASK_XYW), + inst->U.I.SrcReg[0], + swizzle(srcreg(RC_FILE_CONSTANT, constant), + RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); + emit2(c, inst->Prev, RC_OPCODE_MIN, 0, + dstregtmpmask(temp, RC_MASK_Z), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); // tmp.w = Pow(tmp.y, tmp.w) - emit1(c, inst->Prev, OPCODE_LG2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit2(c, inst->Prev, OPCODE_MUL, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)); - emit1(c, inst->Prev, OPCODE_EX2, 0, - dstregtmpmask(temp, WRITEMASK_W), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srctemp, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)); + emit1(c, inst->Prev, RC_OPCODE_EX2, 0, + dstregtmpmask(temp, RC_MASK_W), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); // tmp.z = (tmp.x > 0) ? 
tmp.w : 0.0 - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, - dstregtmpmask(temp, WRITEMASK_Z), - negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_Z), + negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), builtin_zero); // tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 - emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, - dstregtmpmask(temp, WRITEMASK_XYW), - swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE)); + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, + dstregtmpmask(temp, RC_MASK_XYW), + swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); rc_remove_instruction(inst); } @@ -323,12 +312,12 @@ static void transform_LRP(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, - dstreg(PROGRAM_TEMPORARY, tempreg), - inst->I.SrcReg[1], negate(inst->I.SrcReg[2])); - emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, - inst->I.DstReg, - inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + dstreg(RC_FILE_TEMPORARY, tempreg), + inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, + inst->U.I.DstReg, + inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]); rc_remove_instruction(inst); } @@ -337,14 +326,14 @@ static void transform_POW(struct radeon_compiler* c, struct rc_instruction* inst) { int tempreg = rc_find_free_temporary(c); - struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); - struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); - tempdst.WriteMask = WRITEMASK_W; - tempsrc.Swizzle = 
SWIZZLE_WWWW; + struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg); + struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg); + tempdst.WriteMask = RC_MASK_W; + tempsrc.Swizzle = RC_SWIZZLE_WWWW; - emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0])); - emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1])); - emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc); + emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->U.I.SrcReg[0])); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->U.I.SrcReg[1])); + emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); rc_remove_instruction(inst); } @@ -352,7 +341,26 @@ static void transform_POW(struct radeon_compiler* c, static void transform_RSQ(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]); + inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); +} + +static void transform_SEQ(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SFL(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); + rc_remove_instruction(inst); } static void transform_SGE(struct radeon_compiler* c, @@ -360,9 +368,33 @@ static void transform_SGE(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - 
emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); + + rc_remove_instruction(inst); +} + +static void transform_SGT(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + + rc_remove_instruction(inst); +} + +static void transform_SLE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one); rc_remove_instruction(inst); } @@ -372,9 +404,21 @@ static void transform_SLT(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1])); - emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg, - srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero); + + 
rc_remove_instruction(inst); +} + +static void transform_SNE(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + int tempreg = rc_find_free_temporary(c); + + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); + emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, + negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero); rc_remove_instruction(inst); } @@ -382,14 +426,14 @@ static void transform_SLT(struct radeon_compiler* c, static void transform_SUB(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = OPCODE_ADD; - inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]); + inst->U.I.Opcode = RC_OPCODE_ADD; + inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); } static void transform_SWZ(struct radeon_compiler* c, struct rc_instruction* inst) { - inst->I.Opcode = OPCODE_MOV; + inst->U.I.Opcode = RC_OPCODE_MOV; } static void transform_XPD(struct radeon_compiler* c, @@ -397,13 +441,13 @@ static void transform_XPD(struct radeon_compiler* c, { int tempreg = rc_find_free_temporary(c); - emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), - swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg, - swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), - swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), - negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, + swizzle(inst->U.I.SrcReg[0], 
RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), + swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), + negate(srcreg(RC_FILE_TEMPORARY, tempreg))); rc_remove_instruction(inst); } @@ -414,7 +458,7 @@ static void transform_XPD(struct radeon_compiler* c, * no userData necessary. * * Eliminates the following ALU instructions: - * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD + * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * @@ -423,27 +467,32 @@ static void transform_XPD(struct radeon_compiler* c, * * @note should be applicable to R300 and R500 fragment programs. */ -GLboolean radeonTransformALU( +int radeonTransformALU( struct radeon_compiler * c, struct rc_instruction* inst, void* unused) { - switch(inst->I.Opcode) { - case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; - case OPCODE_DST: transform_DST(c, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; - case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(c, inst); return GL_TRUE; - case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_DST: transform_DST(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); 
return 1; + case RC_OPCODE_POW: transform_POW(c, inst); return 1; + case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; + case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; + case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; + case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; + case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; + case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; + case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; + case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; default: - return GL_FALSE; + return 0; } } @@ -452,37 +501,37 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c, struct rc_instruction* inst) { /* Note: r500 can take absolute values, but r300 cannot. */ - inst->I.Opcode = OPCODE_MAX; - inst->I.SrcReg[1] = inst->I.SrcReg[0]; - inst->I.SrcReg[1].Negate ^= NEGATE_XYZW; + inst->U.I.Opcode = RC_OPCODE_MAX; + inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; + inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. 
*/ -GLboolean r300_transform_vertex_alu( +int r300_transform_vertex_alu( struct radeon_compiler * c, struct rc_instruction* inst, void* unused) { - switch(inst->I.Opcode) { - case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE; - case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE; - case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE; + switch(inst->U.I.Opcode) { + case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; + case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; + case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; + case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; + case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; + case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; + case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; + case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; default: - return GL_FALSE; + return 0; } } -static void sincos_constants(struct radeon_compiler* c, GLuint *constants) +static void sincos_constants(struct radeon_compiler* c, unsigned int *constants) { - static const GLfloat SinCosConsts[2][4] = { + static const float SinCosConsts[2][4] = { { 1.273239545, // 4/PI -0.405284735, // -4/(PI*PI) @@ -512,25 +561,25 @@ static void sincos_constants(struct radeon_compiler* c, GLuint *constants) */ static void sin_approx( struct radeon_compiler* c, struct rc_instruction * before, - struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants) -{ - GLuint tempreg = rc_find_free_temporary(c); - - emit2(c, before->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcreg(PROGRAM_CONSTANT, constants[0])); - 
emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), - negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); - emit3(c, before->Prev, OPCODE_MAD, 0, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), - swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); + struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) +{ + unsigned int tempreg = rc_find_free_temporary(c); + + emit2(c, before, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + srcreg(RC_FILE_CONSTANT, constants[0])); + emit3(c, before, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), + absolute(swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); + emit3(c, before, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + absolute(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, 
RC_SWIZZLE_X)), + negate(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X))); + emit3(c, before, RC_OPCODE_MAD, 0, dst, + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), + swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); } /** @@ -538,81 +587,81 @@ static void sin_approx( * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */ -GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, +int radeonTransformTrigSimple(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_COS && - inst->I.Opcode != OPCODE_SIN && - inst->I.Opcode != OPCODE_SCS) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; - GLuint constants[2]; - GLuint tempreg = rc_find_free_temporary(c); + unsigned int constants[2]; + unsigned int tempreg = rc_find_free_temporary(c); sincos_constants(c, constants); - if (inst->I.Opcode == OPCODE_COS) { + if (inst->U.I.Opcode == RC_OPCODE_COS) { // MAD tmp.x, src, 1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), 
SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(c, inst, inst->I.DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); - } else if (inst->I.Opcode == OPCODE_SIN) { - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), 
SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - sin_approx(c, inst, inst->I.DstReg, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); + + sin_approx(c, inst, inst->U.I.DstReg, + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), constants); } else { - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), - swizzle(srcreg(PROGRAM_CONSTANT, 
constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg)); - emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), - srcreg(PROGRAM_TEMPORARY, tempreg), - swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), - negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); - - struct prog_dst_register dst = inst->I.DstReg; - - dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X; + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg)); + emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), + srcreg(RC_FILE_TEMPORARY, tempreg), + swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W), + negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z))); + + struct rc_dst_register dst = inst->U.I.DstReg; + + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; sin_approx(c, inst, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), constants); - dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y; + dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; sin_approx(c, inst, dst, - swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, 
SWIZZLE_Y), + swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y), constants); } rc_remove_instruction(inst); - return GL_TRUE; + return 1; } @@ -624,53 +673,53 @@ GLboolean radeonTransformTrigSimple(struct radeon_compiler* c, * * @warning This transformation implicitly changes the semantics of SIN and COS! */ -GLboolean radeonTransformTrigScale(struct radeon_compiler* c, +int radeonTransformTrigScale(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_COS && - inst->I.Opcode != OPCODE_SIN && - inst->I.Opcode != OPCODE_SCS) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_COS && + inst->U.I.Opcode != RC_OPCODE_SIN && + inst->U.I.Opcode != RC_OPCODE_SCS) + return 0; - static const GLfloat RCP_2PI = 0.15915494309189535; - GLuint temp; - GLuint constant; - GLuint constant_swizzle; + static const float RCP_2PI = 0.15915494309189535; + unsigned int temp; + unsigned int constant; + unsigned int constant_swizzle; temp = rc_find_free_temporary(c); constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle); - emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W), - swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), - srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)); - emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W), - srcreg(PROGRAM_TEMPORARY, temp)); - - if (inst->I.Opcode == OPCODE_COS) { - emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->I.Opcode == OPCODE_SIN) { - emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, - inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); - } else if (inst->I.Opcode == OPCODE_SCS) { - struct prog_dst_register moddst = inst->I.DstReg; - - if (inst->I.DstReg.WriteMask & WRITEMASK_X) { - moddst.WriteMask = WRITEMASK_X; - emit1(c, 
inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), + swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X), + srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); + emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), + srcreg(RC_FILE_TEMPORARY, temp)); + + if (inst->U.I.Opcode == RC_OPCODE_COS) { + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, + inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); + } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { + struct rc_dst_register moddst = inst->U.I.DstReg; + + if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { + moddst.WriteMask = RC_MASK_X; + emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } - if (inst->I.DstReg.WriteMask & WRITEMASK_Y) { - moddst.WriteMask = WRITEMASK_Y; - emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst, - srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW)); + if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { + moddst.WriteMask = RC_MASK_Y; + emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, + srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW)); } } rc_remove_instruction(inst); - return GL_TRUE; + return 1; } /** @@ -681,15 +730,15 @@ GLboolean radeonTransformTrigScale(struct radeon_compiler* c, * @warning This explicitly changes the form of DDX and DDY! 
*/ -GLboolean radeonTransformDeriv(struct radeon_compiler* c, +int radeonTransformDeriv(struct radeon_compiler* c, struct rc_instruction* inst, void* unused) { - if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY) - return GL_FALSE; + if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) + return 0; - inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE); - inst->I.SrcReg[1].Negate = NEGATE_XYZW; + inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE); + inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; - return GL_TRUE; + return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h index 147efec6fc..7cb5f84b7f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h @@ -30,27 +30,27 @@ #include "radeon_program.h" -GLboolean radeonTransformALU( +int radeonTransformALU( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean r300_transform_vertex_alu( +int r300_transform_vertex_alu( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformTrigSimple( +int radeonTransformTrigSimple( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformTrigScale( +int radeonTransformTrigScale( struct radeon_compiler * c, struct rc_instruction * inst, void*); -GLboolean radeonTransformDeriv( +int radeonTransformDeriv( struct radeon_compiler * c, struct rc_instruction * inst, void*); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h new file mode 100644 index 0000000000..7c0d6720b1 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. 
+ * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. 
+ * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT, + + /** + * Indicates a special register, see RC_SPECIAL_xxx. + */ + RC_FILE_SPECIAL +} rc_register_file; + +enum { + /** R500 fragment program ALU result "register" */ + RC_SPECIAL_ALU_RESULT = 0, + + /** Must be last */ + RC_NUM_SPECIAL_REGISTERS +}; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. 
+ */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +typedef enum { + RC_ALURESULT_NONE = 0, + RC_ALURESULT_X, + RC_ALURESULT_W +} rc_write_aluresult; + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 4c26db5d24..ee839596aa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008 Nicolai Haehnle. + * Copyright (C) 2008-2009 Nicolai Haehnle. * * All Rights Reserved. * @@ -25,584 +25,29 @@ * */ -/** - * @file - * - * Perform temporary register allocation and attempt to pair off instructions - * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction - * vs. ALU instruction scheduling. - */ - #include "radeon_program_pair.h" -#include "memory_pool.h" -#include "radeon_compiler.h" -#include "shader/prog_print.h" - -#define error(fmt, args...) do { \ - rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ -} while(0) - -struct pair_state_instruction { - struct prog_instruction Instruction; - GLuint IP; /**< Position of this instruction in original program */ - - GLuint IsTex:1; /**< Is a texture instruction */ - GLuint NeedRGB:1; /**< Needs the RGB ALU */ - GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ - GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ - - /** - * Number of (read and write) dependencies that must be resolved before - * this instruction can be scheduled. 
- */ - GLuint NumDependencies:5; - - /** - * Next instruction in the linked list of ready instructions. - */ - struct pair_state_instruction *NextReady; - - /** - * Values that this instruction writes - */ - struct reg_value *Values[4]; -}; - - -/** - * Used to keep track of which instructions read a value. - */ -struct reg_value_reader { - struct pair_state_instruction *Reader; - struct reg_value_reader *Next; -}; - -/** - * Used to keep track which values are stored in each component of a - * PROGRAM_TEMPORARY. - */ -struct reg_value { - struct pair_state_instruction *Writer; - struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */ - - /** - * Unordered linked list of instructions that read from this value. - */ - struct reg_value_reader *Readers; - - /** - * Number of readers of this value. This is calculated during @ref scan_instructions - * and continually decremented during code emission. - * When this count reaches zero, the instruction that writes the @ref Next value - * can be scheduled. - */ - GLuint NumReaders; -}; - -/** - * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register - * to the proper hardware temporary. - */ -struct pair_register_translation { - GLuint Allocated:1; - GLuint HwIndex:8; - GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */ - - /** - * Notes the value that is currently contained in each component - * (only used for PROGRAM_TEMPORARY registers). 
- */ - struct reg_value *Value[4]; -}; - -struct pair_state { - struct r300_fragment_program_compiler * Compiler; - const struct radeon_pair_handler *Handler; - GLboolean Verbose; - void *UserData; - - /** - * Translate Mesa registers to hardware registers - */ - struct pair_register_translation Inputs[FRAG_ATTRIB_MAX]; - struct pair_register_translation Temps[MAX_PROGRAM_TEMPS]; - - struct { - GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */ - } HwTemps[128]; - - /** - * Linked list of instructions that can be scheduled right now, - * based on which ALU/TEX resources they require. - */ - struct pair_state_instruction *ReadyFullALU; - struct pair_state_instruction *ReadyRGB; - struct pair_state_instruction *ReadyAlpha; - struct pair_state_instruction *ReadyTEX; -}; - - -static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index) -{ - switch(file) { - case PROGRAM_TEMPORARY: return &s->Temps[index]; - case PROGRAM_INPUT: return &s->Inputs[index]; - default: return 0; - } -} - -static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex) -{ - struct pair_register_translation *t = get_register(s, file, index); - ASSERT(!s->HwTemps[hwindex].RefCount); - ASSERT(!t->Allocated); - s->HwTemps[hwindex].RefCount = t->RefCount; - t->Allocated = 1; - t->HwIndex = hwindex; -} - -static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index) -{ - GLuint hwindex; - - struct pair_register_translation *t = get_register(s, file, index); - if (!t) { - error("get_hw_reg: %i[%i]\n", file, index); - return 0; - } - - if (t->Allocated) - return t->HwIndex; - - for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex) - if (!s->HwTemps[hwindex].RefCount) - break; - - if (hwindex >= s->Handler->MaxHwTemps) { - error("Ran out of hardware temporaries"); - return 0; - } - - alloc_hw_reg(s, file, index, hwindex); - return hwindex; -} - - -static void deref_hw_reg(struct 
pair_state *s, GLuint hwindex) -{ - if (!s->HwTemps[hwindex].RefCount) { - error("Hwindex %i refcount error", hwindex); - return; - } - - s->HwTemps[hwindex].RefCount--; -} - -static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst) -{ - pairinst->NextReady = *list; - *list = pairinst; -} - -/** - * The given instruction has become ready. Link it into the ready - * instructions. - */ -static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst) -{ - if (s->Verbose) - _mesa_printf("instruction_ready(%i)\n", pairinst->IP); - - if (pairinst->IsTex) - add_pairinst_to_list(&s->ReadyTEX, pairinst); - else if (!pairinst->NeedAlpha) - add_pairinst_to_list(&s->ReadyRGB, pairinst); - else if (!pairinst->NeedRGB) - add_pairinst_to_list(&s->ReadyAlpha, pairinst); - else - add_pairinst_to_list(&s->ReadyFullALU, pairinst); -} - - -/** - * Finally rewrite ADD, MOV, MUL as the appropriate native instruction - * and reverse the order of arguments for CMP. - */ -static void final_rewrite(struct pair_state *s, struct prog_instruction *inst) -{ - struct prog_src_register tmp; - - switch(inst->Opcode) { - case OPCODE_ADD: - inst->SrcReg[2] = inst->SrcReg[1]; - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[1].Negate = NEGATE_NONE; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_CMP: - tmp = inst->SrcReg[2]; - inst->SrcReg[2] = inst->SrcReg[0]; - inst->SrcReg[0] = tmp; - break; - case OPCODE_MOV: - /* AMD say we should use CMP. - * However, when we transform - * KIL -r0; - * into - * CMP tmp, -r0, -r0, 0; - * KIL tmp; - * we get incorrect behaviour on R500 when r0 == 0.0. - * It appears that the R500 KIL hardware treats -0.0 as less - * than zero. 
- */ - inst->SrcReg[1].File = PROGRAM_BUILTIN; - inst->SrcReg[1].Swizzle = SWIZZLE_1111; - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - case OPCODE_MUL: - inst->SrcReg[2].File = PROGRAM_BUILTIN; - inst->SrcReg[2].Swizzle = SWIZZLE_0000; - inst->Opcode = OPCODE_MAD; - break; - default: - /* nothing to do */ - break; - } -} - - -/** - * Classify an instruction according to which ALUs etc. it needs - */ -static void classify_instruction(struct pair_state *s, - struct pair_state_instruction *psi) -{ - psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0; - psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0; - - switch(psi->Instruction.Opcode) { - case OPCODE_ADD: - case OPCODE_CMP: - case OPCODE_DDX: - case OPCODE_DDY: - case OPCODE_FRC: - case OPCODE_MAD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MOV: - case OPCODE_MUL: - break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - psi->IsTranscendent = 1; - psi->NeedAlpha = 1; - break; - case OPCODE_DP4: - psi->NeedAlpha = 1; - /* fall through */ - case OPCODE_DP3: - psi->NeedRGB = 1; - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - case OPCODE_END: - psi->IsTex = 1; - break; - default: - error("Unknown opcode %d\n", psi->Instruction.Opcode); - break; - } -} - - -/** - * Count which (input, temporary) register is read and written how often, - * and scan the instruction stream to find dependencies. 
- */ -static void scan_instructions(struct pair_state *s) -{ - struct rc_instruction *source; - GLuint ip; - - for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0; - source != &s->Compiler->Base.Program.Instructions; - source = source->Next, ++ip) { - struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst)); - memset(pairinst, 0, sizeof(struct pair_state_instruction)); - - pairinst->Instruction = source->I; - pairinst->IP = ip; - final_rewrite(s, &pairinst->Instruction); - classify_instruction(s, pairinst); - - int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode); - int j; - for(j = 0; j < nsrc; j++) { - struct pair_register_translation *t = - get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index); - if (!t) - continue; - - t->RefCount++; - - if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) { - int i; - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i); - if (swz >= 4) - continue; /* constant or NIL swizzle */ - if (!t->Value[swz]) - continue; /* this is an undefined read */ - - /* Do not add a dependency if this instruction - * also rewrites the value. The code below adds - * a dependency for the DstReg, which is a superset - * of the SrcReg dependency. 
*/ - if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY && - pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index && - GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz)) - continue; - - struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r)); - pairinst->NumDependencies++; - t->Value[swz]->NumReaders++; - r->Reader = pairinst; - r->Next = t->Value[swz]->Readers; - t->Value[swz]->Readers = r; - } - } - } - - int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode); - if (ndst) { - struct pair_register_translation *t = - get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index); - if (t) { - t->RefCount++; - - if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) { - int j; - for(j = 0; j < 4; ++j) { - if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j)) - continue; - - struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v)); - memset(v, 0, sizeof(struct reg_value)); - v->Writer = pairinst; - if (t->Value[j]) { - pairinst->NumDependencies++; - t->Value[j]->Next = v; - } - t->Value[j] = v; - pairinst->Values[j] = v; - } - } - } - } - - if (s->Verbose) - _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies); - - if (!pairinst->NumDependencies) - instruction_ready(s, pairinst); - } - - /* Clear the PROGRAM_TEMPORARY state */ - int i, j; - for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) { - for(j = 0; j < 4; ++j) - s->Temps[i].Value[j] = 0; - } -} - - -static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst) -{ - ASSERT(pairinst->NumDependencies > 0); - if (!--pairinst->NumDependencies) - instruction_ready(s, pairinst); -} /** - * Update the dependency tracking state based on what the instruction - * at the given IP does. + * Return the source slot where we installed the given register access, + * or -1 if no slot was free anymore. 
*/ -static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst) -{ - struct prog_instruction *inst = &pairinst->Instruction; - - if (s->Verbose) - _mesa_printf("commit_instruction(%i)\n", pairinst->IP); - - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; - deref_hw_reg(s, t->HwIndex); - - int i; - for(i = 0; i < 4; ++i) { - if (!GET_BIT(inst->DstReg.WriteMask, i)) - continue; - - t->Value[i] = pairinst->Values[i]; - if (t->Value[i]->NumReaders) { - struct reg_value_reader *r; - for(r = pairinst->Values[i]->Readers; r; r = r->Next) - decrement_dependencies(s, r->Reader); - } else if (t->Value[i]->Next) { - /* This happens when the only reader writes - * the register at the same time */ - decrement_dependencies(s, t->Value[i]->Next->Writer); - } - } - } - - int nsrc = _mesa_num_inst_src_regs(inst->Opcode); - int i; - for(i = 0; i < nsrc; i++) { - struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); - if (!t) - continue; - - deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); - - if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) - continue; - - int j; - for(j = 0; j < 4; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz >= 4) - continue; - if (!t->Value[swz]) - continue; - - /* Do not free a dependency if this instruction - * also rewrites the value. See scan_instructions. */ - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == inst->SrcReg[i].Index && - GET_BIT(inst->DstReg.WriteMask, swz)) - continue; - - if (!--t->Value[swz]->NumReaders) { - if (t->Value[swz]->Next) - decrement_dependencies(s, t->Value[swz]->Next->Writer); - } - } - } -} - - -/** - * Emit all ready texture instructions in a single block. - * - * Emit as a single block to (hopefully) sample many textures in parallel, - * and to avoid hardware indirections on R300. 
- * - * In R500, we don't really know when the result of a texture instruction - * arrives. So allocate all destinations first, to make sure they do not - * arrive early and overwrite a texture coordinate we're going to use later - * in the block. - */ -static void emit_all_tex(struct pair_state *s) -{ - struct pair_state_instruction *readytex; - struct pair_state_instruction *pairinst; - - ASSERT(s->ReadyTEX); - - // Don't let the ready list change under us! - readytex = s->ReadyTEX; - s->ReadyTEX = 0; - - // Allocate destination hardware registers in one block to avoid conflicts. - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - struct prog_instruction *inst = &pairinst->Instruction; - if (inst->Opcode != OPCODE_KIL) - get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - } - - if (s->Compiler->Base.Debug) - _mesa_printf(" BEGIN_TEX\n"); - - if (s->Handler->BeginTexBlock) - s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData); - - for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { - struct prog_instruction *inst = &pairinst->Instruction; - commit_instruction(s, pairinst); - - if (inst->Opcode != OPCODE_KIL) - inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); - - if (s->Compiler->Base.Debug) { - _mesa_printf(" "); - _mesa_print_instruction(inst); - fflush(stderr); - } - - struct radeon_pair_texture_instruction rpti; - - switch(inst->Opcode) { - case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break; - case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break; - case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break; - default: - case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break; - } - - rpti.DestIndex = inst->DstReg.Index; - rpti.WriteMask = inst->DstReg.WriteMask; - rpti.TexSrcUnit = inst->TexSrcUnit; - rpti.TexSrcTarget = inst->TexSrcTarget; - rpti.SrcIndex = 
inst->SrcReg[0].Index; - rpti.SrcSwizzle = inst->SrcReg[0].Swizzle; - - s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti); - } - - if (s->Compiler->Base.Debug) - _mesa_printf(" END_TEX\n"); -} - - -static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, - struct prog_src_register src, GLboolean rgb, GLboolean alpha) +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index) { int candidate = -1; int candidate_quality = -1; int i; - if (!rgb && !alpha) + if ((!rgb && !alpha) || file == RC_FILE_NONE) return 0; - GLuint constant; - GLuint index; - - if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { - constant = 0; - index = get_hw_reg(s, src.File, src.Index); - } else { - constant = 1; - index = src.Index; - } - for(i = 0; i < 3; ++i) { int q = 0; if (rgb) { if (pair->RGB.Src[i].Used) { - if (pair->RGB.Src[i].Constant != constant || + if (pair->RGB.Src[i].File != file || pair->RGB.Src[i].Index != index) continue; q++; @@ -610,7 +55,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio } if (alpha) { if (pair->Alpha.Src[i].Used) { - if (pair->Alpha.Src[i].Constant != constant || + if (pair->Alpha.Src[i].File != file || pair->Alpha.Src[i].Index != index) continue; q++; @@ -625,334 +70,15 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio if (candidate >= 0) { if (rgb) { pair->RGB.Src[candidate].Used = 1; - pair->RGB.Src[candidate].Constant = constant; + pair->RGB.Src[candidate].File = file; pair->RGB.Src[candidate].Index = index; } if (alpha) { pair->Alpha.Src[candidate].Used = 1; - pair->Alpha.Src[candidate].Constant = constant; + pair->Alpha.Src[candidate].File = file; pair->Alpha.Src[candidate].Index = index; } } return candidate; } - -/** - * Fill the given ALU instruction's opcodes and source operands into the given pair, - * 
if possible. - */ -static GLboolean fill_instruction_into_pair( - struct pair_state *s, - struct radeon_pair_instruction *pair, - struct pair_state_instruction *pairinst) -{ - struct prog_instruction *inst = &pairinst->Instruction; - - ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); - ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); - - if (pairinst->NeedRGB) { - if (pairinst->IsTranscendent) - pair->RGB.Opcode = OPCODE_REPL_ALPHA; - else - pair->RGB.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->RGB.Saturate = 1; - } - if (pairinst->NeedAlpha) { - pair->Alpha.Opcode = inst->Opcode; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - pair->Alpha.Saturate = 1; - } - - int nargs = _mesa_num_inst_src_regs(inst->Opcode); - int i; - - /* Special case for DDX/DDY (MDH/MDV). */ - if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { - if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) - return GL_FALSE; - else - nargs++; - } - - for(i = 0; i < nargs; ++i) { - int source; - if (pairinst->NeedRGB && !pairinst->IsTranscendent) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - int j; - for(j = 0; j < 3; ++j) { - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - } - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; - pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z)); - } - if (pairinst->NeedAlpha) { - GLboolean srcrgb = GL_FALSE; - GLboolean srcalpha = GL_FALSE; - GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 
0 : 3); - if (swz < 3) - srcrgb = GL_TRUE; - else if (swz < 4) - srcalpha = GL_TRUE; - source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); - if (source < 0) - return GL_FALSE; - pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = swz; - pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W); - } - } - - return GL_TRUE; -} - - -/** - * Fill in the destination register information. - * - * This is split from filling in source registers because we want - * to avoid allocating hardware temporaries for destinations until - * we are absolutely certain that we're going to emit a certain - * instruction pairing. - */ -static void fill_dest_into_pair( - struct pair_state *s, - struct radeon_pair_instruction *pair, - struct pair_state_instruction *pairinst) -{ - struct prog_instruction *inst = &pairinst->Instruction; - - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if (inst->DstReg.Index == s->Compiler->OutputColor) { - pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } else if (inst->DstReg.Index == s->Compiler->OutputDepth) { - pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } else { - GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - if (pairinst->NeedRGB) { - pair->RGB.DestIndex = hwindex; - pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ; - } - if (pairinst->NeedAlpha) { - pair->Alpha.DestIndex = hwindex; - pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); - } - } -} - - -/** - * Find a good ALU instruction or pair of ALU instruction and emit it. - * - * Prefer emitting full ALU instructions, so that when we reach a point - * where no full ALU instruction can be emitted, we have more candidates - * for RGB/Alpha pairing. 
- */ -static void emit_alu(struct pair_state *s) -{ - struct radeon_pair_instruction pair; - struct pair_state_instruction *psi; - - if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { - if (s->ReadyFullALU) { - psi = s->ReadyFullALU; - s->ReadyFullALU = s->ReadyFullALU->NextReady; - } else if (s->ReadyRGB) { - psi = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - } else { - psi = s->ReadyAlpha; - s->ReadyAlpha = s->ReadyAlpha->NextReady; - } - - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, psi); - fill_dest_into_pair(s, &pair, psi); - commit_instruction(s, psi); - } else { - struct pair_state_instruction **prgb; - struct pair_state_instruction **palpha; - - /* Some pairings might fail because they require too - * many source slots; try all possible pairings if necessary */ - for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { - for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { - struct pair_state_instruction * psirgb = *prgb; - struct pair_state_instruction * psialpha = *palpha; - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, psirgb); - if (!fill_instruction_into_pair(s, &pair, psialpha)) - continue; - *prgb = (*prgb)->NextReady; - *palpha = (*palpha)->NextReady; - fill_dest_into_pair(s, &pair, psirgb); - fill_dest_into_pair(s, &pair, psialpha); - commit_instruction(s, psirgb); - commit_instruction(s, psialpha); - goto success; - } - } - - /* No success in pairing; just take the first RGB instruction */ - psi = s->ReadyRGB; - s->ReadyRGB = s->ReadyRGB->NextReady; - - _mesa_bzero(&pair, sizeof(pair)); - fill_instruction_into_pair(s, &pair, psi); - fill_dest_into_pair(s, &pair, psi); - commit_instruction(s, psi); - success: ; - } - - if (s->Compiler->Base.Debug) - radeonPrintPairInstruction(&pair); - - s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair); -} - -/* Callback function for assigning input registers to hardware 
registers */ -static void alloc_helper(void * data, unsigned input, unsigned hwreg) -{ - struct pair_state * s = data; - alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg); -} - -void radeonPairProgram( - struct r300_fragment_program_compiler * compiler, - const struct radeon_pair_handler* handler, void *userdata) -{ - struct pair_state s; - - _mesa_bzero(&s, sizeof(s)); - s.Compiler = compiler; - s.Handler = handler; - s.UserData = userdata; - s.Verbose = GL_FALSE && s.Compiler->Base.Debug; - - if (s.Compiler->Base.Debug) - _mesa_printf("Emit paired program\n"); - - scan_instructions(&s); - s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s); - - while(!s.Compiler->Base.Error && - (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { - if (s.ReadyTEX) - emit_all_tex(&s); - - while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) - emit_alu(&s); - } - - if (s.Compiler->Base.Debug) - _mesa_printf(" END\n"); -} - - -static void print_pair_src(int i, struct radeon_pair_instruction_source* src) -{ - _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? 
"CNST" : "TEMP", src->Index); -} - -static const char* opcode_string(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return "SOP"; - else - return _mesa_opcode_string(opcode); -} - -static int num_pairinst_args(GLuint opcode) -{ - if (opcode == OPCODE_REPL_ALPHA) - return 0; - else - return _mesa_num_inst_src_regs(opcode); -} - -static char swizzle_char(GLuint swz) -{ - switch(swz) { - case SWIZZLE_X: return 'x'; - case SWIZZLE_Y: return 'y'; - case SWIZZLE_Z: return 'z'; - case SWIZZLE_W: return 'w'; - case SWIZZLE_ZERO: return '0'; - case SWIZZLE_ONE: return '1'; - case SWIZZLE_NIL: return '_'; - default: return '?'; - } -} - -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst) -{ - int nargs; - int i; - - _mesa_printf(" RGB: "); - for(i = 0; i < 3; ++i) { - if (inst->RGB.Src[i].Used) - print_pair_src(i, inst->RGB.Src + i); - } - _mesa_printf("\n"); - _mesa_printf(" Alpha:"); - for(i = 0; i < 3; ++i) { - if (inst->Alpha.Src[i].Used) - print_pair_src(i, inst->Alpha.Src + i); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); - if (inst->RGB.WriteMask) - _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, - (inst->RGB.WriteMask & 1) ? "x" : "", - (inst->RGB.WriteMask & 2) ? "y" : "", - (inst->RGB.WriteMask & 4) ? "z" : ""); - if (inst->RGB.OutputWriteMask) - _mesa_printf(" COLOR.%s%s%s", - (inst->RGB.OutputWriteMask & 1) ? "x" : "", - (inst->RGB.OutputWriteMask & 2) ? "y" : "", - (inst->RGB.OutputWriteMask & 4) ? "z" : ""); - nargs = num_pairinst_args(inst->RGB.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; - const char* neg = inst->RGB.Arg[i].Negate ? 
"-" : ""; - _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), - swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), - abs); - } - _mesa_printf("\n"); - - _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : ""); - if (inst->Alpha.WriteMask) - _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); - if (inst->Alpha.OutputWriteMask) - _mesa_printf(" COLOR.w"); - if (inst->Alpha.DepthWriteMask) - _mesa_printf(" DEPTH.w"); - nargs = num_pairinst_args(inst->Alpha.Opcode); - for(i = 0; i < nargs; ++i) { - const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; - const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; - _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, - swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); - } - _mesa_printf("\n"); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index ff76178551..1600598428 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -28,116 +28,97 @@ #ifndef __RADEON_PROGRAM_PAIR_H_ #define __RADEON_PROGRAM_PAIR_H_ -#include "radeon_program.h" +#include "radeon_code.h" +#include "radeon_opcodes.h" +#include "radeon_program_constants.h" struct r300_fragment_program_compiler; /** - * Represents a paired instruction, as found in R300 and R500 + * \file + * Represents a paired ALU instruction, as found in R300 and R500 * fragment programs. + * + * Note that this representation is taking some liberties as far + * as register files are concerned, to allow separate register + * allocation. 
+ * + * Also note that there are some subtleties in that the semantics + * of certain opcodes are implicitly changed in this representation; + * see \ref rc_pair_translate */ + + struct radeon_pair_instruction_source { - GLuint Index:8; - GLuint Constant:1; - GLuint Used:1; + unsigned int Used:1; + rc_register_file File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; }; struct radeon_pair_instruction_rgb { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:3; - GLuint OutputWriteMask:3; - GLuint Saturate:1; + rc_opcode Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:3; + unsigned int OutputWriteMask:3; + unsigned int Saturate:1; struct radeon_pair_instruction_source Src[3]; struct { - GLuint Source:2; - GLuint Swizzle:9; - GLuint Abs:1; - GLuint Negate:1; + unsigned int Source:2; + unsigned int Swizzle:9; + unsigned int Abs:1; + unsigned int Negate:1; } Arg[3]; }; struct radeon_pair_instruction_alpha { - GLuint Opcode:8; - GLuint DestIndex:8; - GLuint WriteMask:1; - GLuint OutputWriteMask:1; - GLuint DepthWriteMask:1; - GLuint Saturate:1; + rc_opcode Opcode:8; + unsigned int DestIndex:RC_REGISTER_INDEX_BITS; + unsigned int WriteMask:1; + unsigned int OutputWriteMask:1; + unsigned int DepthWriteMask:1; + unsigned int Saturate:1; struct radeon_pair_instruction_source Src[3]; struct { - GLuint Source:2; - GLuint Swizzle:3; - GLuint Abs:1; - GLuint Negate:1; + unsigned int Source:2; + unsigned int Swizzle:3; + unsigned int Abs:1; + unsigned int Negate:1; } Arg[3]; }; -struct radeon_pair_instruction { +struct rc_pair_instruction { struct radeon_pair_instruction_rgb RGB; struct radeon_pair_instruction_alpha Alpha; -}; - -enum { - RADEON_OPCODE_TEX = 0, - RADEON_OPCODE_TXB, - RADEON_OPCODE_TXP, - RADEON_OPCODE_KIL + rc_write_aluresult WriteALUResult:2; + rc_compare_func ALUResultCompare:3; }; -struct radeon_pair_texture_instruction { - GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */ - - GLuint DestIndex:8; - GLuint 
WriteMask:4; - GLuint TexSrcUnit:5; - GLuint TexSrcTarget:3; - - GLuint SrcIndex:8; - GLuint SrcSwizzle:12; -}; +/** + * General helper functions for dealing with the paired instruction format. + */ +/*@{*/ +int rc_pair_alloc_source(struct rc_pair_instruction *pair, + unsigned int rgb, unsigned int alpha, + rc_register_file file, unsigned int index); +/*@}*/ /** - * + * Compiler passes that operate with the paired format. */ -struct radeon_pair_handler { - /** - * Write a paired instruction to the hardware. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*); - - /** - * Write a texture instruction to the hardware. - * Register indices have already been rewritten to the allocated - * hardware register numbers. - * - * @return GL_FALSE on error. - */ - GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*); - - /** - * Called before a block of contiguous, independent texture - * instructions is emitted. - */ - GLboolean (*BeginTexBlock)(void*); - - unsigned MaxHwTemps; -}; - -void radeonPairProgram( - struct r300_fragment_program_compiler * compiler, - const struct radeon_pair_handler*, void *userdata); +/*@{*/ +struct radeon_pair_handler; -void radeonPrintPairInstruction(struct radeon_pair_instruction *inst); +void rc_pair_translate(struct r300_fragment_program_compiler *c); +void rc_pair_schedule(struct r300_fragment_program_compiler *c); +void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps); +/*@}*/ #endif /* __RADEON_PROGRAM_PAIR_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c new file mode 100644 index 0000000000..d863b82d53 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -0,0 +1,300 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this 
software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "radeon_program.h" + +#include <stdio.h> + +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs) +{ + if (func == RC_COMPARE_FUNC_NEVER) { + fprintf(f, "false"); + } else if (func == RC_COMPARE_FUNC_ALWAYS) { + fprintf(f, "true"); + } else { + const char * op; + switch(func) { + case RC_COMPARE_FUNC_LESS: op = "<"; break; + case RC_COMPARE_FUNC_EQUAL: op = "=="; break; + case RC_COMPARE_FUNC_LEQUAL: op = "<="; break; + case RC_COMPARE_FUNC_GREATER: op = ">"; break; + case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break; + case RC_COMPARE_FUNC_GEQUAL: op = ">="; break; + default: op = "???"; break; + } + fprintf(f, "%s %s %s", lhs, op, rhs); + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else if (file == RC_FILE_SPECIAL) { + switch(index) { + case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break; + default: fprintf(f, "special[%i]", index); break; + } + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? 
" + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static char rc_swizzle_char(unsigned int swz) +{ + switch(swz) { + case RC_SWIZZLE_X: return 'x'; + case RC_SWIZZLE_Y: return 'y'; + case RC_SWIZZLE_Z: return 'z'; + case RC_SWIZZLE_W: return 'w'; + case RC_SWIZZLE_ZERO: return '0'; + case RC_SWIZZLE_ONE: return '1'; + case RC_SWIZZLE_HALF: return 'H'; + case RC_SWIZZLE_UNUSED: return '_'; + } + return '?'; +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + fprintf(f, "%c", rc_swizzle_char(swz)); + } +} + +static void rc_print_src_register(FILE * f, struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 
0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned int reg; + + fprintf(f, "%s", opcode->Name); + + switch(inst->U.I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->U.I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst->U.I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->U.I.TexSrcTarget), + inst->U.I.TexShadow ? "SHADOW" : "", + inst->U.I.TexSrcUnit); + } + + fprintf(f, ";"); + + if (inst->U.I.WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, + (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? 
"x" : "w", + inst->U.I.ALUResultCompare, "0"); + fprintf(f, ")]"); + } + + fprintf(f, "\n"); +} + +static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst) +{ + struct rc_pair_instruction * inst = &fullinst->U.P; + int printedsrc = 0; + + for(unsigned int src = 0; src < 3; ++src) { + if (inst->RGB.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.xyz = ", src); + rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0); + printedsrc = 1; + } + if (inst->Alpha.Src[src].Used) { + if (printedsrc) + fprintf(f, ", "); + fprintf(f, "src%i.w = ", src); + rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0); + printedsrc = 1; + } + } + fprintf(f, "\n"); + + if (inst->RGB.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode); + + fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : ""); + if (inst->RGB.WriteMask) + fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex, + (inst->RGB.WriteMask & 1) ? "x" : "", + (inst->RGB.WriteMask & 2) ? "y" : "", + (inst->RGB.WriteMask & 4) ? "z" : ""); + if (inst->RGB.OutputWriteMask) + fprintf(f, " color.%s%s%s", + (inst->RGB.OutputWriteMask & 1) ? "x" : "", + (inst->RGB.OutputWriteMask & 2) ? "y" : "", + (inst->RGB.OutputWriteMask & 4) ? "z" : ""); + if (inst->WriteALUResult == RC_ALURESULT_X) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->RGB.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->RGB.Arg[arg].Negate ? 
"-" : ""; + fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source, + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)), + rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)), + abs); + } + fprintf(f, "\n"); + } + + if (inst->Alpha.Opcode != RC_OPCODE_NOP) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode); + + fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : ""); + if (inst->Alpha.WriteMask) + fprintf(f, " temp[%i].w", inst->Alpha.DestIndex); + if (inst->Alpha.OutputWriteMask) + fprintf(f, " color.w"); + if (inst->Alpha.DepthWriteMask) + fprintf(f, " depth.w"); + if (inst->WriteALUResult == RC_ALURESULT_W) + fprintf(f, " aluresult"); + + for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) { + const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : ""; + const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : ""; + fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source, + rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); + } + fprintf(f, "\n"); + } + + if (inst->WriteALUResult) { + fprintf(f, " [aluresult = ("); + rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0"); + fprintf(f, ")]\n"); + } +} + +/** + * Print program to stderr, default options. 
+ */ +void rc_print_program(const struct rc_program *prog) +{ + unsigned int linenum = 0; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program\n"); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + if (inst->Type == RC_INSTRUCTION_PAIR) + rc_print_pair_instruction(stderr, inst); + else + rc_print_normal_instruction(stderr, inst); + + linenum++; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h new file mode 100644 index 0000000000..c81d5f7a5e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ |