diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c')
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c | 321 |
1 files changed, 249 insertions, 72 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index d694725c9b..b1b14394b6 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -37,10 +37,6 @@ * * \author Corbin Simpson <MostAwesomeDude@gmail.com> * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * */ #include "r500_fragprog.h" @@ -51,7 +47,6 @@ #define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ struct r500_fragment_program_code *code = &c->code->code.r500 #define error(fmt, args...) do { \ @@ -60,63 +55,80 @@ } while(0) -static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode) +struct branch_info { + int If; + int Else; + int Endif; +}; + +struct emit_state { + struct radeon_compiler * C; + struct r500_fragment_program_code * Code; + + struct branch_info * Branches; + unsigned int CurrentBranchDepth; + unsigned int BranchesReserved; + + unsigned int MaxBranchDepth; +}; + +static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; - case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; - case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; - case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; - case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; - case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; + case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP; + case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3; + case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4; + case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC; default: error("translate_rgb_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; - case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; - case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; - case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; + case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN; + case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP; } } -static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode) +static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode) { switch(opcode) { - case OPCODE_CMP: return R500_ALPHA_OP_CMP; - case OPCODE_COS: return R500_ALPHA_OP_COS; - case OPCODE_DDX: return R500_ALPHA_OP_MDH; - case OPCODE_DDY: return R500_ALPHA_OP_MDV; - case OPCODE_DP3: return R500_ALPHA_OP_DP; - case OPCODE_DP4: return R500_ALPHA_OP_DP; - case OPCODE_EX2: return R500_ALPHA_OP_EX2; - case OPCODE_FRC: return R500_ALPHA_OP_FRC; - case OPCODE_LG2: return R500_ALPHA_OP_LN2; + case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP; + case RC_OPCODE_COS: return R500_ALPHA_OP_COS; + case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH; + case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV; + case RC_OPCODE_DP3: return R500_ALPHA_OP_DP; + case RC_OPCODE_DP4: return R500_ALPHA_OP_DP; + case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2; + case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC; + case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2; default: error("translate_alpha_op(%d): unknown opcode\n", opcode); /* fall through */ - case OPCODE_NOP: + case RC_OPCODE_NOP: /* fall through */ - case OPCODE_MAD: return R500_ALPHA_OP_MAD; - case OPCODE_MAX: return R500_ALPHA_OP_MAX; - case OPCODE_MIN: return R500_ALPHA_OP_MIN; - case OPCODE_RCP: return R500_ALPHA_OP_RCP; - case OPCODE_RSQ: return R500_ALPHA_OP_RSQ; - case OPCODE_SIN: return R500_ALPHA_OP_SIN; + case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD; + case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX; + case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN; + case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP; + case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ; + case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN; } } -static GLuint fix_hw_swizzle(GLuint swz) +static unsigned int fix_hw_swizzle(unsigned int swz) { if (swz == 5) swz = 6; - if (swz == SWIZZLE_NIL) swz = 4; + if (swz == RC_SWIZZLE_UNUSED) swz = 4; return swz; } -static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) +static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) { - GLuint t = inst->RGB.Arg[arg].Source; + unsigned int t = inst->RGB.Arg[arg].Source; int comp; t |= inst->RGB.Arg[arg].Negate << 11; t |= inst->RGB.Arg[arg].Abs << 12; @@ -127,39 +139,57 @@ static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg) return t; } -static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i) +static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) { - GLuint t = inst->Alpha.Arg[i].Source; + unsigned int t = inst->Alpha.Arg[i].Source; t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; t |= inst->Alpha.Arg[i].Negate << 5; t |= inst->Alpha.Arg[i].Abs << 6; return t; } -static void use_temporary(struct r500_fragment_program_code* code, GLuint index) +static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func) +{ + switch(func) { + case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ; + case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT; + case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE; + case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE; + default: + rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func); + return 0; + } +} + +static void use_temporary(struct r500_fragment_program_code* code, unsigned int index) { if (index > code->max_temp_idx) code->max_temp_idx = index; } -static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) +static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src) { - if (!src.Constant) + if (src.File == RC_FILE_CONSTANT) { + return src.Index | 0x100; + } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); - return src.Index | src.Constant << 8; + return src.Index; + } + + return 0; } /** * Emit a paired ALU instruction. */ -static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) +static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_alu: Too many instructions"); - return GL_FALSE; + return; } int ip = ++code->inst_end; @@ -167,17 +197,22 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode); code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode); - if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) + if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) { code->inst[ip].inst0 = R500_INST_TYPE_OUT; - else + if (inst->WriteALUResult) { + error("%s: cannot write output and ALU result at the same time"); + return; + } + } else { code->inst[ip].inst0 = R500_INST_TYPE_ALU; + } code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT; code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14); code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18); if (inst->Alpha.DepthWriteMask) { code->inst[ip].inst4 |= R500_ALPHA_W_OMASK; - c->code->writes_depth = GL_TRUE; + c->code->writes_depth = 1; } code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex); @@ -206,12 +241,21 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst) code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT; code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT; - return GL_TRUE; + if (inst->WriteALUResult) { + code->inst[ip].inst3 |= R500_ALU_RGB_WMASK; + + if (inst->WriteALUResult == RC_ALURESULT_X) + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED; + else + code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA; + + code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare); + } } -static GLuint translate_strq_swizzle(GLuint swizzle) +static unsigned int translate_strq_swizzle(unsigned int swizzle) { - GLuint swiz = 0; + unsigned int swiz = 0; int i; for (i = 0; i < 4; i++) swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2; @@ -221,67 +265,193 @@ static GLuint translate_strq_swizzle(GLuint swizzle) /** * Emit a single TEX instruction */ -static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst) +static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst) { PROG_CODE; if (code->inst_end >= 511) { error("emit_tex: Too many instructions"); - return GL_FALSE; + return 0; } int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_TEX - | (inst->WriteMask << 11) + | (inst->DstReg.WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) - code->inst[ip].inst1 |= R500_TEX_UNSCALED; + if (inst->TexSrcTarget == RC_TEXTURE_RECT) + code->inst[ip].inst1 |= R500_TEX_UNSCALED; switch (inst->Opcode) { - case RADEON_OPCODE_KIL: + case RC_OPCODE_KIL: code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL; break; - case RADEON_OPCODE_TEX: + case RC_OPCODE_TEX: code->inst[ip].inst1 |= R500_TEX_INST_LD; break; - case RADEON_OPCODE_TXB: + case RC_OPCODE_TXB: code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS; break; - case RADEON_OPCODE_TXP: + case RC_OPCODE_TXP: code->inst[ip].inst1 |= R500_TEX_INST_PROJ; break; default: error("emit_tex can't handle opcode %x\n", inst->Opcode); } - code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex) - | (translate_strq_swizzle(inst->SrcSwizzle) << 8) - | R500_TEX_DST_ADDR(inst->DestIndex) + use_temporary(code, inst->SrcReg[0].Index); + if (inst->Opcode != RC_OPCODE_KIL) + use_temporary(code, inst->DstReg.Index); + + code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) + | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) + | R500_TEX_DST_ADDR(inst->DstReg.Index) | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - return GL_TRUE; + return 1; } -static const struct radeon_pair_handler pair_handler = { - .EmitPaired = emit_paired, - .EmitTex = emit_tex, - .MaxHwTemps = 128 -}; +static void grow_branches(struct emit_state * s) +{ + unsigned int newreserved = s->BranchesReserved * 2; + struct branch_info * newbranches; + + if (!newreserved) + newreserved = 4; + + newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info)); + memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info)); + + s->Branches = newbranches; + s->BranchesReserved = newreserved; +} + +static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) +{ + if (s->Code->inst_end >= 511) { + rc_error(s->C, "emit_tex: Too many instructions"); + return; + } + + unsigned int newip = ++s->Code->inst_end; + + s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; + + if (inst->U.I.Opcode == RC_OPCODE_IF) { + if (s->CurrentBranchDepth >= 32) { + rc_error(s->C, "Branch depth exceeds hardware limit"); + return; + } + + if (s->CurrentBranchDepth >= s->BranchesReserved) + grow_branches(s); + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++]; + branch->If = newip; + branch->Else = -1; + branch->Endif = -1; + + if (s->CurrentBranchDepth > s->MaxBranchDepth) + s->MaxBranchDepth = s->CurrentBranchDepth; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) { + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Else = newip; + + /* actual instruction is filled in at ENDIF time */ + } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { + if (!s->CurrentBranchDepth) { + rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__); + return; + } + + struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1]; + branch->Endif = newip; + + s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ + | R500_FC_B_OP0_INCR /* increment branch counter if stay */ + ; + + if (branch->Else >= 0) { + /* increment branch counter also if jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1); + + s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_B_ELSE /* all active pixels want to jump */ + | R500_FC_B_OP0_NONE /* no counter op if stay */ + | R500_FC_B_OP1_DECR /* decrement branch counter if jump */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } else { + /* don't touch branch counter on jump */ + s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE; + s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + } + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) + ; + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); + + s->CurrentBranchDepth--; + } else { + rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode); + } +} void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { + struct emit_state s; struct r500_fragment_program_code *code = &compiler->code->code.r500; - _mesa_bzero(code, sizeof(*code)); + memset(&s, 0, sizeof(s)); + s.C = &compiler->Base; + s.Code = code; + + memset(code, 0, sizeof(*code)); code->max_temp_idx = 1; code->inst_end = -1; - radeonPairProgram(compiler, &pair_handler, compiler); + for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; + inst = inst->Next) { + if (inst->Type == RC_INSTRUCTION_NORMAL) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + + if (opcode->IsFlowControl) { + emit_flowcontrol(&s, inst); + } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) { + continue; + } else { + emit_tex(compiler, &inst->U.I); + } + } else { + emit_paired(compiler, &inst->U.P); + } + } + + if (code->max_temp_idx >= 128) + rc_error(&compiler->Base, "Too many hardware temporaries used"); + if (compiler->Base.Error) return; @@ -296,4 +466,11 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi int ip = ++code->inst_end; code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } + + if (s.MaxBranchDepth >= 4) { + if (code->max_temp_idx < 1) + code->max_temp_idx = 1; + + code->us_fc_ctrl |= R500_FC_FULL_FC_EN; + } } |