From af2ca5dc3823269636bfa8377ed971a761096b2e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 16:31:53 -0600 Subject: cell: initial support for IF/ELSE/ENDIF in fragment shader codegen Only one level of if/else/endif nesting is currently working. --- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 193 ++++++++++++++++++++++++++--- 1 file changed, 175 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index d7a8846ab3..8d8dfea039 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -71,6 +71,11 @@ struct codegen int num_itemps; int itemps[3]; + /** Current IF/ELSE/ENDIF nesting level */ + int if_nesting; + /** Index of execution mask register */ + int exec_mask_reg; + struct spe_function *f; boolean error; }; @@ -112,18 +117,43 @@ get_const_one_reg(struct codegen *gen) { if (gen->one_reg <= 0) { gen->one_reg = spe_allocate_available_register(gen->f); - } - /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(gen->f, gen->one_reg, 1.0f); + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(gen->f, gen->one_reg, 1.0f); #if DISASSEM - printf("il\tr%d, 1.0f\n", gen->one_reg); + printf("\til\tr%d, 1.0f\n", gen->one_reg); #endif + } return gen->one_reg; } +/** + * Return index of the pixel execution mask. + * The register is allocated an initialized upon the first call. + * + * The pixel execution mask controls which pixels in a quad are + * modified, according to surrounding conditionals, loops, etc. + */ +static int +get_exec_mask_reg(struct codegen *gen) +{ + if (gen->exec_mask_reg <= 0) { + gen->exec_mask_reg = spe_allocate_available_register(gen->f); + + /* exec_mask = {~0, ~0, ~0, ~0} */ + spe_load_int(gen->f, gen->exec_mask_reg, ~0); +#if DISASSEM + printf("INIT EXEC MASK:\n"); + printf("\tload\tr%d, 0x%x\n", gen->exec_mask_reg, ~0); +#endif + } + + return gen->exec_mask_reg; +} + + /** * Return the index of the SPU temporary containing the named TGSI * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we @@ -136,7 +166,7 @@ get_src_reg(struct codegen *gen, int channel, const struct tgsi_full_src_register *src) { - int reg; + int reg = -1; int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); assert(swizzle >= 0); @@ -156,7 +186,7 @@ get_src_reg(struct codegen *gen, /* Load: reg = memory[(machine_reg) + offset] */ spe_lqd(gen->f, reg, gen->inputs_reg, offset); #if DISASSEM - printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); + printf("\tlqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); #endif } break; @@ -184,11 +214,14 @@ get_dst_reg(struct codegen *gen, int channel, const struct tgsi_full_dst_register *dest) { - int reg; + int reg = -1; switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[dest->DstRegister.Index][channel]; + if (gen->if_nesting > 0) + reg = get_itemp(gen); + else + reg = gen->temp_regs[dest->DstRegister.Index][channel]; break; case TGSI_FILE_OUTPUT: reg = get_itemp(gen); @@ -214,17 +247,56 @@ store_dest_reg(struct codegen *gen, { switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - /* no-op */ + if (gen->if_nesting > 0) { + int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int exec_reg = get_exec_mask_reg(gen); + /* Mix d with new value according to exec mask: + * d[i] = mask_reg[i] ? value_reg : d_reg + */ + spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); +#if DISASSEM + printf("\tselb\tr%d, r%d, r%d, r%d # EXEC MASK'ed\n", + d_reg, d_reg, value_reg, exec_reg); +#endif + } + else { + /* we're not inside a condition or loop: do nothing special */ + } break; case TGSI_FILE_OUTPUT: { /* offset is measured in quadwords, not bytes */ int offset = dest->DstRegister.Index * 4 + channel; - /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); + if (gen->if_nesting > 0) { + int exec_reg = get_exec_mask_reg(gen); + int curval_reg = get_itemp(gen); + /* First read the current value from memory: + * Load: curval = memory[(machine_reg) + offset] + */ + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset); + /* Mix curval with newvalue according to exec mask: + * d[i] = mask_reg[i] ? value_reg : d_reg + */ + spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); + /* Store: memory[(machine_reg) + offset] = curval */ + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset); #if DISASSEM - printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset); + printf("\tlqd\tr%d, r%d + %d\n", + curval_reg, gen->outputs_reg, offset); + printf("\tselb\tr%d, r%d, r%d, r%d # EXEC MASK'ed\n", + curval_reg, curval_reg, value_reg, exec_reg); + printf("\tstqd\tr%d, r%d + %d\n", + curval_reg, gen->outputs_reg, offset); #endif + } + else { + /* Store: memory[(machine_reg) + offset] = reg */ + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); +#if DISASSEM + printf("\tstqd\tr%d, r%d + %d\n", + value_reg, gen->outputs_reg, offset); +#endif + } } break; default: @@ -237,6 +309,9 @@ static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("MOV:\n"); +#endif for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -244,7 +319,7 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) /* XXX we don't always need to actually emit a mov instruction here */ spe_move(gen->f, dst_reg, src_reg); #if DISASSEM - printf("mov\tr%d, r%d\n", dst_reg, src_reg); + printf("\tmov\tr%d, r%d\n", dst_reg, src_reg); #endif store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -254,6 +329,7 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) } + /** * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD * becomes (up to) four SPU "fa" instructions because we're doing SOA @@ -263,6 +339,9 @@ static boolean emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("ADD:\n"); +#endif /* Loop over Red/Green/Blue/Alpha channels */ for (ch = 0; ch < 4; ch++) { /* If the dest R, G, B or A writemask is enabled... */ @@ -275,7 +354,7 @@ emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) /* Emit actual SPE instruction: d = s1 + s2 */ spe_fa(gen->f, d_reg, s1_reg, s2_reg); #if DISASSEM - printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); + printf("\tfa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); #endif /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ @@ -295,6 +374,9 @@ static boolean emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("MUL:\n"); +#endif for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -303,7 +385,7 @@ emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) /* d = s1 * s2 */ spe_fm(gen->f, d_reg, s1_reg, s2_reg); #if DISASSEM - printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); + printf("\tfm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); #endif store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -324,6 +406,9 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; +#if DISASSEM + printf("SGT:\n"); +#endif for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); @@ -333,14 +418,14 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) /* d = (s1 > s2) */ spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); #if DISASSEM - printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); + printf("\tfcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); #endif /* convert d from 0x0/0xffffffff to 0.0/1.0 */ /* d = d & one_reg */ spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); #if DISASSEM - printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); + printf("\tand\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); #endif store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); @@ -352,6 +437,71 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) } +static boolean +emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int channel = 0; + const int exec_reg = get_exec_mask_reg(gen); + + /* update execution mask with the predicate register */ + int tmp_reg = spe_allocate_available_register(gen->f); + int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + + /* tmp = (s1_reg == 0) */ + spe_ceqi(gen->f, tmp_reg, s1_reg, 0); + /* tmp = !tmp */ + spe_complement(gen->f, tmp_reg); + /* exec_mask = exec_mask & tmp */ + spe_and(gen->f, exec_reg, exec_reg, tmp_reg); +#if DISASSEM + printf("IF:\n"); + printf("\tseqi\tr%d, r%d, 0;\n", tmp_reg, s1_reg); + printf("\tcomp\tr%d\n", tmp_reg); + printf("\tand\tr%d, r%d, r%d\n", exec_reg, exec_reg, tmp_reg); +#endif + + gen->if_nesting++; + + spe_release_register(gen->f, tmp_reg); + + return true; +} + + +static boolean +emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + + /* exec_mask = !exec_mask */ + spe_complement(gen->f, exec_reg); +#if DISASSEM + printf("ELSE:\n"); + printf("\tcomp\tr%d;\n", exec_reg); +#endif + return true; +} + + +static boolean +emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + + /* XXX todo: pop execution mask */ + + spe_load_int(gen->f, exec_reg, ~0x0); +#if DISASSEM + printf("ENDIF:\n"); + printf("\tli\tr%d, ~0x0\n", exec_reg); +#endif + + gen->if_nesting--; + return true; +} + + + /** * Emit END instruction. * We just return from the shader function at this point. @@ -365,7 +515,7 @@ emit_END(struct codegen *gen) /* return from function call */ spe_bi(gen->f, SPE_REG_RA, 0, 0); #if DISASSEM - printf("bi\trRA\n"); + printf("\tbi\trRA\n"); #endif return true; } @@ -390,6 +540,13 @@ emit_instruction(struct codegen *gen, case TGSI_OPCODE_END: return emit_END(gen); + case TGSI_OPCODE_IF: + return emit_IF(gen, inst); + case TGSI_OPCODE_ELSE: + return emit_ELSE(gen, inst); + case TGSI_OPCODE_ENDIF: + return emit_ENDIF(gen, inst); + /* XXX lots more cases to do... */ default: -- cgit v1.2.3