diff options
-rw-r--r-- | src/gallium/drivers/cell/common.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_context.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 272 | ||||
-rw-r--r-- | src/gallium/drivers/cell/ppu/cell_state_emit.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/cell/spu/spu_main.c | 115 |
5 files changed, 337 insertions, 68 deletions
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 8f08854117..f0ff96eb47 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -104,12 +104,11 @@ #define CELL_BUFFER_STATUS_FREE 10 #define CELL_BUFFER_STATUS_USED 20 - -#define CELL_DEBUG_CHECKER (1 << 0) -#define CELL_DEBUG_ASM (1 << 1) -#define CELL_DEBUG_SYNC (1 << 2) - - +#define CELL_DEBUG_CHECKER (1 << 0) +#define CELL_DEBUG_ASM (1 << 1) +#define CELL_DEBUG_SYNC (1 << 2) +#define CELL_DEBUG_FRAGMENT_OPS (1 << 3) +#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) /** Max instructions for doing per-fragment operations */ #define SPU_MAX_FRAGMENT_OPS_INSTS 64 diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index b418271dca..62e213ea35 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -89,6 +89,8 @@ static const struct debug_named_value cell_debug_flags[] = { {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ + {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ {NULL, 0} }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 06219d4e98..2c8c9e0d2c 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -229,7 +229,36 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, spe_release_register(f, amask_reg); } +/* This is a convenient and oft-used sequence. 
It chooses + * the smaller of each element of reg1 and reg2, and combines them + * into the result register, as follows: + * + * The Float Compare Greater Than (fcgt) instruction will put + * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2. + * + * Then the Select Bits (selb) instruction will take bits from + * reg1 where compare_reg is 0, and from reg2 where compare_reg is + * 1. Ergo, result_reg will have the bits from reg1 where reg1 <= reg2, + * and the bits from reg2 where reg1 > reg2, which is exactly the + * MIN operation. + */ +#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\ + int compare_reg = spe_allocate_available_register(f); \ + spe_fcgt(f, compare_reg, reg1, reg2); \ + spe_selb(f, result_reg, reg1, reg2, compare_reg); \ + spe_release_register(f, compare_reg); \ +} +/* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN + * sequence above, except that the registers specified when selecting + * bits are reversed. + */ +#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\ + int compare_reg = spe_allocate_available_register(f); \ + spe_fcgt(f, compare_reg, reg1, reg2); \ + spe_selb(f, result_reg, reg2, reg1, compare_reg); \ + spe_release_register(f, compare_reg); \ +} /** * Generate SPE code to implement the given blend mode for a quad of pixels. 
@@ -242,6 +271,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, */ static void gen_blend(const struct pipe_blend_state *blend, + const struct pipe_blend_color *blend_color, struct spe_function *f, enum pipe_format color_format, int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, @@ -262,10 +292,53 @@ gen_blend(const struct pipe_blend_state *blend, int fbB_reg = spe_allocate_available_register(f); int fbA_reg = spe_allocate_available_register(f); - int one_reg = spe_allocate_available_register(f); int tmp_reg = spe_allocate_available_register(f); - boolean one_reg_set = false; /* avoid setting one_reg more than once */ + /* These values might or might not eventually get put into + * registers. We avoid allocating them and setting them until + * they're actually needed; then we avoid setting them more than + * once, and release them at the end of code generation. + */ + boolean one_reg_set = false; + int one_reg; +#define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\ + one_reg = spe_allocate_available_register(f); \ + spe_load_float(f, one_reg, 1.0f); \ + one_reg_set = true; \ +} +#define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\ + spe_release_register(f, one_reg); \ +} + + boolean const_color_set = false; + int constR_reg, constG_reg, constB_reg; +#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\ + constR_reg = spe_allocate_available_register(f); \ + constG_reg = spe_allocate_available_register(f); \ + constB_reg = spe_allocate_available_register(f); \ + spe_load_float(f, constR_reg, blend_color->color[0]); \ + spe_load_float(f, constG_reg, blend_color->color[1]); \ + spe_load_float(f, constB_reg, blend_color->color[2]); \ + const_color_set = true;\ +} +#define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\ + spe_release_register(f, constR_reg); \ + spe_release_register(f, constG_reg); \ + spe_release_register(f, constB_reg); \ +} + + boolean const_alpha_set = false; + int constA_reg; +#define 
SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\ + constA_reg = spe_allocate_available_register(f); \ + spe_load_float(f, constA_reg, blend_color->color[3]); \ + const_alpha_set = true; \ +} +#define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\ + spe_release_register(f, constA_reg); \ +} + + /* Real code starts here */ ASSERT(blend->blend_enable); @@ -348,30 +421,161 @@ gen_blend(const struct pipe_blend_state *blend, /* - * Compute Src RGB terms + * Compute Src RGB terms. We're actually looking for the value + * of (the appropriate RGB factors) * (the incoming source RGB color). */ switch (blend->rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (R,G,B) */ spe_move(f, term1R_reg, fragR_reg); spe_move(f, term1G_reg, fragG_reg); spe_move(f, term1B_reg, fragB_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term1R_reg); - spe_zero(f, term1G_reg); - spe_zero(f, term1B_reg); + /* factors = (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term1R_reg, 0.0f); + spe_load_float(f, term1G_reg, 0.0f); + spe_load_float(f, term1B_reg, 0.0f); break; case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ spe_fm(f, term1R_reg, fragR_reg, fragR_reg); spe_fm(f, term1G_reg, fragG_reg, fragG_reg); spe_fm(f, term1B_reg, fragB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ spe_fm(f, term1R_reg, fragR_reg, fragA_reg); spe_fm(f, term1G_reg, fragG_reg, fragA_reg); spe_fm(f, term1B_reg, fragB_reg, fragA_reg); break; - /* XXX more cases */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - R */ + spe_fs(f, tmp_reg, one_reg, fragR_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B */ + spe_fs(f, tmp_reg, one_reg, fragG_reg); + spe_fm(f, 
term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, fragB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term1R_reg, fragR_reg, fbR_reg); + spe_fm(f, term1G_reg, fragG_reg, fbG_reg); + spe_fm(f, term1B_reg, fragB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - Rfb */ + spe_fs(f, tmp_reg, one_reg, fbR_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B */ + spe_fs(f, tmp_reg, one_reg, fbG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, fbB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - A */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* term = R * tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + /* repeat for G and B with the same (1-A) factor */ + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ + spe_fm(f, term1R_reg, fragR_reg, fbA_reg); + spe_fm(f, term1G_reg, fragG_reg, fbA_reg); + spe_fm(f, term1B_reg, fragB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */ + /* we'll need the optional constant {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* tmp = 1 - A */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* term = R * tmp, G*tmp, and B*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + 
spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We'll need the optional blend color registers */ + SET_CONST_COLOR_IF_UNSET(f,blend_color) + /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ + spe_fm(f, term1R_reg, fragR_reg, constR_reg); + spe_fm(f, term1G_reg, fragG_reg, constG_reg); + spe_fm(f, term1B_reg, fragB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + SET_CONST_ALPHA_IF_UNSET(f, blend_color) + /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ + spe_fm(f, term1R_reg, fragR_reg, constA_reg); + spe_fm(f, term1G_reg, fragG_reg, constA_reg); + spe_fm(f, term1B_reg, fragB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need both the optional {1,1,1,1} register, and the optional + * constant color registers + */ + SET_ONE_REG_IF_UNSET(f) + SET_CONST_COLOR_IF_UNSET(f, blend_color) + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */ + spe_fs(f, tmp_reg, one_reg, constR_reg); + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, constG_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fs(f, tmp_reg, one_reg, constB_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional {1,1,1,1} register and the optional + * constant alpha register + */ + SET_ONE_REG_IF_UNSET(f) + SET_CONST_ALPHA_IF_UNSET(f, blend_color) + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */ + spe_fs(f, tmp_reg, one_reg, constA_reg); + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* We'll need the optional {1,1,1,1} register */ + SET_ONE_REG_IF_UNSET(f) + /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so + * 
term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + */ + /* tmp = 1 - Afb */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* tmp = min(A,tmp) */ + FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg) + /* term = R*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + + /* non-OpenGL cases? */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: ASSERT(0); } @@ -421,6 +625,7 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { + one_reg = spe_allocate_available_register(f); spe_load_float(f, one_reg, 1.0f); one_reg_set = true; } @@ -432,6 +637,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2B_reg, fbB_reg, tmp_reg); break; /* XXX more cases */ + // GL_ONE_MINUS_SRC_COLOR + // GL_DST_COLOR + // GL_ONE_MINUS_DST_COLOR + // GL_DST_ALPHA + // GL_CONSTANT_COLOR + // GL_ONE_MINUS_CONSTANT_COLOR + // GL_CONSTANT_ALPHA + // GL_ONE_MINUS_CONSTANT_ALPHA default: ASSERT(0); } @@ -452,6 +665,7 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* one = {1.0, 1.0, 1.0, 1.0} */ if (!one_reg_set) { + one_reg = spe_allocate_available_register(f); spe_load_float(f, one_reg, 1.0f); one_reg_set = true; } @@ -461,6 +675,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term2A_reg, fbA_reg, tmp_reg); break; /* XXX more cases */ + // GL_ONE_MINUS_SRC_COLOR + // GL_DST_COLOR + // GL_ONE_MINUS_DST_COLOR + // GL_DST_ALPHA + // GL_CONSTANT_COLOR + // GL_ONE_MINUS_CONSTANT_COLOR + // GL_CONSTANT_ALPHA + // GL_ONE_MINUS_CONSTANT_ALPHA default: ASSERT(0); } @@ -479,7 +701,21 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragG_reg, term1G_reg, term2G_reg); spe_fs(f, fragB_reg, term1B_reg, term2B_reg); break; - /* XXX 
more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragR_reg, term2R_reg, term1R_reg); + spe_fs(f, fragG_reg, term2G_reg, term1G_reg); + spe_fs(f, fragB_reg, term2B_reg, term1B_reg); + break; + case PIPE_BLEND_MIN: + FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg) + FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg) + FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg) + break; + case PIPE_BLEND_MAX: + FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg) + FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg) + FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg) + break; default: ASSERT(0); } @@ -494,7 +730,15 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLEND_SUBTRACT: spe_fs(f, fragA_reg, term1A_reg, term2A_reg); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragA_reg, term2A_reg, term1A_reg); + break; + case PIPE_BLEND_MIN: + FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg) + break; + case PIPE_BLEND_MAX: + FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg) + break; default: ASSERT(0); } @@ -514,8 +758,12 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, fbB_reg); spe_release_register(f, fbA_reg); - spe_release_register(f, one_reg); spe_release_register(f, tmp_reg); + + /* Free any optional registers that actually got used */ + RELEASE_ONE_REG_IF_USED(f) + RELEASE_CONST_COLOR_IF_USED(f) + RELEASE_CONST_ALPHA_IF_USED(f) } @@ -629,6 +877,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const struct pipe_depth_stencil_alpha_state *dsa = &cell->depth_stencil->base; const struct pipe_blend_state *blend = &cell->blend->base; + const struct pipe_blend_color *blend_color = &cell->blend_color; const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
*/ @@ -651,7 +900,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ - spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); spe_allocate_register(f, x_reg); spe_allocate_register(f, y_reg); spe_allocate_register(f, color_tile_reg); @@ -816,7 +1064,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) if (blend->blend_enable) { - gen_blend(blend, f, color_format, + gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 2da3097983..8a389cd6aa 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -100,14 +100,19 @@ cell_emit_state(struct cell_context *cell) = cell_batch_alloc(cell, sizeof(*fops)); struct spe_function spe_code; + /* Prepare the buffer that will hold the generated code. 
*/ + spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + /* generate new code */ cell_gen_fragment_function(cell, &spe_code); + /* put the new code into the batch buffer */ fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; memcpy(&fops->code, spe_code.store, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); fops->dsa = cell->depth_stencil->base; fops->blend = cell->blend->base; + /* free codegen buffer */ spe_release_func(&spe_code); } diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 78260c4259..da2cb08972 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -50,7 +50,31 @@ helpful headers: /opt/cell/sdk/usr/include/libmisc.h */ +/* Set to 0 to disable all extraneous debugging code */ +#define DEBUG 1 + +#if DEBUG boolean Debug = FALSE; +boolean force_fragment_ops_fallback = TRUE; + +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define DEBUG_PRINTF(format,...) \ + do { if (Debug) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__); } while (0) +#define D_PRINTF(flag, format,...) \ + do { if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__); } while (0) + +#else + +#define DEBUG_PRINTF(...) do { } while (0) +#define D_PRINTF(...) do { } while (0) 
+ +#endif struct spu_global spu; @@ -133,9 +157,7 @@ really_clear_tiles(uint surfaceIndex) static void cmd_clear_surface(const struct cell_command_clear_surface *clear) { - if (Debug) - printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, - clear->surface, clear->value); + DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; @@ -203,17 +225,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #endif /* CLEAR_OPT */ - if (Debug) - printf("SPU %u: CLEAR SURF done\n", spu.init.id); + DEBUG_PRINTF("CLEAR SURF done\n"); } static void cmd_release_verts(const struct cell_command_release_verts *release) { - if (Debug) - printf("SPU %u: RELEASE VERTS %u\n", - spu.init.id, release->vertex_buf); + DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf); ASSERT(release->vertex_buf != ~0U); release_buffer(release->vertex_buf); } @@ -228,16 +247,30 @@ cmd_release_verts(const struct cell_command_release_verts *release) static void cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); + DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info (for fallback case only) */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - /* Point function pointer at new code */ - spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + /* Parity twist! For now, always use the fallback code by default, + * only switching to codegen when specifically requested. This + * allows us to develop freely without risking taking down the + * branch. + * + * Later, the parity of this check will be reversed, so that + * codegen is *always* used, unless we specifically indicate that + * we don't want it. 
+ * + * Eventually, the option will be removed completely, because in + * final code we'll always use codegen and won't even provide the + * raw state records that the fallback code requires. + */ + if (spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) { + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + } + /* otherwise, the default fallback code remains in place */ spu.read_depth = spu.depth_stencil_alpha.depth.enabled; spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; @@ -247,8 +280,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) { - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); + DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n"); /* Copy SPU code from batch buffer to spu buffer */ memcpy(spu.fragment_program_code, fp->code, SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); @@ -262,9 +294,7 @@ cmd_state_fragment_program(const struct cell_command_fragment_program *fp) static void cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) { - if (Debug) - printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - spu.init.id, + DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", cmd->width, cmd->height, cmd->color_start, @@ -309,9 +339,7 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) static void cmd_state_sampler(const struct cell_command_sampler *sampler) { - if (Debug) - printf("SPU %u: SAMPLER [%u]\n", - spu.init.id, sampler->unit); + DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit); spu.sampler[sampler->unit] = sampler->state; if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) @@ -328,11 +356,9 @@ cmd_state_texture(const struct cell_command_texture *texture) const uint width = texture->width; const uint height = texture->height; - if (Debug) { - printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id, + 
DEBUG_PRINTF("TEXTURE [%u] at %p size %u x %u\n", texture->unit, texture->start, texture->width, texture->height); - } spu.texture[unit].start = texture->start; spu.texture[unit].width = width; @@ -351,10 +377,7 @@ cmd_state_texture(const struct cell_command_texture *texture) static void cmd_state_vertex_info(const struct vertex_info *vinfo) { - if (Debug) { - printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, - vinfo->num_attribs); - } + DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); ASSERT(vinfo->num_attribs >= 1); ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); @@ -393,8 +416,7 @@ cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) static void cmd_finish(void) { - if (Debug) - printf("SPU %u: FINISH\n", spu.init.id); + DEBUG_PRINTF("FINISH\n"); really_clear_tiles(0); /* wait for all outstanding DMAs to finish */ mfc_write_tag_mask(~0); @@ -419,9 +441,8 @@ cmd_batch(uint opcode) const unsigned usize = size / sizeof(buffer[0]); uint pos; - if (Debug) - printf("SPU %u: BATCH buffer %u, len %u, from %p\n", - spu.init.id, buf, size, spu.init.buffers[buf]); + DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); @@ -440,8 +461,7 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ - if (Debug) - printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + DEBUG_PRINTF("release batch buf %u\n", buf); release_buffer(buf); /* @@ -571,8 +591,7 @@ cmd_batch(uint opcode) } } - if (Debug) - printf("SPU %u: BATCH complete\n", spu.init.id); + DEBUG_PRINTF("BATCH complete\n"); } @@ -585,8 +604,7 @@ main_loop(void) struct cell_command cmd; int exitFlag = 0; - if (Debug) - printf("SPU %u: Enter main loop\n", spu.init.id); + DEBUG_PRINTF("Enter main loop\n"); ASSERT((sizeof(struct cell_command) & 0xf) == 0); ASSERT_ALIGN16(&cmd); @@ -595,14 
+613,12 @@ main_loop(void) unsigned opcode; int tag = 0; - if (Debug) - printf("SPU %u: Wait for cmd...\n", spu.init.id); + DEBUG_PRINTF("Wait for cmd...\n"); /* read/wait from mailbox */ opcode = (unsigned int) spu_read_in_mbox(); - if (Debug) - printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); + DEBUG_PRINTF("got cmd 0x%x\n", opcode); /* command payload */ mfc_get(&cmd, /* dest */ @@ -619,8 +635,7 @@ main_loop(void) switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: - if (Debug) - printf("SPU %u: EXIT\n", spu.init.id); + DEBUG_PRINTF("EXIT\n"); exitFlag = 1; break; case CELL_CMD_VS_EXECUTE: @@ -632,13 +647,12 @@ main_loop(void) cmd_batch(opcode); break; default: - printf("Bad opcode!\n"); + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); } } - if (Debug) - printf("SPU %u: Exit main loop\n", spu.init.id); + DEBUG_PRINTF("Exit main loop\n"); spu_dcache_report(); } @@ -653,7 +667,8 @@ one_time_init(void) invalidate_tex_cache(); /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function. + * For now this is overridden by a code-gen'd function only when the + * CELL_DEBUG_FRAGMENT_OP_FALLBACK debug flag is set. */ spu.fragment_ops = spu_fallback_fragment_ops; } @@ -685,8 +700,8 @@ main(main_param_t speid, main_param_t argp) one_time_init(); - if (Debug) - printf("SPU: main() speid=%lu\n", (unsigned long) speid); + DEBUG_PRINTF("main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ |