summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/cell/spu/spu_command.c
diff options
context:
space:
mode:
authorRobert Ellison <papillo@tungstengraphics.com>2008-11-21 11:42:14 -0700
committerRobert Ellison <papillo@tungstengraphics.com>2008-11-21 11:42:35 -0700
commit11fc390f6478526d4f0bdb4b7e628284da31b3b9 (patch)
tree5fd526db7370668cf4cf1c36844fa5eac705cb2d /src/gallium/drivers/cell/spu/spu_command.c
parent81aa678ce8f4a1f7c75b928ba2b107908959d50d (diff)
CELL: use variant-length fragment ops programs
This is a set of changes that optimizes the memory use of fragment operation programs (by using and transmitting only as much memory as is needed for the fragment ops programs, instead of maximal sizes), as well as eliminate the dependency on hard-coded maximal program sizes. State that is not dependent on fragment facing (i.e. that isn't using two-sided stenciling) will only save and transmit a single fragment operation program, instead of two identical programs. - Added the ability to emit a LNOP (No Operation (Load)) instruction. This is used to pad the generated fragment operations programs to a multiple of 8 bytes, which is necessary for proper operation of the dual instruction pipeline, and also required for proper SPU-side decoding. - Added the ability to allocate and manage a variant-length struct cell_command_fragment_ops. This structure now puts the generated function field at the end, where it can be as large as necessary. - On the PPU side, we now combine the generated front-facing and back-facing code into a single variant-length buffer (and only use one if the two sets of code are identical) for transmission to the SPU. - On the SPU side, we pull the correct sizes out of the buffer, allocate a new code buffer if the one we have isn't large enough, and save the code to that buffer. The buffer is deallocated when the SPU exits. - Commented out the emit_fetch() static function, which was not being used.
Diffstat (limited to 'src/gallium/drivers/cell/spu/spu_command.c')
-rw-r--r--src/gallium/drivers/cell/spu/spu_command.c111
1 files changed, 84 insertions, 27 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index d5faf4e3aa..8500d19754 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -210,45 +210,72 @@ cmd_release_verts(const struct cell_command_release_verts *release)
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
- static int warned = 0;
-
D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
- /* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_ops_code_front, fops->code_front, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
- memcpy(spu.fragment_ops_code_back, fops->code_back, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
- /* Copy state info (for fallback case only) */
+
+ /* Copy state info (for fallback case only - this will eventually
+ * go away when the fallback case goes away)
+ */
memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
- /* Parity twist! For now, always use the fallback code by default,
- * only switching to codegen when specifically requested. This
- * allows us to develop freely without risking taking down the
- * branch.
- *
- * Later, the parity of this check will be reversed, so that
- * codegen is *always* used, unless we specifically indicate that
- * we don't want it.
- *
- * Eventually, the option will be removed completely, because in
- * final code we'll always use codegen and won't even provide the
- * raw state records that the fallback code requires.
+ /* Make sure the SPU knows which buffers it's expected to read when
+ * it's told to pull tiles.
*/
- if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
- spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) spu.fragment_ops_code_front;
- spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) spu.fragment_ops_code_back;
- }
- else {
- /* otherwise, the default fallback code remains in place */
+ spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
+
+ /* If we're forcing the fallback code to be used (for debug purposes),
+ * install that. Otherwise install the incoming SPU code.
+ */
+ if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
+ static unsigned int warned = 0;
if (!warned) {
fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
warned = 1;
}
+ /* The following two lines aren't really necessary if you
+ * know the debug flags won't change during a run, and if you
+ * know that the function pointers are initialized correctly.
+ * We set them here to allow a person to change the debug
+ * flags during a run (from inside a debugger).
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
}
- spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
-}
+ /* Make sure the SPU code buffer is large enough to hold the incoming code.
+ * Note that we *don't* use align_malloc() and align_free(), because
+ * those utility functions are *not* available in SPU code.
+ * */
+ if (spu.fragment_ops_code_size < fops->total_code_size) {
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu.fragment_ops_code_size = fops->total_code_size;
+ spu.fragment_ops_code = malloc(fops->total_code_size);
+ if (spu.fragment_ops_code == NULL) {
+ /* Whoops. */
+ fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+ return;
+ }
+ }
+ /* Copy the SPU code from the command buffer to the spu buffer */
+ memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);
+
+ /* Set the pointers for the front-facing and back-facing fragments
+ * to the specified offsets within the code. Note that if the
+ * front-facing and back-facing code are the same, they'll have
+ * the same offset.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
+ spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
+}
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
@@ -588,7 +615,8 @@ cmd_batch(uint opcode)
struct cell_command_fragment_ops *fops
= (struct cell_command_fragment_ops *) &buffer[pos];
cmd_state_fragment_ops(fops);
- pos += sizeof(*fops) / 8;
+ /* This is a variant-sized command */
+ pos += (sizeof(*fops) + fops->total_code_size)/ 8;
}
break;
case CELL_CMD_STATE_FRAGMENT_PROGRAM:
@@ -756,3 +784,32 @@ command_loop(void)
if (spu.init.debug_flags & CELL_DEBUG_CACHE)
spu_dcache_report();
}
+
+/* Initialize this module; we manage the fragment ops buffer here. */
+void
+spu_command_init(void)
+{
+ /* Install default/fallback fragment processing function.
+ * This will normally be overriden by a code-gen'd function
+ * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
+ */
+ spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
+ spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
+
+ /* Set up the basic empty buffer for code-gen'ed fragment ops */
+ spu.fragment_ops_code = NULL;
+ spu.fragment_ops_code_size = 0;
+}
+
+void
+spu_command_close(void)
+{
+ /* Deallocate the code-gen buffer for fragment ops, and reset the
+ * fragment ops functions to their initial setting (just to leave
+ * things in a good state).
+ */
+ if (spu.fragment_ops_code != NULL) {
+ free(spu.fragment_ops_code);
+ }
+ spu_command_init();
+}