From 66be2810c3be07dd1ee45a60cfc632725837f2cd Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 18 Feb 2008 18:55:39 -0800 Subject: Cell: emit vertex shaders and uniforms more intelligently --- src/gallium/drivers/cell/common.h | 22 ++++---- src/gallium/drivers/cell/ppu/cell_state_emit.c | 18 ++++++ src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 17 +++--- src/gallium/drivers/cell/spu/spu_main.c | 9 +++ src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 6 +- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 68 ++++++++++------------- src/gallium/drivers/cell/spu/spu_vertex_shader.h | 5 ++ 7 files changed, 85 insertions(+), 60 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 74b131fbef..cf892206c6 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -87,10 +87,12 @@ #define CELL_CMD_STATE_TEXTURE 13 #define CELL_CMD_STATE_VERTEX_INFO 14 #define CELL_CMD_STATE_VIEWPORT 15 -#define CELL_CMD_STATE_VS_ARRAY_INFO 16 -#define CELL_CMD_STATE_BLEND 17 -#define CELL_CMD_VS_EXECUTE 18 -#define CELL_CMD_STATE_ATTRIB_FETCH 19 +#define CELL_CMD_STATE_UNIFORMS 16 +#define CELL_CMD_STATE_VS_ARRAY_INFO 17 +#define CELL_CMD_STATE_BIND_VS 18 +#define CELL_CMD_STATE_BLEND 19 +#define CELL_CMD_STATE_ATTRIB_FETCH 20 +#define CELL_CMD_VS_EXECUTE 21 #define CELL_NUM_BUFFERS 4 @@ -144,14 +146,13 @@ struct cell_attribute_fetch_code { struct cell_shader_info { - unsigned num_outputs; - uint64_t declarations; - unsigned num_declarations; uint64_t instructions; - unsigned num_instructions; - uint64_t uniforms; uint64_t immediates; + + unsigned num_outputs; + unsigned num_declarations; + unsigned num_instructions; unsigned num_immediates; } ALIGN16_ATTRIB; @@ -160,10 +161,9 @@ struct cell_shader_info struct cell_command_vs { uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ - struct cell_shader_info shader; + uint64_t vOut[SPU_VERTS_PER_BATCH]; unsigned num_elts; unsigned elts[SPU_VERTS_PER_BATCH]; - uint64_t vOut[SPU_VERTS_PER_BATCH]; float plane[12][4]; unsigned nr_planes; unsigned nr_attrs; diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 5d2a786449..49c0d130c5 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -31,6 +31,8 @@ #include "cell_state_emit.h" #include "cell_batch.h" #include "cell_texture.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" static void @@ -100,4 +102,20 @@ cell_emit_state(struct cell_context *cell) emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, &cell->vertex_info, sizeof(struct vertex_info)); } + + if (cell->dirty & CELL_NEW_VS) { + const struct draw_context *const draw = cell->draw; + struct cell_shader_info info; + + info.num_outputs = draw->num_vs_outputs; + info.declarations = (uintptr_t) draw->machine.Declarations; + info.num_declarations = draw->machine.NumDeclarations; + info.instructions = (uintptr_t) draw->machine.Instructions; + info.num_instructions = draw->machine.NumInstructions; + info.immediates = (uintptr_t) draw->machine.Imms; + info.num_immediates = draw->machine.ImmLimit / 4; + + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, + & info, sizeof(info)); + } } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 6a1d3bc20a..64c7821c19 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -54,6 +54,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) struct cell_command_vs *const vs = &cell_global.command[0].vs; uint64_t *batch; struct cell_array_info *array_info; + struct cell_shader_info *shader_info; unsigned i, j; struct cell_attribute_fetch_code *cf; @@ -100,17 +101,17 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) (void) memcpy(&batch[1], &draw->viewport, sizeof(struct pipe_viewport_state)); + { + uint64_t uniforms = (uintptr_t) draw->user.constants; + + batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); + batch[0] = CELL_CMD_STATE_UNIFORMS; + batch[1] = uniforms; + } + cell_batch_flush(cell); vs->opcode = CELL_CMD_VS_EXECUTE; - vs->shader.num_outputs = draw->num_vs_outputs; - vs->shader.declarations = (uintptr_t) draw->machine.Declarations; - vs->shader.num_declarations = draw->machine.NumDeclarations; - vs->shader.instructions = (uintptr_t) draw->machine.Instructions; - vs->shader.num_instructions = draw->machine.NumInstructions; - vs->shader.uniforms = (uintptr_t) draw->user.constants; - vs->shader.immediates = (uintptr_t) draw->machine.Imms; - vs->shader.num_immediates = draw->machine.ImmLimit / 4; vs->nr_attrs = draw->vertex_fetch.nr_attrs; (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index fcbf0f841e..dbc3705c24 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -433,10 +433,19 @@ cmd_batch(uint opcode) sizeof(struct pipe_viewport_state)); pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (float (*)[4]) (uintptr_t) buffer[pos + 1]; + pos += 2; + break; case CELL_CMD_STATE_VS_ARRAY_INFO: cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; + case CELL_CMD_STATE_BIND_VS: + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); + break; case CELL_CMD_STATE_ATTRIB_FETCH: { struct cell_attribute_fetch_code *code = (struct cell_attribute_fetch_code *) &buffer[pos+1]; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 55c6c28717..e5d9910ff3 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -64,7 +64,7 @@ typedef void (*spu_fetch_func)(qword *out, const qword *in, const qword *shuffle_data); -static const qword fetch_shuffle_data[] = { +static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { /* Shuffle used by CVT_64_FLOAT */ { @@ -108,7 +108,7 @@ static const qword fetch_shuffle_data[] = { static INLINE void fetch_unaligned(qword *dst, unsigned ea, unsigned size) { - qword tmp[4]; + qword tmp[4] ALIGN16_ATTRIB; const int shift = ea & 0x0f; const unsigned aligned_start_ea = ea & ~0x0f; const unsigned aligned_end_ea = (ea + size) & ~0x0f; @@ -169,7 +169,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - qword in[2 * 4]; + qword in[2 * 4] ALIGN16_ATTRIB; /* Fetch four attributes for four vertices. diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 3f5bf41aa2..8363efeeb6 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -165,63 +165,55 @@ run_vertex_program(struct spu_vs_context *draw, } -static void -spu_bind_vertex_shader(struct spu_vs_context *draw, - void *uniforms, - void *planes, - unsigned nr_planes, - unsigned num_outputs - ) -{ - draw->constants = (float (*)[4]) uniforms; - - (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); - draw->nr_planes = nr_planes; - draw->num_vs_outputs = num_outputs; - - /* specify the shader to interpret/execute */ - spu_exec_machine_init(&draw->machine, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/, - PIPE_SHADER_VERTEX); -} - - unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] ALIGN16_ATTRIB; + void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs) +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs) { - unsigned i; - - const uint64_t immediate_addr = vs->shader.immediates; + const unsigned immediate_addr = vs->immediates; const unsigned immediate_size = - ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) - + (immediate_addr & 0x0f)); + ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) + + (immediate_addr & 0x0f)); + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, TAG_VERTEX_BUFFER, 0, 0); draw->machine.Instructions = (struct tgsi_full_instruction *) - vs->shader.instructions; - draw->machine.NumInstructions = vs->shader.num_instructions; + vs->instructions; + draw->machine.NumInstructions = vs->num_instructions; draw->machine.Declarations = (struct tgsi_full_declaration *) - vs->shader.declarations; - draw->machine.NumDeclarations = vs->shader.num_declarations; + vs->declarations; + draw->machine.NumDeclarations = vs->num_declarations; - draw->vertex_fetch.nr_attrs = vs->nr_attrs; + draw->num_vs_outputs = vs->num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); wait_on_mask(1 << TAG_VERTEX_BUFFER); (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], - sizeof(float) * 4 * vs->shader.num_immediates); + sizeof(float) * 4 * vs->num_immediates); +} - spu_bind_vertex_shader(draw, vs->shader.uniforms, - vs->plane, vs->nr_planes, - vs->shader.num_outputs); + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); + draw->nr_planes = vs->nr_planes; + draw->vertex_fetch.nr_attrs = vs->nr_attrs; for (i = 0; i < vs->num_elts; i += 4) { const unsigned batch_size = MIN2(vs->num_elts - i, 4); diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index 0fb0bc28d0..54a4b8d9b9 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -1,6 +1,7 @@ #ifndef SPU_VERTEX_SHADER_H #define SPU_VERTEX_SHADER_H +#include "cell/common.h" #include "pipe/p_format.h" #include "spu_exec.h" @@ -54,6 +55,10 @@ static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, struct cell_command_vs; +extern void +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs); + extern void spu_execute_vertex_shader(struct spu_vs_context *draw, const struct cell_command_vs *vs); -- cgit v1.2.3