diff options
author | Keith Whitwell <keith@tungstengraphics.com> | 2008-09-11 18:32:05 +0100 |
---|---|---|
committer | Keith Whitwell <keith@tungstengraphics.com> | 2008-09-11 18:32:05 +0100 |
commit | cc7dd4fc1b3c765ca1ecd943d189bb156dae529d (patch) | |
tree | 1a3560eb6df8a443c4f0e5af0a916f190b1542f6 /src/gallium/drivers/cell/ppu | |
parent | 685248bea1fef5fd6335982570e34d0f6672030d (diff) | |
parent | d50d68a1c940ed9c8d8c65e8e33667fa90d5baa1 (diff) |
Merge commit 'origin/gallium-0.1' into gallium-0.2
Conflicts:
Makefile
progs/demos/Makefile
progs/glsl/Makefile
progs/redbook/Makefile
progs/samples/Makefile
progs/tests/Makefile
progs/trivial/Makefile
progs/xdemos/Makefile
src/gallium/Makefile
src/mesa/main/attrib.c
src/mesa/main/bufferobj.c
src/mesa/vbo/vbo_exec_draw.c
Diffstat (limited to 'src/gallium/drivers/cell/ppu')
25 files changed, 1293 insertions, 306 deletions
diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 0389a9554c..8699f3f8ec 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -5,7 +5,7 @@ TOP = ../../../../.. -include $(TOP)/configs/linux-cell +include $(TOP)/configs/current # This is the "top-level" cell PPU driver code, will get pulled into libGL.so @@ -25,9 +25,9 @@ SOURCES = \ cell_context.c \ cell_draw_arrays.c \ cell_flush.c \ + cell_gen_fragment.c \ cell_state_derived.c \ cell_state_emit.c \ - cell_state_per_fragment.c \ cell_state_shader.c \ cell_pipe_state.c \ cell_screen.c \ diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index f45e5f25b6..16882c0129 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -32,6 +32,13 @@ +/** + * Search the buffer pool for an empty/free buffer and return its index. + * Buffers are used for storing vertex data, state and commands which + * will be sent to the SPUs. + * If no empty buffers are available, wait for one. + * \return buffer index in [0, CELL_NUM_BUFFERS-1] + */ uint cell_get_empty_buffer(struct cell_context *cell) { @@ -74,6 +81,11 @@ cell_get_empty_buffer(struct cell_context *cell) } +/** + * Flush the current batch buffer to the SPUs. + * An empty buffer will be found and set as the new current batch buffer + * for subsequent commands/data. + */ void cell_batch_flush(struct cell_context *cell) { @@ -93,11 +105,11 @@ cell_batch_flush(struct cell_context *cell) /* printf("cell_batch_dispatch: buf %u at %p, size %u\n", - batch, &cell->batch_buffer[batch][0], size); + batch, &cell->buffer[batch][0], size); */ /* - * Build "BATCH" command and sent to all SPUs. + * Build "BATCH" command and send to all SPUs. */ cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); @@ -120,6 +132,9 @@ cell_batch_flush(struct cell_context *cell) } +/** + * Return the number of bytes free in the current batch buffer. + */ uint cell_batch_free_space(const struct cell_context *cell) { @@ -129,7 +144,9 @@ cell_batch_free_space(const struct cell_context *cell) /** - * Append data to current batch. + * Append data to the current batch buffer. + * \param data address of block of bytes to append + * \param bytes size of block of bytes */ void cell_batch_append(struct cell_context *cell, const void *data, uint bytes) @@ -165,6 +182,10 @@ cell_batch_append(struct cell_context *cell, const void *data, uint bytes) } +/** + * Allocate space in the current batch buffer for 'bytes' space. + * \return address in batch buffer to put data + */ void * cell_batch_alloc(struct cell_context *cell, uint bytes) { @@ -172,6 +193,10 @@ cell_batch_alloc(struct cell_context *cell, uint bytes) } +/** + * Same as \sa cell_batch_alloc, but return an address at a particular + * alignment. + */ void * cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, uint alignment) @@ -215,3 +240,28 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, return pos; } + + +/** + * One-time init of batch buffers. + */ +void +cell_init_batch_buffers(struct cell_context *cell) +{ + uint spu, buf; + + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; + + /* init batch buffer status values, + * mark 0th buffer as used, rest as free. + */ + for (spu = 0; spu < cell->num_spus; spu++) { + if (buf == 0) + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + else + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h index a6eee0a8b1..f74dd60079 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -54,5 +54,8 @@ extern void * cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, uint alignment); +extern void +cell_init_batch_buffers(struct cell_context *cell); + #endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index a421c95c8e..c9c0c721bb 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -35,6 +35,7 @@ #include <stdint.h> #include "pipe/p_inlines.h" #include "util/u_memory.h" +#include "util/u_pack_color.h" #include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" @@ -44,6 +45,27 @@ #include "cell_state.h" +/** + * Convert packed pixel from one format to another. + */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + +/** + * Called via pipe->clear() + */ void cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) @@ -61,13 +83,21 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, PIPE_BUFFER_USAGE_GPU_WRITE); if (ps == cell->framebuffer.zsbuf) { + /* clear z/stencil buffer */ surfIndex = 1; } else { + /* clear color buffer */ surfIndex = 0; + + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } } + /* Build a CLEAR command and place it in the current batch buffer */ { struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 9ff4e86943..71f1a3049d 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -43,11 +43,11 @@ #include "draw/draw_private.h" #include "cell/common.h" +#include "cell_batch.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" #include "cell_flush.h" -#include "cell_render.h" #include "cell_state.h" #include "cell_surface.h" #include "cell_spu.h" @@ -85,12 +85,20 @@ cell_draw_create(struct cell_context *cell) } +#ifdef DEBUG +static const struct debug_named_value cell_debug_flags[] = { + {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ + {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {NULL, 0} +}; +#endif + + struct pipe_context * cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) { struct cell_context *cell; - uint spu, buf; /* some fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); @@ -104,15 +112,6 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.screen = screen; cell->pipe.destroy = cell_destroy_context; - /* state setters */ - cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; - cell->pipe.set_vertex_elements = cell_set_vertex_elements; - - cell->pipe.draw_arrays = cell_draw_arrays; - cell->pipe.draw_elements = cell_draw_elements; - cell->pipe.draw_range_elements = cell_draw_range_elements; - cell->pipe.set_edgeflags = cell_set_edgeflags; - cell->pipe.clear = cell_clear_surface; cell->pipe.flush = cell_flush; @@ -122,20 +121,28 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.wait_query = cell_wait_query; #endif + cell_init_draw_functions(cell); cell_init_state_functions(cell); cell_init_shader_functions(cell); cell_init_surface_functions(cell); cell_init_texture_functions(cell); + cell_init_vertex_functions(cell); cell->draw = cell_draw_create(cell); cell_init_vbuf(cell); + draw_set_rasterize_stage(cell->draw, cell->vbuf); /* convert all points/lines to tris for the time being */ draw_wide_point_threshold(cell->draw, 0.0); draw_wide_line_threshold(cell->draw, 0.0); + /* get env vars or read config file to get debug flags */ + cell->debug_flags = debug_get_flags_option("CELL_DEBUG", + cell_debug_flags, + 0 ); + /* * SPU stuff */ @@ -146,20 +153,7 @@ cell_create_context(struct pipe_screen *screen, cell_start_spus(cell); - /* init command, vertex/index buffer info */ - for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { - cell->buffer_size[buf] = 0; - - /* init batch buffer status values, - * mark 0th buffer as used, rest as free. - */ - for (spu = 0; spu < cell->num_spus; spu++) { - if (buf == 0) - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - else - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; - } - } + cell_init_batch_buffers(cell); return &cell->pipe; } diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index f1d1ca89a9..8cec9f45b2 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -39,8 +39,13 @@ #include "rtasm/rtasm_ppc_spe.h" #include "tgsi/tgsi_scan.h" + struct cell_vbuf_render; + +/** + * Cell vertex shader state, subclass of pipe_shader_state. + */ struct cell_vertex_shader_state { struct pipe_shader_state shader; @@ -49,6 +54,9 @@ struct cell_vertex_shader_state }; +/** + * Cell fragment shader state, subclass of pipe_shader_state. + */ struct cell_fragment_shader_state { struct pipe_shader_state shader; @@ -57,7 +65,11 @@ struct cell_fragment_shader_state }; -struct cell_blend_state { +/** + * Cell blend state atom, subclass of pipe_blend_state. + */ +struct cell_blend_state +{ struct pipe_blend_state base; /** @@ -67,17 +79,24 @@ struct cell_blend_state { }; -struct cell_depth_stencil_alpha_state { - struct pipe_depth_stencil_alpha_state base; +/** + * Cell depth/stencil/alpha state atom, subclass of + * pipe_depth_stencil_alpha_state. + */ +struct cell_depth_stencil_alpha_state +{ + struct pipe_depth_stencil_alpha_state base; /** * Generated code to perform alpha, stencil, and depth testing on the SPE */ struct spe_function code; - }; +/** + * Per-context state, subclass of pipe_context. + */ struct cell_context { struct pipe_context pipe; @@ -144,6 +163,8 @@ struct cell_context struct spe_function attrib_fetch; unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; + + unsigned debug_flags; }; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index f02dffe124..880d535320 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -34,6 +34,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -76,14 +77,6 @@ cell_unmap_constant_buffers(struct cell_context *sp) } -boolean -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count) -{ - return cell_draw_elements(pipe, NULL, 0, mode, start, count); -} - - /** * Draw vertex arrays, with optional indexing. @@ -92,7 +85,7 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode, * * XXX should the element buffer be specified/bound with a separate function? */ -boolean +static boolean cell_draw_range_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -116,7 +109,7 @@ cell_draw_range_elements(struct pipe_context *pipe, * Map vertex buffers */ for (i = 0; i < sp->num_vertex_buffers; i++) { - void *buf = pipe->winsys->buffer_map(pipe->winsys, + void *buf = pipe_buffer_map(pipe->screen, sp->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); @@ -124,7 +117,7 @@ cell_draw_range_elements(struct pipe_context *pipe, } /* Map index buffer, if present */ if (indexBuffer) { - void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, + void *mapped_indexes = pipe_buffer_map(pipe->screen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); @@ -143,11 +136,11 @@ cell_draw_range_elements(struct pipe_context *pipe, */ for (i = 0; i < sp->num_vertex_buffers; i++) { draw_set_mapped_vertex_buffer(draw, i, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); } if (indexBuffer) { draw_set_mapped_element_buffer(draw, 0, NULL); - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe_buffer_unmap(pipe->screen, indexBuffer); } /* Note: leave drawing surfaces mapped */ @@ -157,7 +150,7 @@ cell_draw_range_elements(struct pipe_context *pipe, } -boolean +static boolean cell_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, @@ -170,10 +163,29 @@ cell_draw_elements(struct pipe_context *pipe, } +static boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + -void +static void cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) { struct cell_context *cell = cell_context(pipe); draw_set_edgeflags(cell->draw, edgeflags); } + + + +void +cell_init_draw_functions(struct cell_context *cell) +{ + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.set_edgeflags = cell_set_edgeflags; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h index cd35ec17b4..148873aa67 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -29,26 +29,8 @@ #define CELL_DRAW_ARRAYS_H -extern boolean -cell_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -extern boolean -cell_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); - -extern boolean -cell_draw_range_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned min_index, - unsigned max_index, - unsigned mode, unsigned start, unsigned count); - extern void -cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); +cell_init_draw_functions(struct cell_context *cell); #endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 3aaf3de668..6596b72010 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -34,6 +34,9 @@ #include "draw/draw_context.h" +/** + * Called via pipe->flush() + */ void cell_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence) @@ -50,16 +53,19 @@ cell_flush(struct pipe_context *pipe, unsigned flags, flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); - cell_flush_int(pipe, flags); + cell_flush_int(cell, flags); } -/** internal flush */ +/** + * Cell internal flush function. Send the current batch buffer to all SPUs. + * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. + * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero + */ void -cell_flush_int(struct pipe_context *pipe, unsigned flags) +cell_flush_int(struct cell_context *cell, unsigned flags) { static boolean flushing = FALSE; /* recursion catcher */ - struct cell_context *cell = cell_context(pipe); uint i; ASSERT(!flushing); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index 8f0645c429..509ae6239a 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -36,7 +36,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags, struct pipe_fence_handle **fence); extern void -cell_flush_int(struct pipe_context *pipe, unsigned flags); +cell_flush_int(struct cell_context *cell, unsigned flags); extern void cell_flush_buffer_range(struct cell_context *cell, void *ptr, diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c new file mode 100644 index 0000000000..79a82ef72b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -0,0 +1,862 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Generate SPU per-fragment code (actually per-quad code). + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" + + + +/** Do extra optimizations? */ +#define OPTIMIZATIONS 1 + + +/** + * Generate SPE code to perform Z/depth testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. + * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param ifragZ_reg register containing integer fragment Z values (in) + * \param ifbZ_reg register containing integer frame buffer Z values (in/out) + * \param zmask_reg register containing result of Z test/comparison (out) + */ +static void +gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, + int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) +{ + ASSERT(dsa->depth.enabled); + + switch (dsa->depth.func) { + case PIPE_FUNC_EQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GREATER: + /* zmask = (ifragZ > ref) */ + spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LESS: + /* zmask = (ref > ifragZ) */ + spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* zmask = (ifragZ > ref) */ + spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* zmask = (ref > ifragZ) */ + spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ + spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* mask unchanged */ + spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ + break; + + default: + ASSERT(0); + break; + } + + if (dsa->depth.writemask) { + /* + * If (ztest passed) { + * framebufferZ = fragmentZ; + * } + * OR, + * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; + */ + spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + } +} + + +/** + * Generate SPE code to perform alpha testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. + * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param fragA_reg register containing four fragment alpha values (in) + */ +static void +gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask_reg, int fragA_reg) +{ + int ref_reg = spe_allocate_available_register(f); + int amask_reg = spe_allocate_available_register(f); + + ASSERT(dsa->alpha.enabled); + + if ((dsa->alpha.func != PIPE_FUNC_NEVER) && + (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + /* load/splat the alpha reference float value */ + spe_load_float(f, ref_reg, dsa->alpha.ref); + } + + /* emit code to do the alpha comparison, updating 'mask' */ + switch (dsa->alpha.func) { + case PIPE_FUNC_EQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GREATER: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LESS: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ + break; + + case PIPE_FUNC_ALWAYS: + /* no-op, mask unchanged */ + break; + + default: + ASSERT(0); + break; + } + +#if OPTIMIZATIONS + /* if mask == {0,0,0,0} we're all done, return */ + { + /* re-use amask reg here */ + int tmp_reg = amask_reg; + /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ + spe_orx(f, tmp_reg, mask_reg); + /* if tmp[0] == 0 then return from function call */ + spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); + } +#endif + + spe_release_register(f, ref_reg); + spe_release_register(f, amask_reg); +} + + + +/** + * Generate SPE code to implement the given blend mode for a quad of pixels. + * \param f SPE function to append instruction onto. + * \param fragR_reg register with fragment red values (float) (in/out) + * \param fragG_reg register with fragment green values (float) (in/out) + * \param fragB_reg register with fragment blue values (float) (in/out) + * \param fragA_reg register with fragment alpha values (float) (in/out) + * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) + */ +static void +gen_blend(const struct pipe_blend_state *blend, + struct spe_function *f, + enum pipe_format color_format, + int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, + int fbRGBA_reg) +{ + int term1R_reg = spe_allocate_available_register(f); + int term1G_reg = spe_allocate_available_register(f); + int term1B_reg = spe_allocate_available_register(f); + int term1A_reg = spe_allocate_available_register(f); + + int term2R_reg = spe_allocate_available_register(f); + int term2G_reg = spe_allocate_available_register(f); + int term2B_reg = spe_allocate_available_register(f); + int term2A_reg = spe_allocate_available_register(f); + + int fbR_reg = spe_allocate_available_register(f); + int fbG_reg = spe_allocate_available_register(f); + int fbB_reg = spe_allocate_available_register(f); + int fbA_reg = spe_allocate_available_register(f); + + int one_reg = spe_allocate_available_register(f); + int tmp_reg = spe_allocate_available_register(f); + + ASSERT(blend->blend_enable); + + /* Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. + */ + { + int mask_reg = spe_allocate_available_register(f); + + /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ + spe_fsmbi(f, mask_reg, 0x1111); + + /* XXX there may be more clever ways to implement the following code */ + switch (color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* fbB = fbB & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 8 */ + spe_roti(f, fbG_reg, fbG_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 16 */ + spe_roti(f, fbR_reg, fbR_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* fbA = fbA >> 24 */ + spe_roti(f, fbA_reg, fbA_reg, -24); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* fbA = fbA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 8 */ + spe_roti(f, fbR_reg, fbR_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 16 */ + spe_roti(f, fbG_reg, fbG_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* fbB = fbB >> 24 */ + spe_roti(f, fbB_reg, fbB_reg, -24); + break; + + default: + ASSERT(0); + } + + /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ + spe_cuflt(f, fbR_reg, fbR_reg, 8); + spe_cuflt(f, fbG_reg, fbG_reg, 8); + spe_cuflt(f, fbB_reg, fbB_reg, 8); + spe_cuflt(f, fbA_reg, fbA_reg, 8); + + spe_release_register(f, mask_reg); + } + + + /* + * Compute Src RGB terms + */ + switch (blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1R_reg, fragR_reg); + spe_move(f, term1G_reg, fragG_reg); + spe_move(f, term1B_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term1R_reg); + spe_zero(f, term1G_reg); + spe_zero(f, term1B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1R_reg, fragR_reg, fragR_reg); + spe_fm(f, term1G_reg, fragG_reg, fragG_reg); + spe_fm(f, term1B_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1R_reg, fragR_reg, fragA_reg); + spe_fm(f, term1G_reg, fragG_reg, fragA_reg); + spe_fm(f, term1B_reg, fragB_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term + */ + switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term1A_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms + */ + switch (blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2R_reg, fbR_reg); + spe_move(f, term2G_reg, fbG_reg); + spe_move(f, term2B_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2R_reg); + spe_zero(f, term2G_reg); + spe_zero(f, term2B_reg); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + spe_fm(f, term2R_reg, fbR_reg, fragR_reg); + spe_fm(f, term2G_reg, fbG_reg, fragG_reg); + spe_fm(f, term2B_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2R_reg, fbR_reg, fragA_reg); + spe_fm(f, term2G_reg, fbG_reg, fragA_reg); + spe_fm(f, term2B_reg, fbB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* term = fb * tmp */ + spe_fm(f, term2R_reg, fbR_reg, tmp_reg); + spe_fm(f, term2G_reg, fbG_reg, tmp_reg); + spe_fm(f, term2B_reg, fbB_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term + */ + switch (blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + spe_move(f, term2A_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + spe_zero(f, term2A_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term2A_reg, fbA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(f, one_reg, 1.0f); + /* tmp = one - fragA */ + spe_fs(f, tmp_reg, one_reg, fragA_reg); + /* termA = fbA * tmp */ + spe_fm(f, term2A_reg, fbA_reg, tmp_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (blend->rgb_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragR_reg, term1R_reg, term2R_reg); + spe_fa(f, fragG_reg, term1G_reg, term2G_reg); + spe_fa(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragR_reg, term1R_reg, term2R_reg); + spe_fs(f, fragG_reg, term1G_reg, term2G_reg); + spe_fs(f, fragB_reg, term1B_reg, term2B_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (blend->alpha_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragA_reg, term1A_reg, term2A_reg); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + spe_release_register(f, term1R_reg); + spe_release_register(f, term1G_reg); + spe_release_register(f, term1B_reg); + spe_release_register(f, term1A_reg); + + spe_release_register(f, term2R_reg); + spe_release_register(f, term2G_reg); + spe_release_register(f, term2B_reg); + spe_release_register(f, term2A_reg); + + spe_release_register(f, fbR_reg); + spe_release_register(f, fbG_reg); + spe_release_register(f, fbB_reg); + spe_release_register(f, fbA_reg); + + spe_release_register(f, one_reg); + spe_release_register(f, tmp_reg); +} + + +static void +gen_logicop(const struct pipe_blend_state *blend, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + +static void +gen_colormask(uint colormask, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* XXX to-do */ + /* operate on 32-bit packed pixels, not float colors */ +} + + + +/** + * Generate code to pack a quad of float colors into a four 32-bit integers. + * + * \param f SPE function to append instruction onto. + * \param color_format the dest color packing format + * \param r_reg register containing four red values (in/clobbered) + * \param g_reg register containing four green values (in/clobbered) + * \param b_reg register containing four blue values (in/clobbered) + * \param a_reg register containing four alpha values (in/clobbered) + * \param rgba_reg register to store the packed RGBA colors (out) + */ +static void +gen_pack_colors(struct spe_function *f, + enum pipe_format color_format, + int r_reg, int g_reg, int b_reg, int a_reg, + int rgba_reg) +{ + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ + spe_cfltu(f, r_reg, r_reg, 32); + spe_cfltu(f, g_reg, g_reg, 32); + spe_cfltu(f, b_reg, b_reg, 32); + spe_cfltu(f, a_reg, a_reg, 32); + + /* Shift the most significant bytes to least the significant positions. + * I.e.: reg = reg >> 24 + */ + spe_rotmi(f, r_reg, r_reg, -24); + spe_rotmi(f, g_reg, g_reg, -24); + spe_rotmi(f, b_reg, b_reg, -24); + spe_rotmi(f, a_reg, a_reg, -24); + + /* Shift the color bytes according to the surface format */ + if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ + spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ + spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ + } + else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ + spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ + spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ + } + else { + ASSERT(0); + } + + /* Merge red, green, blue, alpha registers to make packed RGBA colors. + * Eg: after shifting according to color_format we might have: + * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} + * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} + * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} + * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} + * OR-ing all those together gives us four packed colors: + * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} + */ + spe_or(f, rgba_reg, r_reg, g_reg); + spe_or(f, rgba_reg, rgba_reg, b_reg); + spe_or(f, rgba_reg, rgba_reg, a_reg); +} + + + + +/** + * Generate SPE code to implement the fragment operations (alpha test, + * depth test, stencil test, blending, colormask, and final + * framebuffer write) as specified by the current context state. + * + * Logically, this code will be called after running the fragment + * shader. But under some circumstances we could run some of this + * code before the fragment shader to cull fragments/quads that are + * totally occluded/discarded. + * + * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. + * + * See the spu_default_fragment_ops() function to see how the per-fragment + * operations would be done with ordinary C code. + * The code we generate here though has no branches, is SIMD, etc and + * should be much faster. + * + * \param cell the rendering context (in) + * \param f the generated function (out) + */ +void +gen_fragment_function(struct cell_context *cell, struct spe_function *f) +{ + const struct pipe_depth_stencil_alpha_state *dsa = + &cell->depth_stencil->base; + const struct pipe_blend_state *blend = &cell->blend->base; + const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ + const int x_reg = 3; /* uint */ + const int y_reg = 4; /* uint */ + const int color_tile_reg = 5; /* tile_t * */ + const int depth_tile_reg = 6; /* tile_t * */ + const int fragZ_reg = 7; /* vector float */ + const int fragR_reg = 8; /* vector float */ + const int fragG_reg = 9; /* vector float */ + const int fragB_reg = 10; /* vector float */ + const int fragA_reg = 11; /* vector float */ + const int mask_reg = 12; /* vector uint */ + + /* offset of quad from start of tile + * XXX assuming 4-byte pixels for color AND Z/stencil!!!! + */ + int quad_offset_reg; + + int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ + int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ + + spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + spe_allocate_register(f, x_reg); + spe_allocate_register(f, y_reg); + spe_allocate_register(f, color_tile_reg); + spe_allocate_register(f, depth_tile_reg); + spe_allocate_register(f, fragZ_reg); + spe_allocate_register(f, fragR_reg); + spe_allocate_register(f, fragG_reg); + spe_allocate_register(f, fragB_reg); + spe_allocate_register(f, fragA_reg); + spe_allocate_register(f, mask_reg); + + quad_offset_reg = spe_allocate_available_register(f); + fbRGBA_reg = spe_allocate_available_register(f); + fbZS_reg = spe_allocate_available_register(f); + + /* compute offset of quad from start of tile, in bytes */ + { + int x2_reg = spe_allocate_available_register(f); + int y2_reg = spe_allocate_available_register(f); + + ASSERT(TILE_SIZE == 32); + + spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ + spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ + spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ + spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ + spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ + + spe_release_register(f, x2_reg); + spe_release_register(f, y2_reg); + } + + + if (dsa->alpha.enabled) { + gen_alpha_test(dsa, f, mask_reg, fragA_reg); + } + + if (dsa->depth.enabled || dsa->stencil[0].enabled) { + const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; + boolean write_depth_stencil; + + int fbZ_reg = spe_allocate_available_register(f); /* Z values */ + int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ + + /* fetch quad of depth/stencil values from tile at (x,y) */ + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + if (dsa->depth.enabled) { + /* Extract Z bits from fbZS_reg into fbZ_reg */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + int mask_reg = spe_allocate_available_register(f); + spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ + spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ + spe_release_register(f, mask_reg); + /* OK, fbZ_reg has four 24-bit Z values now */ + } + else { + /* XXX handle other z/stencil formats */ + ASSERT(0); + } + + /* Convert fragZ values from float[4] to uint[4] */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM || + zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + /* 24-bit Z values */ + int scale_reg = spe_allocate_available_register(f); + + /* scale_reg[0,1,2,3] = float(2^24-1) */ + spe_load_float(f, scale_reg, (float) 0xffffff); + + /* XXX these two instructions might be combined */ + spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ + + spe_release_register(f, scale_reg); + } + else { + /* XXX handle 16-bit Z format */ + ASSERT(0); + } + } + + if (dsa->stencil[0].enabled) { + /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + /* XXX extract with a shift */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + /* XXX extract with a mask */ + ASSERT(0); + } + } + + + if (dsa->stencil[0].enabled) { + /* XXX this may involve depth testing too */ + // gen_stencil_test(dsa, f, ... ); + ASSERT(0); + } + else if (dsa->depth.enabled) { + int zmask_reg = spe_allocate_available_register(f); + gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + spe_release_register(f, zmask_reg); + } + + /* do we need to write Z and/or Stencil back into framebuffer? */ + write_depth_stencil = (dsa->depth.writemask | + dsa->stencil[0].write_mask | + dsa->stencil[1].write_mask); + + if (write_depth_stencil) { + /* Merge latest Z and Stencil values into fbZS_reg. + * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. + * fbS_reg has four 8-bit Z values in bits [7..0]. + */ + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else if (zs_format == PIPE_FORMAT_S8_UNORM) { + /* XXX to do */ + ASSERT(0); + } + else { + /* bad zs_format */ + ASSERT(0); + } + + /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ + spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + } + + spe_release_register(f, fbZ_reg); + spe_release_register(f, fbS_reg); + } + + + /* Get framebuffer quad/colors. We'll need these for blending, + * color masking, and to obey the quad/pixel mask. + * Load: fbRGBA_reg = memory[color_tile + quad_offset] + * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking + * we could skip this load. + */ + spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); + + + if (blend->blend_enable) { + gen_blend(blend, f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); + } + + /* + * Write fragment colors to framebuffer/tile. + * This involves converting the fragment colors from float[4] to the + * tile's specific format and obeying the quad/pixel mask. + */ + { + int rgba_reg = spe_allocate_available_register(f); + + /* Pack four float colors as four 32-bit int colors */ + gen_pack_colors(f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, + rgba_reg); + + if (blend->logicop_enable) { + gen_logicop(blend, f, rgba_reg, fbRGBA_reg); + } + + if (blend->colormask != 0xf) { + gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); + } + + + /* Mix fragment colors with framebuffer colors using the quad/pixel mask: + * if (mask[i]) + * rgba[i] = rgba[i]; + * else + * rgba[i] = framebuffer[i]; + */ + spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); + + /* Store updated quad in tile: + * memory[color_tile + quad_offset] = rgba_reg; + */ + spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); + + spe_release_register(f, rgba_reg); + } + + printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); + + spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ + + + spe_release_register(f, fbRGBA_reg); + spe_release_register(f, fbZS_reg); + spe_release_register(f, quad_offset_reg); +} + diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h new file mode 100644 index 0000000000..0ea0fc690c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_GEN_FRAGMENT_H +#define CELL_GEN_FRAGMENT_H + + +extern void +gen_fragment_function(struct cell_context *cell, struct spe_function *f); + + +#endif /* CELL_GEN_FRAGMENT_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index fe5437023b..e04cf5f274 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -34,6 +34,7 @@ #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cell_context.h" +#include "cell_flush.h" #include "cell_state.h" #include "cell_texture.h" #include "cell_state_per_fragment.h" @@ -130,8 +131,9 @@ cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) } -static void cell_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) +static void +cell_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) { struct cell_context *cell = cell_context(pipe); @@ -310,8 +312,21 @@ cell_set_framebuffer_state(struct pipe_context *pipe, cell->zsbuf_map = NULL; } - /* update my state */ - cell->framebuffer = *fb; + /* Finish any pending rendering to the current surface before + * installing a new surface! + */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + + /* update my state + * (this is also where old surfaces will finally get freed) + */ + cell->framebuffer.width = fb->width; + cell->framebuffer.height = fb->height; + cell->framebuffer.num_cbufs = fb->num_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); + } + pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); /* map new surfaces */ if (csurf) diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 973c0b1aa1..9508227e29 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -26,6 +26,11 @@ **************************************************************************/ +/** + * Utility/wrappers for communicating with the SPUs. + */ + + #include <pthread.h> #include "cell_spu.h" @@ -40,6 +45,9 @@ helpful headers: */ +/** + * Cell/SPU info that's not per-context. + */ struct cell_global_info cell_global; @@ -74,7 +82,11 @@ wait_mbox_message(spe_context_ptr_t ctx) } -static void *cell_thread_function(void *arg) +/** + * Called by pthread_create() to spawn an SPU thread. + */ +static void * +cell_thread_function(void *arg) { struct cell_init_info *init = (struct cell_init_info *) arg; unsigned entry = SPE_DEFAULT_ENTRY; @@ -92,7 +104,10 @@ static void *cell_thread_function(void *arg) /** - * Create the SPU threads + * Create the SPU threads. This is done once during driver initialization. + * This involves setting the the "init" message which is sent to each SPU. + * The init message specifies an SPU id, total number of SPUs, location + * and number of batch buffers, etc. */ void cell_start_spus(struct cell_context *cell) @@ -100,7 +115,6 @@ cell_start_spus(struct cell_context *cell) static boolean one_time_init = FALSE; uint i, j; - if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " "on Cell.\n"); @@ -120,6 +134,7 @@ cell_start_spus(struct cell_context *cell) for (i = 0; i < cell->num_spus; i++) { cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].debug_flags = cell->debug_flags; cell_global.inits[i].cmd = &cell_global.command[i]; for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; @@ -137,14 +152,17 @@ cell_start_spus(struct cell_context *cell) exit(1); } - pthread_create(&cell_global.spe_threads[i], NULL, &cell_thread_function, - &cell_global.inits[i]); + pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ + NULL, /* pthread attribs */ + &cell_thread_function, /* start routine */ + &cell_global.inits[i]); /* thread argument */ } } /** * Tell all the SPUs to stop/exit. + * This is done when the driver's exiting / cleaning up. */ void cell_spu_exit(struct cell_context *cell) diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 82580ea35a..a7771a55a3 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -48,19 +48,17 @@ #define CELL_NEW_VERTEX_INFO 0x8000 -void cell_set_vertex_elements(struct pipe_context *, - unsigned count, - const struct pipe_vertex_element *); +extern void +cell_update_derived( struct cell_context *softpipe ); -void cell_set_vertex_buffers(struct pipe_context *, - unsigned count, - const struct pipe_vertex_buffer *); -void cell_update_derived( struct cell_context *softpipe ); +extern void +cell_init_shader_functions(struct cell_context *cell); -void -cell_init_shader_functions(struct cell_context *cell); +extern void +cell_init_vertex_functions(struct cell_context *cell); + #endif /* CELL_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 8ab938a02a..efc4f78364 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -35,21 +35,6 @@ #include "cell_state_emit.h" -static int -find_vs_output(const struct cell_vertex_shader_state *vs, - uint semantic_name, - uint semantic_index) -{ - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == semantic_name && - vs->info.output_semantic_index[i] == semantic_index) - return i; - } - return -1; -} - - /** * Determine how to map vertex program outputs to fragment program inputs. * Basically, this will be used when computing the triangle interpolation @@ -58,7 +43,6 @@ find_vs_output(const struct cell_vertex_shader_state *vs, static void calculate_vertex_layout( struct cell_context *cell ) { - const struct cell_vertex_shader_state *vs = cell->vs; const struct cell_fragment_shader_state *fs = cell->fs; const enum interp_mode colorInterp = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; @@ -82,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell ) vinfo->num_attribs = 0; /* we always want to emit vertex pos */ - src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); @@ -98,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell ) break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, - fs->info.input_semantic_index[i]); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); #if 1 if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ src = 0; @@ -116,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, fs->info.input_semantic_index[i]); assert(src >= 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); @@ -163,6 +147,9 @@ compute_cliprect(struct cell_context *sp) +/** + * Update derived state, send current state to SPUs prior to rendering. + */ void cell_update_derived( struct cell_context *cell ) { if (cell->dirty & (CELL_NEW_RASTERIZER | diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9d88c1cf3d..180b89c1f6 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" #include "cell_context.h" +#include "cell_gen_fragment.h" #include "cell_state.h" #include "cell_state_emit.h" #include "cell_state_per_fragment.h" @@ -47,27 +48,13 @@ emit_state_cmd(struct cell_context *cell, uint cmd, } - +/** + * For state marked as 'dirty', construct a state-update command block + * and insert it into the current batch buffer. + */ void cell_emit_state(struct cell_context *cell) { - if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_BLEND)) { - struct cell_command_logicop logicop; - - if (cell->logic_op.store != NULL) { - spe_release_func(& cell->logic_op); - } - - cell_generate_logic_op(& cell->logic_op, - & cell->blend->base, - cell->framebuffer.cbufs[0]); - - logicop.base = (intptr_t) cell->logic_op.store; - logicop.size = 64 * 4; - emit_state_cmd(cell, CELL_CMD_STATE_LOGICOP, &logicop, - sizeof(logicop)); - } - if (cell->dirty & CELL_NEW_FRAMEBUFFER) { struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; struct pipe_surface *zbuf = cell->framebuffer.zsbuf; @@ -80,44 +67,33 @@ cell_emit_state(struct cell_context *cell) fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; fb->width = cell->framebuffer.width; fb->height = cell->framebuffer.height; +#if 0 + printf("EMIT color format %s\n", pf_name(fb->color_format)); + printf("EMIT depth format %s\n", pf_name(fb->depth_format)); +#endif } - if (cell->dirty & CELL_NEW_BLEND) { - struct cell_command_blend blend; - if (cell->blend != NULL) { - blend.base = (intptr_t) cell->blend->code.store; - blend.size = (char *) cell->blend->code.csr - - (char *) cell->blend->code.store; - blend.read_fb = TRUE; - } else { - blend.base = 0; - blend.size = 0; - blend.read_fb = FALSE; - } - - emit_state_cmd(cell, CELL_CMD_STATE_BLEND, &blend, sizeof(blend)); - } - - if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { - struct cell_command_depth_stencil_alpha_test dsat; - - - if (cell->depth_stencil != NULL) { - dsat.base = (intptr_t) cell->depth_stencil->code.store; - dsat.size = (char *) cell->depth_stencil->code.csr - - (char *) cell->depth_stencil->code.store; - dsat.read_depth = TRUE; - dsat.read_stencil = FALSE; - } else { - dsat.base = 0; - dsat.size = 0; - dsat.read_depth = FALSE; - dsat.read_stencil = FALSE; - } - - emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, &dsat, - sizeof(dsat)); + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | + CELL_NEW_DEPTH_STENCIL | + CELL_NEW_BLEND)) { + /* XXX we don't want to always do codegen here. We should have + * a hash/lookup table to cache previous results... + */ + struct cell_command_fragment_ops *fops + = cell_batch_alloc(cell, sizeof(*fops)); + struct spe_function spe_code; + + /* generate new code */ + gen_fragment_function(cell, &spe_code); + /* put the new code into the batch buffer */ + fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; + memcpy(&fops->code, spe_code.store, + SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + fops->dsa = cell->depth_stencil->base; + fops->blend = cell->blend->base; + /* free codegen buffer */ + spe_release_func(&spe_code); } if (cell->dirty & CELL_NEW_SAMPLER) { @@ -157,7 +133,8 @@ cell_emit_state(struct cell_context *cell) emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, &cell->vertex_info, sizeof(struct vertex_info)); } - + +#if 0 if (cell->dirty & CELL_NEW_VS) { const struct draw_context *const draw = cell->draw; struct cell_shader_info info; @@ -170,7 +147,7 @@ cell_emit_state(struct cell_context *cell) info.immediates = (uintptr_t) draw->vs.machine.Imms; info.num_immediates = draw->vs.machine.ImmLimit / 4; - emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, - & info, sizeof(info)); + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); } +#endif } diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 53ae3aa50e..78cb446c14 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -132,9 +132,9 @@ emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, /** + * Generate code to perform Z testing. Four Z values are tested at once. * \param dsa Current depth-test state * \param f Function to which code should be appended - * \param m Mask of allocated / free SPE registers * \param mask Index of register to contain depth-pass mask * \param stored Index of register containing values from depth buffer * \param calculated Index of register containing per-fragment depth values @@ -198,6 +198,7 @@ emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, /** + * Generate code to apply the stencil operation (after testing). * \note Emits a maximum of 5 instructions. * * \warning @@ -222,9 +223,13 @@ emit_stencil_op(struct spe_function *f, spe_il(f, result, ref); break; case PIPE_STENCIL_OP_INCR: + /* clamp = [0xff, 0xff, 0xff, 0xff] */ spe_il(f, clamp, 0x0ff); + /* result[i] = in[i] + 1 */ spe_ai(f, result, in, 1); + /* clamp_mask[i] = (result[i] > 0xff) */ spe_clgti(f, clamp_mask, result, 0x0ff); + /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ spe_selb(f, result, result, clamp, clamp_mask); break; case PIPE_STENCIL_OP_DECR: @@ -259,10 +264,10 @@ emit_stencil_op(struct spe_function *f, /** + * Generate code to do stencil test. Four pixels are tested at once. * \param dsa Depth / stencil test state * \param face 0 for front face, 1 for back face * \param f Function to append instructions to - * \param reg_mask Mask of allocated registers * \param mask Register containing mask of fragments passing the * alpha test * \param depth_mask Register containing mask of fragments passing the @@ -310,13 +315,14 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, switch (dsa->stencil[face].func) { case PIPE_FUNC_NEVER: - spe_il(f, stencil_mask, 0); + spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ break; case PIPE_FUNC_NOTEQUAL: complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_EQUAL: + /* stencil_mask[i] = (stored[i] == ref) */ spe_ceqi(f, stencil_mask, stored, ref); break; @@ -324,6 +330,8 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GREATER: + complement = TRUE; + /* stencil_mask[i] = (stored[i] > ref) */ spe_clgti(f, stencil_mask, stored, ref); break; @@ -331,8 +339,11 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, complement = TRUE; /* FALLTHROUGH */ case PIPE_FUNC_GEQUAL: + /* stencil_mask[i] = (stored[i] > ref) */ spe_clgti(f, stencil_mask, stored, ref); + /* tmp[i] = (stored[i] == ref) */ spe_ceqi(f, tmp, stored, ref); + /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ spe_or(f, stencil_mask, stencil_mask, tmp); break; @@ -461,7 +472,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round * up to 64 to make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); /* Allocate registers for the function's input parameters. Cleverly (and @@ -540,7 +551,7 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) spe_selb(f, depth, depth, zvals, mask); } - spe_bi(f, 0, 0, 0); + spe_bi(f, 0, 0, 0); /* return from function call */ #if 0 @@ -956,7 +967,7 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to * make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); const int frag[4] = { @@ -1144,9 +1155,10 @@ cell_generate_alpha_blend(struct cell_blend_state *cb) } -int PC_OFFSET(const struct spe_function *f, const void *d) +static int +PC_OFFSET(const struct spe_function *f, const void *d) { - const intptr_t pc = (intptr_t) f->csr; + const intptr_t pc = (intptr_t) &f->store[f->num_inst]; const intptr_t ea = ~0x0f & (intptr_t) d; return (ea - pc) >> 2; @@ -1178,7 +1190,7 @@ cell_generate_logic_op(struct spe_function *f, * bytes (equiv. to 8 instructions) are needed for data storage. Round up * to 64 to make it a happy power-of-two. */ - spe_init_func(f, 4 * 64); + spe_init_func(f, SPE_INST_SIZE * 64); /* Pixel colors in framebuffer format in AoS layout. diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 86bcad05e9..97e44eeb1a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -53,7 +53,10 @@ cell_vertex_shader_state(void *shader) } - +/** + * Create fragment shader state. + * Called via pipe->create_fs_state() + */ static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -77,6 +80,9 @@ cell_create_fs_state(struct pipe_context *pipe, } +/** + * Called via pipe->bind_fs_state() + */ static void cell_bind_fs_state(struct pipe_context *pipe, void *fs) { @@ -88,6 +94,9 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) } +/** + * Called via pipe->delete_fs_state() + */ static void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { @@ -98,6 +107,10 @@ cell_delete_fs_state(struct pipe_context *pipe, void *fs) } +/** + * Create vertex shader state. + * Called via pipe->create_vs_state() + */ static void * cell_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) @@ -128,6 +141,9 @@ cell_create_vs_state(struct pipe_context *pipe, } +/** + * Called via pipe->bind_vs_state() + */ static void cell_bind_vs_state(struct pipe_context *pipe, void *vs) { @@ -142,6 +158,9 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) } +/** + * Called via pipe->delete_vs_state() + */ static void cell_delete_vs_state(struct pipe_context *pipe, void *vs) { @@ -154,6 +173,9 @@ cell_delete_vs_state(struct pipe_context *pipe, void *vs) } +/** + * Called via pipe->set_constant_buffer() + */ static void cell_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, @@ -166,7 +188,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(ws, + winsys_buffer_reference(ws, &cell->constants[shader].buffer, buf->buffer); cell->constants[shader].size = buf->size; diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 114684c2a3..fbe55c8472 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" -void +static void cell_set_vertex_elements(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_element *elements) @@ -53,7 +53,7 @@ cell_set_vertex_elements(struct pipe_context *pipe, } -void +static void cell_set_vertex_buffers(struct pipe_context *pipe, unsigned count, const struct pipe_vertex_buffer *buffers) @@ -69,3 +69,11 @@ cell_set_vertex_buffers(struct pipe_context *pipe, draw_set_vertex_buffers(cell->draw, count, buffers); } + + +void +cell_init_vertex_functions(struct cell_context *cell) +{ + cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_vertex_elements = cell_set_vertex_elements; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index d9e3b510dc..732c64082e 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -25,108 +25,13 @@ * **************************************************************************/ -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" -#include "util/u_memory.h" #include "util/u_rect.h" -#include "util/u_tile.h" - #include "cell_context.h" -#include "cell_surface.h" - - -static void -cell_surface_copy(struct pipe_context *pipe, - boolean do_flip, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - struct pipe_surface *src, - unsigned srcx, unsigned srcy, - unsigned width, unsigned height) -{ - assert( dst->cpp == src->cpp ); - - pipe_copy_rect(pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE), - &dst->block, - dst->stride, - dstx, dsty, - width, height, - pipe_surface_map(src, PIPE_BUFFER_USAGE_CPU_READ), - do_flip ? -src->stride : src->stride, - srcx, do_flip ? height - 1 - srcy : srcy); - - pipe_surface_unmap(src); - pipe_surface_unmap(dst); -} - - -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; -} - - -#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) - - -/** - * Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void -cell_surface_fill(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - unsigned width, unsigned height, unsigned value) -{ - unsigned i, j; - void *dst_map = pipe_surface_map(dst, PIPE_BUFFER_USAGE_CPU_WRITE); - - assert(dst->stride > 0); - - switch (dst->block.size) { - case 1: - case 2: - case 4: - pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); - break; - case 8: - { - /* expand the 4-byte clear value to an 8-byte value */ - ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); - ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); - ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); - ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); - ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); - val0 = (val0 << 8) | val0; - val1 = (val1 << 8) | val1; - val2 = (val2 << 8) | val2; - val3 = (val3 << 8) | val3; - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - row[j*4+0] = val0; - row[j*4+1] = val1; - row[j*4+2] = val2; - row[j*4+3] = val3; - } - row += dst->stride/2; - } - } - break; - default: - assert(0); - break; - } - - pipe_surface_unmap( dst ); -} void cell_init_surface_functions(struct cell_context *cell) { - cell->pipe.surface_copy = cell_surface_copy; - cell->pipe.surface_fill = cell_surface_fill; + cell->pipe.surface_copy = util_surface_copy; + cell->pipe.surface_fill = util_surface_fill; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 5a0942bbd6..b6590dfb86 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -63,19 +63,30 @@ cell_texture_layout(struct cell_texture * spt) spt->buffer_size = 0; for ( level = 0 ; level <= pt->last_level ; level++ ) { + unsigned size; + unsigned w_tile, h_tile; + + /* width, height, rounded up to tile size */ + w_tile = align(width, TILE_SIZE); + h_tile = align(height, TILE_SIZE); + pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; - pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); - pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); spt->stride[level] = pt->nblocksx[level] * pt->block.size; spt->level_offset[level] = spt->buffer_size; - spt->buffer_size += (pt->nblocksy[level] * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - pt->nblocksx[level] * pt->block.size); + size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + if (pt->target == PIPE_TEXTURE_CUBE) + size *= 6; + else + size *= depth; + + spt->buffer_size += size; width = minify(width); height = minify(height); @@ -85,8 +96,8 @@ cell_texture_layout(struct cell_texture * spt) static struct pipe_texture * -cell_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) +cell_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = CALLOC_STRUCT(cell_texture); @@ -113,8 +124,8 @@ cell_texture_create_screen(struct pipe_screen *screen, static void -cell_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) +cell_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) { if (!*pt) return; @@ -130,7 +141,7 @@ cell_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen, &spt->buffer, NULL); FREE(spt); } @@ -138,6 +149,7 @@ cell_texture_release_screen(struct pipe_screen *screen, } +#if 0 static void cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, uint face, uint levelsMask) @@ -145,13 +157,14 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, /* XXX TO DO: re-tile the texture data ... */ } +#endif static struct pipe_surface * -cell_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned usage) +cell_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { struct pipe_winsys *ws = screen->winsys; struct cell_texture *spt = cell_texture(pt); @@ -161,7 +174,7 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + winsys_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; @@ -174,12 +187,17 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, /* XXX may need to override usage flags (see sp_texture.c) */ + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * ps->nblocksy * ps->stride; - } else { + } + else { assert(face == 0); assert(zslice == 0); } @@ -189,6 +207,11 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, +/** + * Copy tile data from linear layout to tiled layout. + * XXX this should be rolled into the future surface-creation code. + * XXX also need "untile" code... + */ static void tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) { @@ -219,6 +242,7 @@ tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) /** * Convert linear texture image data to tiled format for SPU usage. + * XXX recast this in terms of pipe_surfaces (aka texture views). */ static void cell_tile_texture(struct cell_context *cell, @@ -285,6 +309,21 @@ cell_update_texture_mapping(struct cell_context *cell) } +static void +cell_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. + */ + assert ((*s)->texture); + pipe_texture_reference(&(*s)->texture, NULL); + + screen->winsys->surface_release(screen->winsys, s); +} + + static void * cell_surface_map( struct pipe_screen *screen, struct pipe_surface *surface, @@ -297,7 +336,7 @@ cell_surface_map( struct pipe_screen *screen, return NULL; } - map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + map = pipe_buffer_map( screen, surface->buffer, flags ); if (map == NULL) return NULL; @@ -323,7 +362,7 @@ static void cell_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); + pipe_buffer_unmap( screen, surface->buffer ); } @@ -333,12 +372,15 @@ cell_init_texture_functions(struct cell_context *cell) /*cell->pipe.texture_update = cell_texture_update;*/ } + void cell_init_screen_texture_funcs(struct pipe_screen *screen) { - screen->texture_create = cell_texture_create_screen; - screen->texture_release = cell_texture_release_screen; - screen->get_tex_surface = cell_get_tex_surface_screen; + screen->texture_create = cell_texture_create; + screen->texture_release = cell_texture_release; + + screen->get_tex_surface = cell_get_tex_surface; + screen->tex_surface_release = cell_tex_surface_release; screen->surface_map = cell_surface_map; screen->surface_unmap = cell_surface_unmap; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index e4230c7a5f..aa63435b93 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -26,6 +26,11 @@ **************************************************************************/ /** + * Vertex buffer code. The draw module transforms vertices to window + * coords, etc. and emits the vertices into buffer supplied by this module. + * When a vertex buffer is full, or we flush, we'll send the vertex data + * to the SPUs. + * * Authors * Brian Paul */ @@ -113,7 +118,7 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, } cvbr->vertex_buf = ~0; - cell_flush_int(&cell->pipe, 0x0); + cell_flush_int(cell, 0x0); assert(vertices == cvbr->vertex_buffer); cvbr->vertex_buffer = NULL; @@ -121,12 +126,13 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, -static void +static boolean cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); cvbr->prim = prim; /*printf("cell_set_prim %u\n", prim);*/ + return TRUE; } @@ -244,7 +250,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, #if 0 /* helpful for debug */ - cell_flush_int(&cell->pipe, CELL_FLUSH_WAIT); + cell_flush_int(cell, CELL_FLUSH_WAIT); #endif } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 2ece0250f6..566df7f59e 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -297,10 +297,9 @@ void cell_update_vertex_fetch(struct draw_context *draw) /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). That means (34 * 4) = 136 bytes - * each maximum. + * actually a slight over-estimate). */ - spe_init_func(p, 136 * unique_attr_formats); + spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); /* Allocate registers for the function's input parameters. diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 3658947715..2b10c116fa 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -135,7 +135,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) vs->num_elts = n; send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - cell_flush_int(& cell->pipe, CELL_FLUSH_WAIT); + cell_flush_int(cell, CELL_FLUSH_WAIT); } draw->vs.post_nr = draw->vs.queue_nr; |