diff options
Diffstat (limited to 'src/mesa/pipe/cell/spu')
21 files changed, 0 insertions, 5852 deletions
diff --git a/src/mesa/pipe/cell/spu/Makefile b/src/mesa/pipe/cell/spu/Makefile deleted file mode 100644 index f202971d73..0000000000 --- a/src/mesa/pipe/cell/spu/Makefile +++ /dev/null @@ -1,72 +0,0 @@ -# Gallium3D Cell driver: SPU code - -# This makefile builds the g3d_spu.a file that's linked into the -# PPU code/library. - - -TOP = ../../../../.. -include $(TOP)/configs/linux-cell - - -PROG = g3d - -PROG_SPU = $(PROG)_spu -PROG_SPU_A = $(PROG)_spu.a -PROG_SPU_EMBED_O = $(PROG)_spu-embed.o - - -SOURCES = \ - spu_main.c \ - spu_blend.c \ - spu_render.c \ - spu_texture.c \ - spu_tile.c \ - spu_tri.c \ - spu_exec.c \ - spu_util.c \ - spu_vertex_fetch.c \ - spu_vertex_shader.c - -SPU_OBJECTS = $(SOURCES:.c=.o) \ - -SPU_ASM_OUT = $(SOURCES:.c=.s) \ - -INCLUDE_DIRS = -I$(TOP)/src/mesa - - -.c.o: - $(SPU_CC) $(SPU_CFLAGS) -c $< - -.c.s: - $(SPU_CC) $(SPU_CFLAGS) -S $< - - -# The .a file will be linked into the main/PPU executable -default: $(PROG_SPU_A) - -$(PROG_SPU_A): $(PROG_SPU_EMBED_O) - $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) - -$(PROG_SPU_EMBED_O): $(PROG_SPU) - $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) - -$(PROG_SPU): $(SPU_OBJECTS) - $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) - - - -asmfiles: $(SPU_ASM_OUT) - - -clean: - rm -f *~ *.o *.a *.d *.s $(PROG_SPU) - - - -depend: $(SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null - -include depend - diff --git a/src/mesa/pipe/cell/spu/spu_blend.c b/src/mesa/pipe/cell/spu/spu_blend.c deleted file mode 100644 index 23ec0eeb45..0000000000 --- a/src/mesa/pipe/cell/spu/spu_blend.c +++ /dev/null @@ -1,62 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#include "spu_main.h" -#include "spu_blend.h" -#include "spu_colorpack.h" - - -void -blend_quad(uint itx, uint ity, vector float colors[4]) -{ - /* simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending */ - vector float fbc00 = spu_unpack_color(spu.ctile.ui[ity][itx]); - vector float fbc01 = spu_unpack_color(spu.ctile.ui[ity][itx+1]); - vector float fbc10 = spu_unpack_color(spu.ctile.ui[ity+1][itx]); - vector float fbc11 = spu_unpack_color(spu.ctile.ui[ity+1][itx+1]); - - vector float alpha00 = spu_splats(spu_extract(colors[0], 3)); - vector float alpha01 = spu_splats(spu_extract(colors[1], 3)); - vector float alpha10 = spu_splats(spu_extract(colors[2], 3)); - vector float alpha11 = spu_splats(spu_extract(colors[3], 3)); - - vector float one_minus_alpha00 = spu_sub(spu_splats(1.0f), alpha00); - vector float one_minus_alpha01 = spu_sub(spu_splats(1.0f), alpha01); - vector float one_minus_alpha10 = spu_sub(spu_splats(1.0f), alpha10); - vector float one_minus_alpha11 = spu_sub(spu_splats(1.0f), alpha11); - - colors[0] = spu_add(spu_mul(colors[0], alpha00), - spu_mul(fbc00, one_minus_alpha00)); - colors[1] = spu_add(spu_mul(colors[1], alpha01), - spu_mul(fbc01, one_minus_alpha01)); - colors[2] = spu_add(spu_mul(colors[2], alpha10), - spu_mul(fbc10, one_minus_alpha10)); - colors[3] = spu_add(spu_mul(colors[3], alpha11), - spu_mul(fbc11, one_minus_alpha11)); -} - diff --git a/src/mesa/pipe/cell/spu/spu_blend.h b/src/mesa/pipe/cell/spu/spu_blend.h deleted file mode 100644 index 2b594b578b..0000000000 --- a/src/mesa/pipe/cell/spu/spu_blend.h +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SPU_BLEND_H -#define SPU_BLEND_H - - -extern void -blend_quad(uint itx, uint ity, vector float colors[4]); - - -#endif /* SPU_BLEND_H */ diff --git a/src/mesa/pipe/cell/spu/spu_colorpack.h b/src/mesa/pipe/cell/spu/spu_colorpack.h deleted file mode 100644 index e9fee8a3a6..0000000000 --- a/src/mesa/pipe/cell/spu/spu_colorpack.h +++ /dev/null @@ -1,110 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - - -#ifndef SPU_COLORPACK_H -#define SPU_COLORPACK_H - - -#include <spu_intrinsics.h> - - -static INLINE unsigned int -spu_pack_R8G8B8A8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - - out = spu_shuffle(out, out, ((vector unsigned char) { - 0, 4, 8, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }) ); - - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_A8R8G8B8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, ((vector unsigned char) { - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}) ); - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_B8G8R8A8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, ((vector unsigned char) { - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}) ); - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, shuffle); - return spu_extract(out, 0); -} - - -static INLINE vector float -spu_unpack_color(uint color) -{ - vector unsigned int color_u4 = spu_splats(color); - color_u4 = spu_shuffle(color_u4, color_u4, - ((vector unsigned char) { - 0, 0, 0, 0, - 5, 5, 5, 5, - 10, 10, 10, 10, - 15, 15, 15, 15}) ); - return spu_convtf(color_u4, 32); -} - - -static INLINE vector float -spu_unpack_A8R8G8B8(uint color) -{ - vector unsigned int color_u4 = spu_splats(color); - color_u4 = spu_shuffle(color_u4, color_u4, - ((vector unsigned char) { - 5, 5, 5, 5, - 10, 10, 10, 10, - 15, 15, 15, 15, - 0, 0, 0, 0}) ); - - return spu_convtf(color_u4, 32); -} - - -#endif /* SPU_COLORPACK_H */ diff --git a/src/mesa/pipe/cell/spu/spu_exec.c b/src/mesa/pipe/cell/spu/spu_exec.c deleted file mode 100644 index e51008b9b3..0000000000 --- 
a/src/mesa/pipe/cell/spu/spu_exec.c +++ /dev/null @@ -1,1948 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI interpretor/executor. - * - * Flow control information: - * - * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) - * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special - * care since a condition may be true for some quad components but false - * for other components. - * - * We basically execute all statements (even if they're in the part of - * an IF/ELSE clause that's "not taken") and use a special mask to - * control writing to destination registers. This is the ExecMask. 
- * See store_dest(). - * - * The ExecMask is computed from three other masks (CondMask, LoopMask and - * ContMask) which are controlled by the flow control instructions (namely: - * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). - * - * - * Authors: - * Michal Krol - * Brian Paul - */ - -#include <libmisc.h> -#include <spu_mfcio.h> -#include <transpose_matrix4x4.h> -#include <simdmath/ceilf4.h> -#include <simdmath/cosf4.h> -#include <simdmath/divf4.h> -#include <simdmath/floorf4.h> -#include <simdmath/log2f4.h> -#include <simdmath/powf4.h> -#include <simdmath/sinf4.h> -#include <simdmath/sqrtf4.h> -#include <simdmath/truncf4.h> - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -#include "pipe/tgsi/util/tgsi_util.h" -#include "spu_exec.h" -#include "spu_main.h" -#include "spu_vertex_shader.h" - -#define TILE_TOP_LEFT 0 -#define TILE_TOP_RIGHT 1 -#define TILE_BOTTOM_LEFT 2 -#define TILE_BOTTOM_RIGHT 3 - -/* - * Shorthand locations of various utility registers (_I = Index, _C = Channel) - */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C -#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I -#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C -#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I -#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 
-#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I -#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -#define FOR_EACH_CHANNEL(CHAN)\ - for (CHAN = 0; CHAN < 4; CHAN++) - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) - - -/** The execution mask depends on the conditional mask and the loop mask */ -#define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask - - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - - - -/** - * Initialize machine state by expanding tokens to full instructions, - * allocating temporary storage, setting up constants, etc. - * After this, we can call spu_exec_machine_run() many times. - */ -void -spu_exec_machine_init(struct spu_exec_machine *mach, - uint numSamplers, - struct spu_sampler *samplers, - unsigned processor) -{ - qword zero; - qword not_zero; - uint i; - - mach->Samplers = samplers; - mach->Processor = processor; - mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; - - zero = si_xor(zero, zero); - not_zero = si_xori(zero, 0xff); - - /* Setup constants. 
*/ - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); - - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); -} - - -static INLINE qword -micro_abs(qword src) -{ - return si_rotmi(si_shli(src, 1), -1); -} - -static INLINE qword -micro_ceil(qword src) -{ - return (qword) _ceilf4((vec_float4) src); -} - -static INLINE qword -micro_cos(qword src) -{ - return (qword) _cosf4((vec_float4) src); -} - -static const qword br_shuf = { - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, -}; - -static const qword bl_shuf = { - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, -}; - -static const qword tl_shuf = { - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT 
+ 3, -}; - -static qword -micro_ddx(qword src) -{ - qword bottom_right = si_shufb(src, src, br_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(bottom_right, bottom_left); -} - -static qword -micro_ddy(qword src) -{ - qword top_left = si_shufb(src, src, tl_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(top_left, bottom_left); -} - -static INLINE qword -micro_div(qword src0, qword src1) -{ - return (qword) _divf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_flr(qword src) -{ - return (qword) _floorf4((vec_float4) src); -} - -static qword -micro_frc(qword src) -{ - return si_fs(src, (qword) _floorf4((vec_float4) src)); -} - -static INLINE qword -micro_ge(qword src0, qword src1) -{ - return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); -} - -static qword -micro_lg2(qword src) -{ - return (qword) _log2f4((vec_float4) src); -} - -static INLINE qword -micro_lt(qword src0, qword src1) -{ - const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); - - return si_xori(tmp, 0xff); -} - -static INLINE qword -micro_max(qword src0, qword src1) -{ - return si_selb(src1, src0, si_fcgt(src0, src1)); -} - -static INLINE qword -micro_min(qword src0, qword src1) -{ - return si_selb(src0, src1, si_fcgt(src0, src1)); -} - -static qword -micro_neg(qword src) -{ - return si_xor(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_set_sign(qword src) -{ - return si_or(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_pow(qword src0, qword src1) -{ - return (qword) _powf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_rnd(qword src) -{ - const qword half = (qword) spu_splats(0.5f); - - /* May be able to use _roundf4. There may be some difference, though. 
- */ - return (qword) _floorf4((vec_float4) si_fa(src, half)); -} - -static INLINE qword -micro_ishr(qword src0, qword src1) -{ - return si_rotma(src0, si_sfi(src1, 0)); -} - -static qword -micro_trunc(qword src) -{ - return (qword) _truncf4((vec_float4) src); -} - -static qword -micro_sin(qword src) -{ - return (qword) _sinf4((vec_float4) src); -} - -static INLINE qword -micro_sqrt(qword src) -{ - return (qword) _sqrtf4((vec_float4) src); -} - -static void -fetch_src_file_channel( - const struct spu_exec_machine *mach, - const uint file, - const uint swizzle, - const union spu_exec_channel *index, - union spu_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: { - unsigned char buffer[32] ALIGN16_ATTRIB; - unsigned i; - - for (i = 0; i < 4; i++) { - const float *ptr = mach->Consts[index->i[i]]; - const uint64_t addr = (uint64_t)(uintptr_t) ptr; - const unsigned size = ((addr & 0x0f) == 0) ? 
16 : 32; - - mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) - + (sizeof(float) * swizzle)], sizeof(float)); - } - break; - } - - case TGSI_FILE_INPUT: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_TEMPORARY: - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - assert( index->i[1] < (int) mach->ImmLimit ); - assert( index->i[2] < (int) mach->ImmLimit ); - assert( index->i[3] < (int) mach->ImmLimit ); - - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - chan->f[3] = mach->Imms[index->i[3]][swizzle]; - break; - - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - default: - assert( 0 ); - } - break; - - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - 
*chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - - default: - assert( 0 ); - } -} - -static void -fetch_source( - const struct spu_exec_machine *mach, - union spu_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) -{ - union spu_exec_channel index; - uint swizzle; - - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->SrcRegister.Index; - - if (reg->SrcRegister.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, - CHAN_X); - fetch_src_file_channel( - mach, - reg->SrcRegisterInd.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { - case TGSI_FILE_INPUT: - index.q = si_mpyi(index.q, 17); - break; - case TGSI_FILE_CONSTANT: - index.q = si_shli(index.q, 12); - break; - default: - assert( 0 ); - } - - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; - - if (reg->SrcRegisterDim.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); - fetch_src_file_channel( - mach, - reg->SrcRegisterDimInd.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - } - - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->SrcRegister.File, - swizzle, - &index, - chan ); - - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - chan->q = micro_abs(chan->q); - 
break; - - case TGSI_UTIL_SIGN_SET: - chan->q = micro_set_sign(chan->q); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - chan->q = micro_neg(chan->q); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } - - if (reg->SrcRegisterExtMod.Complement) { - chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); - } -} - -static void -store_dest( - struct spu_exec_machine *mach, - const union spu_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) -{ - union spu_exec_channel *dst; - - switch( reg->DstRegister.File ) { - case TGSI_FILE_NULL: - return; - - case TGSI_FILE_OUTPUT: - dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; - break; - - default: - assert( 0 ); - return; - } - - switch (inst->Instruction.Saturate) - { - case TGSI_SAT_NONE: - if (mach->ExecMask & 0x1) - dst->i[0] = chan->i[0]; - if (mach->ExecMask & 0x2) - dst->i[1] = chan->i[1]; - if (mach->ExecMask & 0x4) - dst->i[2] = chan->i[2]; - if (mach->ExecMask & 0x8) - dst->i[3] = chan->i[3]; - break; - - case TGSI_SAT_ZERO_ONE: - /* XXX need to obey ExecMask here */ - dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - - default: - assert( 0 ); - } -} - -#define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) - -#define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) - - -/** - * Execute ARB-style KIL which is predicated by a src register. - * Kill fragment if any of the four values is less than zero. 
- */ -static void -exec_kilp(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint uniquemask; - uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - union spu_exec_channel r[1]; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint swizzle; - uint i; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( - &inst->FullSrcRegisters[0], - chan_index); - - /* check if the component has not been already tested */ - if (uniquemask & (1 << swizzle)) - continue; - uniquemask |= 1 << swizzle; - - FETCH(&r[0], 0, chan_index); - for (i = 0; i < 4; i++) - if (r[0].f[i] < 0.0f) - kilmask |= 1 << i; - } - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - - -/* - * Fetch a texel using STR texture coordinates. 
- */ -static void -fetch_texel( struct spu_sampler *sampler, - const union spu_exec_channel *s, - const union spu_exec_channel *t, - const union spu_exec_channel *p, - float lodbias, /* XXX should be float[4] */ - union spu_exec_channel *r, - union spu_exec_channel *g, - union spu_exec_channel *b, - union spu_exec_channel *a ) -{ - qword rgba[4]; - qword out[4]; - - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); - - _transpose_matrix4x4(out, rgba); - r->q = out[0]; - g->q = out[1]; - b->q = out[2]; - a->q = out[3]; -} - - -static void -exec_tex(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - boolean biasLod) -{ - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union spu_exec_channel r[8]; - uint chan_index; - float lodBias; - - /* printf("Sampler %u unit %u\n", sampler, unit); */ - - switch (inst->InstructionExtTexture.Texture) { - case TGSI_TEXTURE_1D: - - FETCH(&r[0], 0, CHAN_X); - - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: - FETCH(&r[1], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[1].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; - - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - 
- fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; - - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, - &r[0], &r[1], &r[2], &r[3]); - break; - - default: - assert (0); - } - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } -} - - - -static void -constant_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; - } -} - -static void -linear_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; -} - -static void -perspective_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - 
const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; -} - - -typedef void (* interpolation_func)( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ); - -static void -exec_declaration(struct spu_exec_machine *mach, - const struct tgsi_full_declaration *decl) -{ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - interpolation_func interp; - - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - switch( decl->Interpolation.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - interp = constant_interpolation; - break; - - case TGSI_INTERPOLATE_LINEAR: - interp = linear_interpolation; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = perspective_interpolation; - break; - - default: - assert( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - interp( mach, i, j ); - } - } - } - else { - unsigned i, j; - - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - interp( mach, i, j ); - } - } - } - } - } - } -} - -static void -exec_instruction( - struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - int *pc ) -{ - uint chan_index; - union spu_exec_channel r[8]; - - (*pc)++; - - switch (inst->Instruction.Opcode) { - case 
TGSI_OPCODE_ARL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_cflts(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - - FETCH( &r[2], 0, CHAN_W ); - r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); - r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); - r[1].q = micro_pow(r[1].q, r[2].q); - - /* r0 = (r0 > 0.0) ? 
r1 : 0.0 - */ - r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, - r[0].q); - STORE( &r[0], 0, CHAN_Z ); - } - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sqrt(r[0].q); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - assert (0); - break; - - case TGSI_OPCODE_LOG: - assert (0); - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fm(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fa(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, 
CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - r[0].q = si_fm(r[0].q, r[1].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_min(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_max(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = micro_ge(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = micro_ge(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, 
chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - FETCH( &r[2], 2, chan_index ); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fs(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - r[1].q = si_fs(r[1].q, r[2].q); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_CND: - assert (0); - break; - - case TGSI_OPCODE_CND0: - assert (0); - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - assert (0); - break; - - case TGSI_OPCODE_INDEX: - assert (0); - break; - - case TGSI_OPCODE_NEGATE: - assert (0); - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_frc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - assert (0); - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_flr(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_rnd(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH( 
&r[0], 0, CHAN_X ); - r[0].q = micro_lg2(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = micro_pow(r[0].q, r[1].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - /* r2 = (r0 * r1) - (r3 * r5) - */ - r[2].q = si_fm(r[3].q, r[5].q); - r[2].q = si_fms(r[0].q, r[1].q, r[2].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } - - FETCH(&r[2], 1, CHAN_X); - FETCH(&r[5], 0, CHAN_X); - - /* r3 = (r3 * r2) - (r1 * r5) - */ - r[1].q = si_fm(r[1].q, r[5].q); - r[3].q = si_fms(r[3].q, r[2].q, r[1].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } - - /* r5 = (r5 * r4) - (r0 * r2) - */ - r[0].q = si_fm(r[0].q, r[2].q); - r[5].q = si_fms(r[5].q, r[4].q, r[0].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - - r[0].q = micro_abs(r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_RCC: - assert (0); - break; - - case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 1, CHAN_W); - - r[0].q = si_fa(r[0].q, r[1].q); 
- - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_cos(r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddx(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddy(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_KILP: - exec_kilp (mach, inst); - break; - - case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; - break; - - case TGSI_OPCODE_PK2H: - assert (0); - break; - - case TGSI_OPCODE_PK2US: - assert (0); - break; - - case TGSI_OPCODE_PK4B: - assert (0); - break; - - case TGSI_OPCODE_PK4UB: - assert (0); - break; - - case TGSI_OPCODE_RFL: - assert (0); - break; - - case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SFL: - assert (0); - break; - - case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fcgt(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sin(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fcgt(r[0].q, r[1].q); - r[0].q 
= si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_STR: - assert (0); - break; - - case TGSI_OPCODE_TEX: - /* simple texture lookup */ - /* src[0] = texcoord */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); - break; - - case TGSI_OPCODE_TXB: - /* Texture lookup with lod bias */ - /* src[0] = texcoord (src[0].w = load bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); - break; - - case TGSI_OPCODE_TXD: - /* Texture lookup with explict partial derivatives */ - /* src[0] = texcoord */ - /* src[1] = d[strq]/dx */ - /* src[2] = d[strq]/dy */ - /* src[3] = sampler unit */ - assert (0); - break; - - case TGSI_OPCODE_TXL: - /* Texture lookup with explit LOD */ - /* src[0] = texcoord (src[0].w = load bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); - break; - - case TGSI_OPCODE_UP2H: - assert (0); - break; - - case TGSI_OPCODE_UP2US: - assert (0); - break; - - case TGSI_OPCODE_UP4B: - assert (0); - break; - - case TGSI_OPCODE_UP4UB: - assert (0); - break; - - case TGSI_OPCODE_X2D: - assert (0); - break; - - case TGSI_OPCODE_ARA: - assert (0); - break; - - case TGSI_OPCODE_ARR: - assert (0); - break; - - case TGSI_OPCODE_BRA: - assert (0); - break; - - case TGSI_OPCODE_CAL: - /* skip the call if no execution channels are enabled */ - if (mach->ExecMask) { - /* do the call */ - - /* push the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - 
mach->ContStack[mach->ContStackTop++] = mach->ContMask; - - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); - mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; - *pc = inst->InstructionExtLabel.Label; - } - break; - - case TGSI_OPCODE_RET: - mach->FuncMask &= ~mach->ExecMask; - UPDATE_EXEC_MASK(mach); - - if (mach->ExecMask == 0x0) { - /* really return now (otherwise, keep executing */ - - if (mach->CallStackTop == 0) { - /* returning from main() */ - *pc = -1; - return; - } - *pc = mach->CallStack[--mach->CallStackTop]; - - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->FuncStackTop > 0); - mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; - - UPDATE_EXEC_MASK(mach); - } - break; - - case TGSI_OPCODE_SSG: - assert (0); - break; - - case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - /* r0 = (r0 < 0.0) ? 
r1 : r2 - */ - r[3].q = si_xor(r[3].q, r[3].q); - r[0].q = micro_lt(r[0].q, r[3].q); - r[0].q = si_selb(r[1].q, r[2].q, r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - r[1].q = micro_cos(r[0].q); - STORE( &r[1], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - r[1].q = micro_sin(r[0].q); - STORE( &r[1], 0, CHAN_Y ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_NRM: - assert (0); - break; - - case TGSI_OPCODE_DIV: - assert( 0 ); - break; - - case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_IF: - /* push CondMask */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - FETCH( &r[0], 0, CHAN_X ); - /* update CondMask */ - if( ! r[0].u[0] ) { - mach->CondMask &= ~0x1; - } - if( ! r[0].u[1] ) { - mach->CondMask &= ~0x2; - } - if( ! r[0].u[2] ) { - mach->CondMask &= ~0x4; - } - if( ! 
r[0].u[3] ) { - mach->CondMask &= ~0x8; - } - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ELSE */ - break; - - case TGSI_OPCODE_ELSE: - /* invert CondMask wrt previous mask */ - { - uint prevMask; - assert(mach->CondStackTop > 0); - prevMask = mach->CondStack[mach->CondStackTop - 1]; - mach->CondMask = ~mach->CondMask & prevMask; - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ENDIF */ - } - break; - - case TGSI_OPCODE_ENDIF: - /* pop CondMask */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_END: - /* halt execution */ - *pc = -1; - break; - - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - - case TGSI_OPCODE_PUSHA: - assert (0); - break; - - case TGSI_OPCODE_POPA: - assert (0); - break; - - case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ceil(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_csflt(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_xorbi(r[0].q, 0xff); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_trunc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_shl(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); 
- r[0].q = micro_ishr(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_and(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_or(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOD: - assert (0); - break; - - case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_xor(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SAD: - assert (0); - break; - - case TGSI_OPCODE_TXF: - assert (0); - break; - - case TGSI_OPCODE_TXQ: - assert (0); - break; - - case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; - break; - - case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; - break; - - case TGSI_OPCODE_LOOP: - /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: - /* push LoopMask and ContMasks */ - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - break; - - case TGSI_OPCODE_ENDLOOP: - /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - if (mach->LoopMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - *pc 
= inst->InstructionExtLabel.Label + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_CONT: - /* turn off cont channels for each enabled exec channel */ - mach->ContMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BGNSUB: - /* no-op */ - break; - - case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; - - case TGSI_OPCODE_NOISE1: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE2: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE3: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE4: - assert( 0 ); - break; - - case TGSI_OPCODE_NOP: - break; - - default: - assert( 0 ); - } -} - - -/** - * Run TGSI interpreter. 
- * \return bitmask of "alive" quad components - */ -uint -spu_exec_machine_run( struct spu_exec_machine *mach ) -{ - uint i; - int pc = 0; - - mach->CondMask = 0xf; - mach->LoopMask = 0xf; - mach->ContMask = 0xf; - mach->FuncMask = 0xf; - mach->ExecMask = 0xf; - - mach->CondStackTop = 0; /* temporarily subvert this assertion */ - assert(mach->CondStackTop == 0); - assert(mach->LoopStackTop == 0); - assert(mach->ContStackTop == 0); - assert(mach->CallStackTop == 0); - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - - if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; - mach->Primitives[0] = 0; - } - - - /* execute declarations (interpolants) */ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - for (i = 0; i < mach->NumDeclarations; i++) { - uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; - struct tgsi_full_declaration decl; - unsigned long decl_addr = (unsigned long) (mach->Declarations+i); - unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); - - mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); - - memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); - exec_declaration( mach, &decl ); - } - } - - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; - struct tgsi_full_instruction inst; - unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); - unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); - - assert(pc < mach->NumInstructions); - mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); - wait_on_mask(1 << TAG_INSTRUCTION_FETCH); - - memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); - exec_instruction( mach, & inst, &pc ); - } - -#if 0 - /* we scale from 
floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - /* - * Scale back depth component. - */ - for (i = 0; i < 4; i++) - mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; - } -#endif - - return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; -} - - diff --git a/src/mesa/pipe/cell/spu/spu_exec.h b/src/mesa/pipe/cell/spu/spu_exec.h deleted file mode 100644 index b4c7661ef6..0000000000 --- a/src/mesa/pipe/cell/spu/spu_exec.h +++ /dev/null @@ -1,172 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#if !defined SPU_EXEC_H -#define SPU_EXEC_H - -#include "pipe/p_compiler.h" -#include "pipe/tgsi/exec/tgsi_exec.h" - -#if defined __cplusplus -extern "C" { -#endif - -/** - * Registers may be treated as float, signed int or unsigned int. - */ -union spu_exec_channel -{ - float f[QUAD_SIZE]; - int i[QUAD_SIZE]; - unsigned u[QUAD_SIZE]; - qword q; -}; - -/** - * A vector[RGBA] of channels[4 pixels] - */ -struct spu_exec_vector -{ - union spu_exec_channel xyzw[NUM_CHANNELS]; -}; - -/** - * For fragment programs, information for computing fragment input - * values from plane equation of the triangle/line. - */ -struct spu_interp_coef -{ - float a0[NUM_CHANNELS]; /* in an xyzw layout */ - float dadx[NUM_CHANNELS]; - float dady[NUM_CHANNELS]; -}; - - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - -/** - * Information for sampling textures, which must be implemented - * by code outside the TGSI executor. - */ -struct spu_sampler -{ - const struct pipe_sampler_state *state; - struct pipe_texture *texture; - /** Get samples for four fragments in a quad */ - void (*get_samples)(struct spu_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; -}; - - -/** - * Run-time virtual machine state for executing TGSI shader. - */ -struct spu_exec_machine -{ - /* - * 32 program temporaries - * 4 internal temporaries - * 1 address - */ - struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_ADDRS + 1] - ALIGN16_ATTRIB; - - struct spu_exec_vector *Addrs; - - struct spu_sampler *Samplers; - - float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - unsigned ImmLimit; - float (*Consts)[4]; - struct spu_exec_vector *Inputs; - struct spu_exec_vector *Outputs; - unsigned Processor; - - /* GEOMETRY processor only. 
*/ - unsigned *Primitives; - - /* FRAGMENT processor only. */ - const struct spu_interp_coef *InterpCoefs; - struct spu_exec_vector QuadPos; - - /* Conditional execution masks */ - uint CondMask; /**< For IF/ELSE/ENDIF */ - uint LoopMask; /**< For BGNLOOP/ENDLOOP */ - uint ContMask; /**< For loop CONT statements */ - uint FuncMask; /**< For function calls */ - uint ExecMask; /**< = CondMask & LoopMask */ - - /** Condition mask stack (for nested conditionals) */ - uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; - int CondStackTop; - - /** Loop mask stack (for nested loops) */ - uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int LoopStackTop; - - /** Loop continue mask stack (see comments in tgsi_exec.c) */ - uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int ContStackTop; - - /** Function execution mask stack (for executing subroutine code) */ - uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; - int FuncStackTop; - - /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; - int CallStackTop; - - struct tgsi_full_instruction *Instructions; - uint NumInstructions; - - struct tgsi_full_declaration *Declarations; - uint NumDeclarations; -}; - - -extern void -spu_exec_machine_init(struct spu_exec_machine *mach, - uint numSamplers, - struct spu_sampler *samplers, - unsigned processor); - -extern uint -spu_exec_machine_run( struct spu_exec_machine *mach ); - - -#if defined __cplusplus -} /* extern "C" */ -#endif - -#endif /* SPU_EXEC_H */ diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c deleted file mode 100644 index e375197fe6..0000000000 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ /dev/null @@ -1,567 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* main() for Cell SPU code */ - - -#include <stdio.h> -#include <libmisc.h> - -#include "spu_main.h" -#include "spu_render.h" -#include "spu_texture.h" -#include "spu_tile.h" -//#include "spu_test.h" -#include "spu_vertex_shader.h" -#include "pipe/cell/common.h" -#include "pipe/p_defines.h" - - -/* -helpful headers: -/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h -/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h -*/ - -boolean Debug = FALSE; - -struct spu_global spu; - -struct spu_vs_context draw; - -/** - * Tell the PPU that this SPU has finished copying a buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_context->buffer_status[]). 
- */ -static void -release_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - - const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BUFFERS); - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -/** - * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled - * tiles back to the main framebuffer. - */ -static void -really_clear_tiles(uint surfaceIndex) -{ - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - - if (surfaceIndex == 0) { - clear_c_tile(&spu.ctile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - } - } - } - else { - clear_z_tile(&spu.ztile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); - } - } - -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif -} - - -static void -cmd_clear_surface(const struct cell_command_clear_surface *clear) -{ - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - - if (Debug) - printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, - clear->surface, clear->value); - -#define CLEAR_OPT 1 -#if CLEAR_OPT - /* set all tile's status to CLEAR */ - if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); - spu.fb.color_clear_value = clear->value; - } - else { - 
memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); - spu.fb.depth_clear_value = clear->value; - } - return; -#endif - - if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - clear_c_tile(&spu.ctile); - } - else { - spu.fb.depth_clear_value = clear->value; - clear_z_tile(&spu.ztile); - } - - /* - printf("SPU: %s num=%d w=%d h=%d\n", - __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); - */ - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - /* XXX we don't want this here, but it fixes bad tile results */ - } - -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif - - if (Debug) - printf("SPU %u: CLEAR SURF done\n", spu.init.id); -} - - -static void -cmd_release_verts(const struct cell_command_release_verts *release) -{ - if (Debug) - printf("SPU %u: RELEASE VERTS %u\n", - spu.init.id, release->vertex_buf); - ASSERT(release->vertex_buf != ~0U); - release_buffer(release->vertex_buf); -} - - -static void -cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) -{ - if (Debug) - printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - spu.init.id, - cmd->width, - cmd->height, - cmd->color_start, - cmd->color_format, - cmd->depth_format); - - ASSERT_ALIGN16(cmd->color_start); - ASSERT_ALIGN16(cmd->depth_start); - - spu.fb.color_start = cmd->color_start; - spu.fb.depth_start = cmd->depth_start; - spu.fb.color_format = cmd->color_format; - spu.fb.depth_format = cmd->depth_format; - spu.fb.width = cmd->width; - spu.fb.height = cmd->height; - spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; - spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - - if (spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM) - spu.fb.zsize = 4; - else if 
(spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) - spu.fb.zsize = 2; - else - spu.fb.zsize = 0; - - if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) - spu.color_shuffle = ((vector unsigned char) { - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}); - else - ASSERT(0); -} - - -static void -cmd_state_blend(const struct pipe_blend_state *state) -{ - if (Debug) - printf("SPU %u: BLEND: enabled %d\n", - spu.init.id, - state->blend_enable); - - memcpy(&spu.blend, state, sizeof(*state)); -} - - -static void -cmd_state_depth_stencil(const struct pipe_depth_stencil_alpha_state *state) -{ - if (Debug) - printf("SPU %u: DEPTH_STENCIL: ztest %d\n", - spu.init.id, - state->depth.enabled); - - memcpy(&spu.depth_stencil, state, sizeof(*state)); -} - - -static void -cmd_state_sampler(const struct pipe_sampler_state *state) -{ - if (Debug) - printf("SPU %u: SAMPLER\n", - spu.init.id); - - memcpy(&spu.sampler[0], state, sizeof(*state)); - if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) - spu.sample_texture = sample_texture_bilinear; - else - spu.sample_texture = sample_texture_nearest; -} - - -static void -cmd_state_texture(const struct cell_command_texture *texture) -{ - if (Debug) - printf("SPU %u: TEXTURE at %p size %u x %u\n", - spu.init.id, texture->start, texture->width, texture->height); - - memcpy(&spu.texture, texture, sizeof(*texture)); - spu.tex_size = (vector float) - { spu.texture.width, spu.texture.height, 0.0, 0.0}; - spu.tex_size_mask = (vector unsigned int) - { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; -} - - -static void -cmd_state_vertex_info(const struct vertex_info *vinfo) -{ - if (Debug) { - printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, - vinfo->num_attribs); - } - ASSERT(vinfo->num_attribs >= 1); - ASSERT(vinfo->num_attribs <= 8); - 
memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); -} - - -static void -cmd_state_vs_array_info(const struct cell_array_info *vs_info) -{ - const unsigned attr = vs_info->attr; - - ASSERT(attr < PIPE_ATTRIB_MAX); - draw.vertex_fetch.src_ptr[attr] = vs_info->base; - draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.format[attr] = vs_info->format; - draw.vertex_fetch.dirty = 1; -} - - -static void -cmd_finish(void) -{ - if (Debug) - printf("SPU %u: FINISH\n", spu.init.id); - really_clear_tiles(0); - /* wait for all outstanding DMAs to finish */ - mfc_write_tag_mask(~0); - mfc_read_tag_status_all(); - /* send mbox message to PPU */ - spu_write_out_mbox(CELL_CMD_FINISH); -} - - -/** - * Execute a batch of commands - * The opcode param encodes the location of the buffer and its size. - */ -static void -cmd_batch(uint opcode) -{ - const uint buf = (opcode >> 8) & 0xff; - uint size = (opcode >> 16); - uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; - const unsigned usize = size / sizeof(buffer[0]); - uint pos; - - if (Debug) - printf("SPU %u: BATCH buffer %u, len %u, from %p\n", - spu.init.id, buf, size, spu.init.buffers[buf]); - - ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - size = ROUNDUP16(size); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - mfc_get(buffer, /* dest */ - (unsigned int) spu.init.buffers[buf], /* src */ - size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - /* Tell PPU we're done copying the buffer to local store */ - if (Debug) - printf("SPU %u: release batch buf %u\n", spu.init.id, buf); - release_buffer(buf); - - for (pos = 0; pos < usize; /* no incr */) { - switch (buffer[pos]) { - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 8; - } - break; - case CELL_CMD_CLEAR_SURFACE: - { - struct 
cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) &buffer[pos]; - cmd_clear_surface(clr); - pos += sizeof(*clr) / 8; - } - break; - case CELL_CMD_RENDER: - { - struct cell_command_render *render - = (struct cell_command_render *) &buffer[pos]; - uint pos_incr; - cmd_render(render, &pos_incr); - pos += pos_incr; - } - break; - case CELL_CMD_RELEASE_VERTS: - { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 8; - } - break; - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_STATE_BLEND: - cmd_state_blend((struct pipe_blend_state *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); - break; - case CELL_CMD_STATE_DEPTH_STENCIL: - cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); - break; - case CELL_CMD_STATE_SAMPLER: - cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); - break; - case CELL_CMD_STATE_TEXTURE: - cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); - break; - case CELL_CMD_STATE_VERTEX_INFO: - cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); - break; - case CELL_CMD_STATE_VIEWPORT: - (void) memcpy(& draw.viewport, &buffer[pos+1], - sizeof(struct pipe_viewport_state)); - pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); - break; - case CELL_CMD_STATE_VS_ARRAY_INFO: - cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); - break; - default: - printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); - ASSERT(0); - break; - } - } - - if 
(Debug) - printf("SPU %u: BATCH complete\n", spu.init.id); -} - - -/** - * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. - */ -static void -main_loop(void) -{ - struct cell_command cmd; - int exitFlag = 0; - - if (Debug) - printf("SPU %u: Enter main loop\n", spu.init.id); - - ASSERT((sizeof(struct cell_command) & 0xf) == 0); - ASSERT_ALIGN16(&cmd); - - while (!exitFlag) { - unsigned opcode; - int tag = 0; - - if (Debug) - printf("SPU %u: Wait for cmd...\n", spu.init.id); - - /* read/wait from mailbox */ - opcode = (unsigned int) spu_read_in_mbox(); - - if (Debug) - printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); - - /* command payload */ - mfc_get(&cmd, /* dest */ - (unsigned int) spu.init.cmd, /* src */ - sizeof(struct cell_command), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - /* - * NOTE: most commands should be contained in a batch buffer - */ - - switch (opcode & CELL_CMD_OPCODE_MASK) { - case CELL_CMD_EXIT: - if (Debug) - printf("SPU %u: EXIT\n", spu.init.id); - exitFlag = 1; - break; - case CELL_CMD_VS_EXECUTE: - spu_execute_vertex_shader(&draw, &cmd.vs); - break; - case CELL_CMD_BATCH: - cmd_batch(opcode); - break; - default: - printf("Bad opcode!\n"); - } - - } - - if (Debug) - printf("SPU %u: Exit main loop\n", spu.init.id); -} - - - -static void -one_time_init(void) -{ - memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); - memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); - invalidate_tex_cache(); -} - - - -/* In some versions of the SDK the SPE main takes 'unsigned long' as a - * parameter. In others it takes 'unsigned long long'. Use a define to - * select between the two. - */ -#ifdef SPU_MAIN_PARAM_LONG_LONG -typedef unsigned long long main_param_t; -#else -typedef unsigned long main_param_t; -#endif - -/** - * SPE entrypoint. 
- */ -int -main(main_param_t speid, main_param_t argp) -{ - int tag = 0; - - (void) speid; - - ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); - ASSERT(sizeof(struct cell_command_render) % 8 == 0); - - one_time_init(); - - if (Debug) - printf("SPU: main() speid=%lu\n", speid); - - mfc_get(&spu.init, /* dest */ - (unsigned int) argp, /* src */ - sizeof(struct cell_init_info), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - -#if 0 - if (spu.init.id==0) - spu_test_misc(); -#endif - - main_loop(); - - return 0; -} diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h deleted file mode 100644 index 1710a17512..0000000000 --- a/src/mesa/pipe/cell/spu/spu_main.h +++ /dev/null @@ -1,177 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_MAIN_H -#define SPU_MAIN_H - - -#include <spu_mfcio.h> - -#include "pipe/cell/common.h" -#include "pipe/draw/draw_vertex.h" -#include "pipe/p_state.h" - - - -#define MAX_WIDTH 1024 -#define MAX_HEIGHT 1024 - - -typedef union { - ushort us[TILE_SIZE][TILE_SIZE]; - uint ui[TILE_SIZE][TILE_SIZE]; - vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; - vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; -} tile_t; - - -#define TILE_STATUS_CLEAR 1 -#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ -#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ -#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ -#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ - - -struct spu_framebuffer { - void *color_start; /**< addr of color surface in main memory */ - void *depth_start; /**< addr of depth surface in main memory */ - enum pipe_format color_format; - enum pipe_format depth_format; - uint width, height; /**< size in pixels */ - uint width_tiles, height_tiles; /**< width and height in tiles */ - - uint color_clear_value; - uint depth_clear_value; - - uint zsize; /**< 0, 2 or 4 bytes per Z */ -} ALIGN16_ATTRIB; - - -/** - * All SPU global/context state will be in singleton object of this type: - */ -struct spu_global -{ - struct cell_init_info init; - - struct spu_framebuffer fb; - struct pipe_blend_state blend_stencil; - struct pipe_depth_stencil_alpha_state depth_stencil; - struct pipe_blend_state blend; - struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; - struct cell_command_texture texture; - - struct 
vertex_info vertex_info; - - /* XXX more state to come */ - - - /** current color and Z tiles */ - tile_t ctile ALIGN16_ATTRIB; - tile_t ztile ALIGN16_ATTRIB; - - /** Current tiles' status */ - ubyte cur_ctile_status, cur_ztile_status; - - /** Status of all tiles in framebuffer */ - ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - - - /** for converting RGBA to PIPE_FORMAT_x colors */ - vector unsigned char color_shuffle; - - vector float tex_size; - vector unsigned int tex_size_mask; /**< == int(size - 1) */ - - vector float (*sample_texture)(vector float texcoord); - -} ALIGN16_ATTRIB; - - -extern struct spu_global spu; -extern boolean Debug; - - - - -/* DMA TAGS */ - -#define TAG_SURFACE_CLEAR 10 -#define TAG_VERTEX_BUFFER 11 -#define TAG_READ_TILE_COLOR 12 -#define TAG_READ_TILE_Z 13 -#define TAG_WRITE_TILE_COLOR 14 -#define TAG_WRITE_TILE_Z 15 -#define TAG_INDEX_BUFFER 16 -#define TAG_BATCH_BUFFER 17 -#define TAG_MISC 18 -#define TAG_TEXTURE_TILE 19 -#define TAG_INSTRUCTION_FETCH 20 - - - -static INLINE void -wait_on_mask(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_any(); -} - - -static INLINE void -wait_on_mask_all(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_all(); -} - - - - - -static INLINE void -memset16(ushort *d, ushort value, uint count) -{ - uint i; - for (i = 0; i < count; i++) - d[i] = value; -} - - -static INLINE void -memset32(uint *d, uint value, uint count) -{ - uint i; - for (i = 0; i < count; i++) - d[i] = value; -} - - -#endif /* SPU_MAIN_H */ diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c deleted file mode 100644 index 932fb500b3..0000000000 --- a/src/mesa/pipe/cell/spu/spu_render.c +++ /dev/null @@ -1,301 
+0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include <stdio.h> -#include <libmisc.h> -#include <spu_mfcio.h> - -#include "spu_main.h" -#include "spu_render.h" -#include "spu_tri.h" -#include "spu_tile.h" -#include "pipe/cell/common.h" - - - -/** - * Given a rendering command's bounding box (in pixels) compute the - * location of the corresponding screen tile bounding box. 
- */ -static INLINE void -tile_bounding_box(const struct cell_command_render *render, - uint *txmin, uint *tymin, - uint *box_num_tiles, uint *box_width_tiles) -{ -#if 0 - /* Debug: full-window bounding box */ - uint txmax = spu.fb.width_tiles - 1; - uint tymax = spu.fb.height_tiles - 1; - *txmin = 0; - *tymin = 0; - *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - *box_width_tiles = spu.fb.width_tiles; - (void) render; - (void) txmax; - (void) tymax; -#else - uint txmax, tymax, box_height_tiles; - - *txmin = (uint) render->xmin / TILE_SIZE; - *tymin = (uint) render->ymin / TILE_SIZE; - txmax = (uint) render->xmax / TILE_SIZE; - tymax = (uint) render->ymax / TILE_SIZE; - if (txmax >= spu.fb.width_tiles) - txmax = spu.fb.width_tiles-1; - if (tymax >= spu.fb.height_tiles) - tymax = spu.fb.height_tiles-1; - *box_width_tiles = txmax - *txmin + 1; - box_height_tiles = tymax - *tymin + 1; - *box_num_tiles = *box_width_tiles * box_height_tiles; -#endif -#if 0 - printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, - render->xmin, render->ymin, render->xmax, render->ymax); - printf("SPU %u: tiles: %u, %u .. 
%u, %u\n", - spu.init.id, *txmin, *tymin, txmax, tymax); - ASSERT(render->xmin <= render->xmax); - ASSERT(render->ymin <= render->ymax); -#endif -} - - -/** Check if the tile at (tx,ty) belongs to this SPU */ -static INLINE boolean -my_tile(uint tx, uint ty) -{ - return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; -} - - -/** - * Start fetching non-clear color/Z tiles from main memory - */ -static INLINE void -get_cz_tiles(uint tx, uint ty) -{ - if (spu.depth_stencil.depth.enabled) { - if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { - //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); - get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); - spu.cur_ztile_status = TILE_STATUS_GETTING; - } - } - - if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { - //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); - get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); - spu.cur_ctile_status = TILE_STATUS_GETTING; - } -} - - -/** - * Start putting dirty color/Z tiles back to main memory - */ -static INLINE void -put_cz_tiles(uint tx, uint ty) -{ - if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { - /* tile was modified and needs to be written back */ - //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); - put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); - spu.cur_ztile_status = TILE_STATUS_DEFINED; - } - else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { - /* tile was never used */ - spu.cur_ztile_status = TILE_STATUS_DEFINED; - //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); - } - - if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { - /* tile was modified and needs to be written back */ - //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); - put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); - spu.cur_ctile_status = TILE_STATUS_DEFINED; - } - else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { - /* tile was never used */ - spu.cur_ctile_status = TILE_STATUS_DEFINED; - 
//printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); - } -} - - -/** - * Wait for 'put' of color/z tiles to complete. - */ -static INLINE void -wait_put_cz_tiles(void) -{ - wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.depth_stencil.depth.enabled) { - wait_on_mask(1 << TAG_WRITE_TILE_Z); - } -} - - -/** - * Render primitives - * \param pos_incr returns value indicating how may words to skip after - * this command in the batch buffer - */ -void -cmd_render(const struct cell_command_render *render, uint *pos_incr) -{ - /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; - const uint vertex_size = render->vertex_size; /* in bytes */ - /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; - uint index_bytes; - const ubyte *vertices; - const ushort *indexes; - uint i, j; - - - if (Debug) { - printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " - "inline_vert=%u\n", - spu.init.id, - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts); - - /* - printf(" bound: %g, %g .. 
%g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - */ - } - - ASSERT(sizeof(*render) % 4 == 0); - ASSERT(total_vertex_bytes % 16 == 0); - ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); - ASSERT(render->num_indexes % 3 == 0); - - - /* indexes are right after the render command in the batch buffer */ - indexes = (const ushort *) (render + 1); - index_bytes = ROUNDUP8(render->num_indexes * 2); - *pos_incr = index_bytes / 8 + sizeof(*render) / 8; - - - if (render->inline_verts) { - /* Vertices are after indexes in batch buffer at next 16-byte addr */ - vertices = (const ubyte *) render + (*pos_incr * 8); - vertices = (const ubyte *) align_pointer((void *) vertices, 16); - ASSERT_ALIGN16(vertices); - *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; - } - else { - /* Begin DMA fetch of vertex buffer */ - ubyte *src = spu.init.buffers[render->vertex_buf]; - ubyte *dest = vertex_data; - - /* skip vertex data we won't use */ -#if 01 - src += render->min_index * vertex_size; - dest += render->min_index * vertex_size; - total_vertex_bytes -= render->min_index * vertex_size; -#endif - ASSERT(total_vertex_bytes % 16 == 0); - ASSERT_ALIGN16(dest); - ASSERT_ALIGN16(src); - - mfc_get(dest, /* in vertex_data[] array */ - (unsigned int) src, /* src in main memory */ - total_vertex_bytes, /* size */ - TAG_VERTEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - - vertices = vertex_data; - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - } - - - /** - ** find tiles which intersect the prim bounding box - **/ - uint txmin, tymin, box_width_tiles, box_num_tiles; - tile_bounding_box(render, &txmin, &tymin, - &box_num_tiles, &box_width_tiles); - - - /* make sure any pending clears have completed */ - wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ - - - /** - ** loop over tiles, rendering tris - **/ - for (i = 0; i < box_num_tiles; i++) { - const uint tx = txmin + i % box_width_tiles; - const uint ty = tymin + i / box_width_tiles; - - ASSERT(tx < 
spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - - if (!my_tile(tx, ty)) - continue; - - spu.cur_ctile_status = spu.ctile_status[ty][tx]; - spu.cur_ztile_status = spu.ztile_status[ty][tx]; - - get_cz_tiles(tx, ty); - - uint drawn = 0; - - /* loop over tris */ - for (j = 0; j < render->num_indexes; j += 3) { - const float *v0, *v1, *v2; - - v0 = (const float *) (vertices + indexes[j+0] * vertex_size); - v1 = (const float *) (vertices + indexes[j+1] * vertex_size); - v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - - drawn += tri_draw(v0, v1, v2, tx, ty); - } - - //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); - - /* write color/z tiles back to main framebuffer, if dirtied */ - put_cz_tiles(tx, ty); - - wait_put_cz_tiles(); /* XXX seems unnecessary... */ - - spu.ctile_status[ty][tx] = spu.cur_ctile_status; - spu.ztile_status[ty][tx] = spu.cur_ztile_status; - } - - if (Debug) - printf("SPU %u: RENDER done\n", - spu.init.id); -} - - diff --git a/src/mesa/pipe/cell/spu/spu_render.h b/src/mesa/pipe/cell/spu/spu_render.h deleted file mode 100644 index fbcdc5ec31..0000000000 --- a/src/mesa/pipe/cell/spu/spu_render.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SPU_RENDER_H -#define SPU_RENDER_H - -#include "pipe/cell/common.h" - -extern void -cmd_render(const struct cell_command_render *render, uint *pos_incr); - -#endif /* SPU_RENDER_H */ - diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c deleted file mode 100644 index 3962aaa4a9..0000000000 --- a/src/mesa/pipe/cell/spu/spu_texture.c +++ /dev/null @@ -1,217 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "pipe/p_compiler.h" -#include "spu_main.h" -#include "spu_texture.h" -#include "spu_tile.h" -#include "spu_colorpack.h" - - -/** - * Number of texture tiles to cache. - * Note that this will probably be the largest consumer of SPU local store/ - * memory for this driver! - */ -#define CACHE_SIZE 16 - -static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; - -static vector unsigned int tex_tile_xy[CACHE_SIZE]; - - - -/** - * Mark all tex cache entries as invalid. - */ -void -invalidate_tex_cache(void) -{ - /* XXX memset? 
*/ - uint i; - for (i = 0; i < CACHE_SIZE; i++) { - tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); - } -} - - -/** - * Return the cache pos/index which corresponds to tile (tx,ty) - */ -static INLINE uint -cache_pos(vector unsigned int txty) -{ - uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; - return pos; -} - - -/** - * Make sure the tile for texel (i,j) is present, return its position/index - * in the cache. - */ -static uint -get_tex_tile(vector unsigned int ij) -{ - /* tile address: tx,ty */ - const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ - const uint pos = cache_pos(txty); - - if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || - (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { - - /* texture cache miss, fetch tile from main memory */ - const uint tiles_per_row = spu.texture.width / TILE_SIZE; - const uint bytes_per_tile = sizeof(tile_t); - const void *src = (const ubyte *) spu.texture.start - + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; - - printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", - spu.init.id, - spu_extract(txty,0), - spu_extract(txty,1), - pos, - spu_extract(tex_tile_xy[pos],0), - spu_extract(tex_tile_xy[pos],1)); - - ASSERT_ALIGN16(tex_tiles[pos].ui); - ASSERT_ALIGN16(src); - - mfc_get(tex_tiles[pos].ui, /* dest */ - (unsigned int) src, - bytes_per_tile, /* size */ - TAG_TEXTURE_TILE, - 0, /* tid */ - 0 /* rid */); - - wait_on_mask(1 << TAG_TEXTURE_TILE); - - tex_tile_xy[pos] = txty; - } - else { -#if 0 - printf("SPU %u: tex cache HIT at %d, %d\n", - spu.init.id, tx, ty); -#endif - } - - return pos; -} - - -/** - * Get texture sample at texcoord. - * XXX this is extremely primitive for now. 
- */ -vector float -sample_texture_nearest(vector float texcoord) -{ - vector float tc = spu_mul(texcoord, spu.tex_size); - vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ - itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ - vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ - uint pos = get_tex_tile(itc); - uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; - return spu_unpack_A8R8G8B8(texel); -} - - -vector float -sample_texture_bilinear(vector float texcoord) -{ - static const vector unsigned int offset10 = {1, 0, 0, 0}; - static const vector unsigned int offset01 = {0, 1, 0, 0}; - - vector float tc = spu_mul(texcoord, spu.tex_size); - tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ - - /* integer texcoords S,T: */ - vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ - vector unsigned int itc01 = spu_add(itc00, offset01); - vector unsigned int itc10 = spu_add(itc00, offset10); - vector unsigned int itc11 = spu_add(itc10, offset01); - - /* mask (GL_REPEAT) */ - itc00 = spu_and(itc00, spu.tex_size_mask); - itc01 = spu_and(itc01, spu.tex_size_mask); - itc10 = spu_and(itc10, spu.tex_size_mask); - itc11 = spu_and(itc11, spu.tex_size_mask); - - /* intra tile addr */ - vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); - vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); - vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); - vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); - - /* get tile cache positions */ - uint pos00 = get_tex_tile(itc00); - uint pos01, pos10, pos11; - if ((spu_extract(ij00, 0) < TILE_SIZE-1) && - (spu_extract(ij00, 1) < TILE_SIZE-1)) { - /* all texels are in the same tile */ - pos01 = pos10 = pos11 = pos00; - } - else { - pos01 = get_tex_tile(itc01); - pos10 = get_tex_tile(itc10); - pos11 = get_tex_tile(itc11); - } - - /* get texels from tiles and convert to float[4] */ - vector float texel00 = 
spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); - vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); - vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); - vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); - - /* Compute weighting factors in [0,1] - * Multiply texcoord by 1024, AND with 1023, convert back to float. - */ - vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); - vector signed int itc1024 = spu_convts(tc1024, 0); - itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); - vector float weight = spu_convtf(itc1024, 10); - - /* smeared frac and 1-frac */ - vector float sfrac = spu_splats(spu_extract(weight, 0)); - vector float tfrac = spu_splats(spu_extract(weight, 1)); - vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); - vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); - - /* multiply the samples (colors) by the S/T weights */ - texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); - texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); - texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); - texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); - - /* compute sum of weighted samples */ - vector float texel_sum = spu_add(texel00, texel01); - texel_sum = spu_add(texel_sum, texel10); - texel_sum = spu_add(texel_sum, texel11); - - return texel_sum; -} diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h deleted file mode 100644 index 95eb87080f..0000000000 --- a/src/mesa/pipe/cell/spu/spu_texture.h +++ /dev/null @@ -1,47 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_TEXTURE_H -#define SPU_TEXTURE_H - - -#include "pipe/p_compiler.h" - - -extern void -invalidate_tex_cache(void); - - -extern vector float -sample_texture_nearest(vector float texcoord); - - -extern vector float -sample_texture_bilinear(vector float texcoord); - - -#endif /* SPU_TEXTURE_H */ diff --git a/src/mesa/pipe/cell/spu/spu_tile.c b/src/mesa/pipe/cell/spu/spu_tile.c deleted file mode 100644 index 12dc246328..0000000000 --- a/src/mesa/pipe/cell/spu/spu_tile.c +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - - -#include "spu_tile.h" -#include "spu_main.h" - - -void -get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) -{ - const uint offset = ty * spu.fb.width_tiles + tx; - const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); - const ubyte *src = zBuf ? 
spu.fb.depth_start : spu.fb.color_start; - - src += offset * bytesPerTile; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - ASSERT_ALIGN16(tile); - /* - printf("get_tile: dest: %p src: 0x%x size: %d\n", - tile, (unsigned int) src, bytesPerTile); - */ - mfc_get(tile->ui, /* dest in local memory */ - (unsigned int) src, /* src in main memory */ - bytesPerTile, - tag, - 0, /* tid */ - 0 /* rid */); -} - - -void -put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) -{ - const uint offset = ty * spu.fb.width_tiles + tx; - const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); - ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start; - - dst += offset * bytesPerTile; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - ASSERT_ALIGN16(tile); - /* - printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n", - spu.init.id, - tile, (unsigned int) dst, bytesPerTile); - */ - mfc_put((void *) tile->ui, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - bytesPerTile, - tag, - 0, /* tid */ - 0 /* rid */); -} - diff --git a/src/mesa/pipe/cell/spu/spu_tile.h b/src/mesa/pipe/cell/spu/spu_tile.h deleted file mode 100644 index e53340a55a..0000000000 --- a/src/mesa/pipe/cell/spu/spu_tile.h +++ /dev/null @@ -1,73 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef SPU_TILE_H -#define SPU_TILE_H - - -#include <libmisc.h> -#include <spu_mfcio.h> -#include "spu_main.h" -#include "pipe/cell/common.h" - - - -void -get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); - -void -put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); - - - -static INLINE void -clear_c_tile(tile_t *ctile) -{ - memset32((uint*) ctile->ui, - spu.fb.color_clear_value, - TILE_SIZE * TILE_SIZE); -} - - -static INLINE void -clear_z_tile(tile_t *ztile) -{ - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - memset16((ushort*) ztile->us, - spu.fb.depth_clear_value, - TILE_SIZE * TILE_SIZE); - } - else { - ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); - memset32((uint*) ztile->ui, - spu.fb.depth_clear_value, - TILE_SIZE * TILE_SIZE); - } -} - - -#endif /* SPU_TILE_H */ diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c deleted file mode 100644 index be9624cf7d..0000000000 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ /dev/null @@ -1,926 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Triangle rendering within a tile. - */ - -#include "pipe/p_compiler.h" -#include "pipe/p_format.h" -#include "pipe/p_util.h" -#include "spu_blend.h" -#include "spu_colorpack.h" -#include "spu_main.h" -#include "spu_texture.h" -#include "spu_tile.h" -#include "spu_tri.h" - -#include "spu_ztest.h" - - -/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ -typedef vector unsigned int mask_t; - -typedef union -{ - vector float v; - float f[4]; -} float4; - - -/** - * Simplified types taken from other parts of Gallium - */ -struct vertex_header { - vector float data[1]; -}; - - - -/* XXX fix this */ -#undef CEILF -#define CEILF(X) ((float) (int) ((X) + 0.99999)) - - -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 -#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) -#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) -#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) -#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) -#define MASK_ALL 0xf - - -#define DEBUG_VERTS 0 - -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - - -struct interp_coef -{ - float4 a0; - float4 dadx; - float4 dady; 
-}; - - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. - */ - const struct vertex_header *vmax; - const struct vertex_header *vmid; - const struct vertex_header *vmin; - const struct vertex_header *vprovoke; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - - uint tx, ty; - - int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; - -#if 0 - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#else - struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#endif - -#if 0 - struct quad_header quad; -#endif - - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - unsigned y_flags; - unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ - } span; -}; - - - -static struct setup_stage setup; - - - - -#if 0 -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) -{ - return (struct setup_stage *)stage; -} -#endif - -#if 0 -/** - * Clip setup.quad against the scissor/surface bounds. 
- */ -static INLINE void -quad_clip(struct setup_stage *setup) -{ - const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (setup.quad.x0 >= maxx || - setup.quad.y0 >= maxy || - setup.quad.x0 + 1 < minx || - setup.quad.y0 + 1 < miny) { - /* totally clipped */ - setup.quad.mask = 0x0; - return; - } - if (setup.quad.x0 < minx) - setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup.quad.y0 < miny) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup.quad.x0 == maxx - 1) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup.quad.y0 == maxy - 1) - setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} -#endif - -#if 0 -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad(struct setup_stage *setup) -{ - quad_clip(setup); - if (setup.quad.mask) { - struct softpipe_context *sp = setup.softpipe; - sp->quad.first->run(sp->quad.first, &setup.quad); - } -} -#endif - -/** - * Evaluate attribute coefficients (plane equations) to compute - * attribute values for the four fragments in a quad. - * Eg: four colors will be compute. 
- */ -static INLINE void -eval_coeff(uint slot, float x, float y, vector float result[4]) -{ - switch (spu.vertex_info.interp_mode[slot]) { - case INTERP_CONSTANT: - result[QUAD_TOP_LEFT] = - result[QUAD_TOP_RIGHT] = - result[QUAD_BOTTOM_LEFT] = - result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; - break; - - case INTERP_LINEAR: - /* fall-through, for now */ - default: - { - register vector float dadx = setup.coef[slot].dadx.v; - register vector float dady = setup.coef[slot].dady.v; - register vector float topLeft - = spu_add(setup.coef[slot].a0.v, - spu_add(spu_mul(spu_splats(x), dadx), - spu_mul(spu_splats(y), dady))); - - result[QUAD_TOP_LEFT] = topLeft; - result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); - result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); - result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); - } - } -} - - -static INLINE vector float -eval_z(float x, float y) -{ - const uint slot = 0; - const float dzdx = setup.coef[slot].dadx.f[2]; - const float dzdy = setup.coef[slot].dady.f[2]; - const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; - const vector float topLeftv = spu_splats(topLeft); - const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; - return spu_add(topLeftv, derivs); -} - - -static INLINE mask_t -do_depth_test(int x, int y, mask_t quadmask) -{ - float4 zvals; - mask_t mask; - - zvals.v = eval_z((float) x, (float) y); - - if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - int ix = (x - setup.cliprect_minx) / 4; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); - } - else { - int ix = (x - setup.cliprect_minx) / 2; - int iy = (y - setup.cliprect_miny) / 2; - mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); - } - - if (spu_extract(spu_orx(mask), 0)) - spu.cur_ztile_status = TILE_STATUS_DIRTY; - - return mask; -} - - -/** - * Emit a quad (pass to next stage). No clipping is done. 
- * Note: about 1/5 to 1/7 of the time, mask is zero and this function - * should be skipped. But adding the test for that slows things down - * overall. - */ -static INLINE void -emit_quad( int x, int y, mask_t mask ) -{ -#if 0 - struct softpipe_context *sp = setup.softpipe; - setup.quad.x0 = x; - setup.quad.y0 = y; - setup.quad.mask = mask; - sp->quad.first->run(sp->quad.first, &setup.quad); -#else - - if (spu.depth_stencil.depth.enabled) { - mask = do_depth_test(x, y, mask); - } - - /* If any bits in mask are set... */ - if (spu_extract(spu_orx(mask), 0)) { - const int ix = x - setup.cliprect_minx; - const int iy = y - setup.cliprect_miny; - const vector unsigned char shuffle = spu.color_shuffle; - vector float colors[4]; - - spu.cur_ctile_status = TILE_STATUS_DIRTY; - - if (spu.texture.start) { - /* texture mapping */ - vector float texcoords[4]; - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors[0] = spu.sample_texture(texcoords[0]); - if (spu_extract(mask, 1)) - colors[1] = spu.sample_texture(texcoords[1]); - if (spu_extract(mask, 2)) - colors[2] = spu.sample_texture(texcoords[2]); - if (spu_extract(mask, 3)) - colors[3] = spu.sample_texture(texcoords[3]); - } - else { - /* simple shading */ - eval_coeff(1, (float) x, (float) y, colors); - } - -#if 1 - if (spu.blend.blend_enable) - blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); -#endif - - if (spu_extract(mask, 0)) - spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); - if (spu_extract(mask, 1)) - spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); - if (spu_extract(mask, 2)) - spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); - if (spu_extract(mask, 3)) - spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); - -#if 0 - /* SIMD_Z with swizzled color buffer (someday) */ - vector unsigned int uicolors = *((vector unsigned int *) &colors); - spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], 
uicolors, mask); -#endif - } - -#endif -} - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int block( int x ) -{ - return x & ~1; -} - - -/** - * Compute mask which indicates which pixels in the 2x2 quad are actually inside - * the triangle's bounds. - * The mask is a uint4 vector and each element will be 0 or 0xffffffff. - */ -static INLINE mask_t calculate_mask( int x ) -{ - /* This is a little tricky. - * Use & instead of && to avoid branches. - * Use negation to convert true/false to ~0/0 values. - */ - mask_t mask; - mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); - mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); - mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); - mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); - return mask; -} - - -/** - * Render a horizontal span of quads - */ -static void flush_spans( void ) -{ - int minleft, maxright; - int x; - - switch (setup.span.y_flags) { - case 0x3: - /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup.span.left[0], setup.span.left[1]); - maxright = MAX2(setup.span.right[0], setup.span.right[1]); - break; - - case 0x1: - /* only even line written (quad top row) */ - minleft = setup.span.left[0]; - maxright = setup.span.right[0]; - break; - - case 0x2: - /* only odd line written (quad bottom row) */ - minleft = setup.span.left[1]; - maxright = setup.span.right[1]; - break; - - default: - return; - } - - - /* OK, we're very likely to need the tile data now. - * clear or finish waiting if needed. 
- */ - if (spu.cur_ctile_status == TILE_STATUS_GETTING) { - /* wait for mfc_get() to complete */ - //printf("SPU: %u: waiting for ctile\n", spu.init.id); - wait_on_mask(1 << TAG_READ_TILE_COLOR); - spu.cur_ctile_status = TILE_STATUS_CLEAN; - } - else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { - //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); - clear_c_tile(&spu.ctile); - spu.cur_ctile_status = TILE_STATUS_DIRTY; - } - ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); - - if (spu.depth_stencil.depth.enabled) { - if (spu.cur_ztile_status == TILE_STATUS_GETTING) { - /* wait for mfc_get() to complete */ - //printf("SPU: %u: waiting for ztile\n", spu.init.id); - wait_on_mask(1 << TAG_READ_TILE_Z); - spu.cur_ztile_status = TILE_STATUS_CLEAN; - } - else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { - //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); - clear_z_tile(&spu.ztile); - spu.cur_ztile_status = TILE_STATUS_DIRTY; - } - ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); - } - - /* XXX this loop could be moved into the above switch cases and - * calculate_mask() could be simplified a bit... 
- */ - for (x = block(minleft); x <= block(maxright); x += 2) { -#if 1 - emit_quad( x, setup.span.y, calculate_mask( x ) ); -#endif - } - - setup.span.y = 0; - setup.span.y_flags = 0; - setup.span.right[0] = 0; - setup.span.right[1] = 0; -} - -#if DEBUG_VERTS -static void print_vertex(const struct vertex_header *v) -{ - int i; - fprintf(stderr, "Vertex: (%p)\n", v); - for (i = 0; i < setup.quad.nr_attrs; i++) { - fprintf(stderr, " %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); - } -} -#endif - - -static boolean setup_sort_vertices(const struct vertex_header *v0, - const struct vertex_header *v1, - const struct vertex_header *v2) -{ - -#if DEBUG_VERTS - fprintf(stderr, "Triangle:\n"); - print_vertex(v0); - print_vertex(v1); - print_vertex(v2); -#endif - - setup.vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = spu_extract(v0->data[0], 1); - float y1 = spu_extract(v1->data[0], 1); - float y2 = spu_extract(v2->data[0], 1); - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup.vmin = v0; - setup.vmid = v1; - setup.vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup.vmin = v2; - setup.vmid = v0; - setup.vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup.vmin = v0; - setup.vmid = v2; - setup.vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup.vmin = v1; - setup.vmid = v0; - setup.vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup.vmin = v2; - setup.vmid = v1; - setup.vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup.vmin = v1; - setup.vmid = v2; - setup.vmax = v0; - } - } - } - - /* Check if triangle is completely outside the tile bounds */ - if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) - return FALSE; - if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) - return FALSE; - if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && - spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && - 
spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) - return FALSE; - if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && - spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && - spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) - return FALSE; - - setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); - setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); - setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); - setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); - setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); - setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. - */ - { - const float area = (setup.emaj.dx * setup.ebot.dy - - setup.ebot.dx * setup.emaj.dy); - - setup.oneoverarea = 1.0f / area; - /* - _mesa_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup.oneoverarea, area, prim->det ); - */ - } - -#if 0 - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); -#endif - - return TRUE; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex->data[slot]. - * The result will be put into setup.coef[slot].a0. 
- * \param slot which attribute slot - */ -static INLINE void -const_coeff(uint slot) -{ - setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].a0.v = setup.vprovoke->data[slot]; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static INLINE void -tri_linear_coeff(uint slot, uint firstComp, uint lastComp) -{ - uint i; - const float *vmin_d = (float *) &setup.vmin->data[slot]; - const float *vmid_d = (float *) &setup.vmid->data[slot]; - const float *vmax_d = (float *) &setup.vmax->data[slot]; - const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; - const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; - - for (i = firstComp; i < lastComp; i++) { - float botda = vmid_d[i] - vmin_d[i]; - float majda = vmax_d[i] - vmin_d[i]; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - ASSERT(slot < PIPE_MAX_SHADER_INPUTS); - - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. 
- */ - setup.coef[slot].a0.f[i] = (vmin_d[i] - - (setup.coef[slot].dadx.f[i] * x + - setup.coef[slot].dady.f[i] * y)); - } - - /* - _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup.coef[slot].a0[i], - setup.coef[slot].dadx.f[i], - setup.coef[slot].dady.f[i]); - */ -} - - -/** - * As above, but interp setup all four vector components. - */ -static INLINE void -tri_linear_coeff4(uint slot) -{ - const vector float vmin_d = setup.vmin->data[slot]; - const vector float vmid_d = setup.vmid->data[slot]; - const vector float vmax_d = setup.vmax->data[slot]; - const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); - const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); - - vector float botda = vmid_d - vmin_d; - vector float majda = vmax_d - vmin_d; - - vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), - spu_mul(botda, spu_splats(setup.emaj.dy))); - vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), - spu_mul(majda, spu_splats(setup.ebot.dx))); - - setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); - setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); - - vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); - - setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); -} - - - -#if 0 -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. 
- */ -static void tri_persp_coeff( unsigned slot, - unsigned i ) -{ - /* premultiply by 1/w: - */ - float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; - float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; - float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; - - float botda = mida - mina; - float majda = maxa - mina; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - /* - printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup.vmin->data[slot][i], - setup.vmid->data[slot][i], - setup.vmax->data[slot][i] - ); - */ - - assert(slot < PIPE_MAX_SHADER_INPUTS); - assert(i <= 3); - - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - setup.coef[slot].a0.f[i] = (mina - - (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + - setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); -} -#endif - - -/** - * Compute the setup.coef[] array dadx, dady, a0 values. - * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. - */ -static void setup_tri_coefficients(void) -{ -#if 1 - uint i; - - for (i = 0; i < spu.vertex_info.num_attribs; i++) { - switch (spu.vertex_info.interp_mode[i]) { - case INTERP_NONE: - break; - case INTERP_POS: - /*tri_linear_coeff(i, 2, 3);*/ - /* XXX interp W if PERSPECTIVE... 
*/ - tri_linear_coeff4(i); - break; - case INTERP_CONSTANT: - const_coeff(i); - break; - case INTERP_LINEAR: - tri_linear_coeff4(i); - break; - case INTERP_PERSPECTIVE: - tri_linear_coeff4(i); /* temporary */ - break; - default: - ASSERT(0); - } - } -#else - ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); - ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || - spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); - tri_linear_coeff(0, 2, 3); /* slot 0, z */ - tri_linear_coeff(1, 0, 4); /* slot 1, color */ -#endif -} - - -static void setup_tri_edges(void) -{ - float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; - float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; - - float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; - float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; - float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; - - setup.emaj.sy = CEILF(vmin_y); - setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); - setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; - setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; - - setup.etop.sy = CEILF(vmid_y); - setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); - setup.etop.dxdy = setup.etop.dx / setup.etop.dy; - setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; - - setup.ebot.sy = CEILF(vmin_y); - setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); - setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; - setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. 
- */ -static void subtriangle( struct edge *eleft, - struct edge *eright, - unsigned lines ) -{ - const int minx = setup.cliprect_minx; - const int maxx = setup.cliprect_maxx; - const int miny = setup.cliprect_miny; - const int maxy = setup.cliprect_maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - ASSERT((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - finish_y = sy + lines; - - if (start_y < miny) - start_y = miny; - - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup.span.y) { - flush_spans(); - setup.span.y = block(_y); - } - - setup.span.left[_y&1] = left; - setup.span.right[_y&1] = right; - setup.span.y_flags |= 1<<(_y&1); - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Draw triangle into tile at (tx, ty) (tile coords) - * The tile data should have already been fetched. 
- */ -boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) -{ - setup.tx = tx; - setup.ty = ty; - - /* set clipping bounds to tile bounds */ - setup.cliprect_minx = tx * TILE_SIZE; - setup.cliprect_miny = ty * TILE_SIZE; - setup.cliprect_maxx = (tx + 1) * TILE_SIZE; - setup.cliprect_maxy = (ty + 1) * TILE_SIZE; - - if (!setup_sort_vertices((struct vertex_header *) v0, - (struct vertex_header *) v1, - (struct vertex_header *) v2)) { - return FALSE; /* totally clipped */ - } - - setup_tri_coefficients(); - setup_tri_edges(); - - setup.span.y = 0; - setup.span.y_flags = 0; - setup.span.right[0] = 0; - setup.span.right[1] = 0; - /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ - - /* init_constant_attribs( setup ); */ - - if (setup.oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); - subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); - } - else { - /* emaj on right: - */ - subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); - subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); - } - - flush_spans(); - - return TRUE; -} diff --git a/src/mesa/pipe/cell/spu/spu_tri.h b/src/mesa/pipe/cell/spu/spu_tri.h deleted file mode 100644 index aa694dd7c9..0000000000 --- a/src/mesa/pipe/cell/spu/spu_tri.h +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef SPU_TRI_H -#define SPU_TRI_H - - -extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); - - -#endif /* SPU_TRI_H */ diff --git a/src/mesa/pipe/cell/spu/spu_util.c b/src/mesa/pipe/cell/spu/spu_util.c deleted file mode 100644 index ac373240c1..0000000000 --- a/src/mesa/pipe/cell/spu/spu_util.c +++ /dev/null @@ -1,165 +0,0 @@ -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" -//#include "tgsi_build.h" -#include "pipe/tgsi/util/tgsi_util.h" - -unsigned -tgsi_util_get_src_register_swizzle( - const struct tgsi_src_register *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->SwizzleX; - case 1: - return reg->SwizzleY; - case 2: - return reg->SwizzleZ; - case 3: - return reg->SwizzleW; - default: - assert( 0 ); - } - return 0; -} - -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->ExtSwizzleX; - case 1: - return reg->ExtSwizzleY; - case 2: - return reg->ExtSwizzleZ; - case 3: - return reg->ExtSwizzleW; - default: - assert( 0 ); - } - return 0; -} - -unsigned -tgsi_util_get_full_src_register_extswizzle( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned swizzle; - - /* - * First, calculate the extended swizzle for a given channel. This will give - * us either a channel index into the simple swizzle or a constant 1 or 0. 
- */ - swizzle = tgsi_util_get_src_register_extswizzle( - ®->SrcRegisterExtSwz, - component ); - - assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); - - /* - * Second, calculate the simple swizzle for the unswizzled channel index. - * Leave the constants intact, they are not affected by the simple swizzle. - */ - if( swizzle <= TGSI_SWIZZLE_W ) { - swizzle = tgsi_util_get_src_register_swizzle( - ®->SrcRegister, - component ); - } - - return swizzle; -} - -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->NegateX; - case 1: - return reg->NegateY; - case 2: - return reg->NegateZ; - case 3: - return reg->NegateW; - default: - assert( 0 ); - } - return 0; -} - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ) -{ - switch( component ) { - case 0: - reg->NegateX = negate; - break; - case 1: - reg->NegateY = negate; - break; - case 2: - reg->NegateZ = negate; - break; - case 3: - reg->NegateW = negate; - break; - default: - assert( 0 ); - } -} - -unsigned -tgsi_util_get_full_src_register_sign_mode( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned sign_mode; - - if( reg->SrcRegisterExtMod.Absolute ) { - /* Consider only the post-abs negation. */ - - if( reg->SrcRegisterExtMod.Negate ) { - sign_mode = TGSI_UTIL_SIGN_SET; - } - else { - sign_mode = TGSI_UTIL_SIGN_CLEAR; - } - } - else { - /* Accumulate the three negations. 
*/ - - unsigned negate; - - negate = reg->SrcRegister.Negate; - if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { - negate = !negate; - } - if( reg->SrcRegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { - sign_mode = TGSI_UTIL_SIGN_TOGGLE; - } - else { - sign_mode = TGSI_UTIL_SIGN_KEEP; - } - } - - return sign_mode; -} diff --git a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c deleted file mode 100644 index 6e86a919ce..0000000000 --- a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c +++ /dev/null @@ -1,393 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include <spu_mfcio.h> -#include <transpose_matrix4x4.h> - -#include "pipe/p_util.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "spu_exec.h" -#include "spu_vertex_shader.h" -#include "spu_main.h" - - -#define DRAW_DBG 0 - - -static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 }; - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define FETCH_ATTRIB( NAME, SZ, CVT ) \ -static qword \ -fetch_##NAME(const void *ptr) \ -{ \ - vec_float4 attrib = defaults; \ - int i; \ - \ - for (i = 0; i < SZ; i++) { \ - attrib = spu_insert(CVT, attrib, i); \ - } \ - return (qword) attrib; \ -} - -#define CVT_64_FLOAT (float) ((double *) ptr)[i] -#define CVT_32_FLOAT ((float *) ptr)[i] - -#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] -#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] -#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] - -#define CVT_8_SSCALED (float) ((char *) ptr)[i] -#define CVT_16_SSCALED (float) ((short *) ptr)[i] -#define CVT_32_SSCALED (float) ((int *) ptr)[i] - -#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f -#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f -#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f - -#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f -#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f -#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f - -FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) -FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) 
-FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) -FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) -FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) -FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) - -FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) -FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) - -FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) -FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) - -FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) -FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) - -FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) -FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) - -FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) -FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) - -FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) -FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) - -FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) -FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) - -FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) -FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) - -FETCH_ATTRIB( 
R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) -FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) - -FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) -FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) - -FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) -FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) - -FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) -//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) - - - -static spu_fetch_func get_fetch_func( enum pipe_format format ) -{ -#if 0 - { - char tmp[80]; - pf_sprint_name(tmp, format); - _mesa_printf("%s: %s\n", __FUNCTION__, tmp); - } -#endif - - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; - case 
PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; - case 
PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; - - case 0: - return NULL; /* not sure why this is needed */ - - default: - assert(0); - return NULL; - } -} - - -/** - * Fetch vertex attributes for 'count' vertices. - */ -static void generic_vertex_fetch(struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count) -{ - unsigned nr_attrs = draw->vertex_fetch.nr_attrs; - unsigned attr; - - assert(count <= 4); - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - -#if DRAW_DBG - printf("SPU: %s count = %u, nr_attrs = %u\n", - __FUNCTION__, count, nr_attrs); -#endif - - /* loop over vertex attributes (vertex shader inputs) - */ - for (attr = 0; attr < nr_attrs; attr++) { - const unsigned pitch = draw->vertex_fetch.pitch[attr]; - const uint64_t src = draw->vertex_fetch.src_ptr[attr]; - const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; - unsigned i; - qword p[4]; - - - /* Fetch four attributes for four vertices. - * - * Could fetch directly into AOS format, but this is meant to be - * a prototype for an sse implementation, which would have - * difficulties doing that. - */ - for (i = 0; i < count; i++) { - uint8_t buffer[32] ALIGN16_ATTRIB; - const uint64_t addr = src + (elts[i] * pitch); - const unsigned size = ((addr & 0x0f) == 0) ? 
16 : 32; - -#if DRAW_DBG - printf("SPU: fetching = 0x%llx\n", addr); -#endif - mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - p[i] = (*fetch)(buffer + (addr & 0x0f)); - } - - /* Be nice and zero out any missing vertices: - */ - for (/* empty */; i < 4; i++) - p[i] = si_xor(p[i], p[i]); - - /* Transpose/swizzle into vector-friendly format. Currently - * assuming that all vertex shader inputs are float[4], but this - * isn't true -- if the vertex shader only wants tex0.xy, we - * could optimize for that. - * - * To do so fully without codegen would probably require an - * excessive number of fetch functions, but we could at least - * minimize the transpose step: - */ - _transpose_matrix4x4(&machine->Inputs[attr].xyzw[0].q, p); - } -} - - -void spu_update_vertex_fetch( struct spu_vs_context *draw ) -{ - unsigned i; - - - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - draw->vertex_fetch.fetch[i] = - get_fetch_func(draw->vertex_fetch.format[i]); - } - - draw->vertex_fetch.fetch_func = generic_vertex_fetch; -} diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.c b/src/mesa/pipe/cell/spu/spu_vertex_shader.c deleted file mode 100644 index c1cbbb6d1e..0000000000 --- a/src/mesa/pipe/cell/spu/spu_vertex_shader.c +++ /dev/null @@ -1,231 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * Brian Paul - * Ian Romanick <idr@us.ibm.com> - */ - -#include <spu_mfcio.h> - -#include "pipe/p_util.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "spu_vertex_shader.h" -#include "spu_exec.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_context.h" -#include "pipe/cell/common.h" -#include "spu_main.h" - -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } - - return mask; -} - - -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. 
- * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ -static void -run_vertex_program(struct spu_vs_context *draw, - unsigned elts[4], unsigned count, - const uint64_t *vOut) -{ - struct spu_exec_machine *machine = &draw->machine; - unsigned int j; - - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(count <= 4); - - machine->Processor = TGSI_PROCESSOR_VERTEX; - - ASSERT_ALIGN16(draw->constants); - machine->Consts = (float (*)[4]) draw->constants; - - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); - - spu_vertex_fetch( draw, machine, elts, count ); - - /* run shader */ - spu_exec_machine_run( machine ); - - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; - struct vertex_header *const tmpOut = - (struct vertex_header *) buffer; - const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) - + (sizeof(float) * 4 - * draw->num_vs_outputs)); - - mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. 
- */ - x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, - draw->nr_planes); - tmpOut->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - tmpOut->data[0][0] = x * scale[0] + trans[0]; - tmpOut->data[0][1] = y * scale[1] + trans[1]; - tmpOut->data[0][2] = z * scale[2] + trans[2]; - tmpOut->data[0][3] = w; - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - - mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); - } /* loop over vertices */ -} - - -static void -spu_bind_vertex_shader(struct spu_vs_context *draw, - void *uniforms, - void *planes, - unsigned nr_planes, - unsigned num_outputs - ) -{ - draw->constants = (float (*)[4]) uniforms; - - (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); - draw->nr_planes = nr_planes; - draw->num_vs_outputs = num_outputs; - - /* specify the shader to interpret/execute */ - spu_exec_machine_init(&draw->machine, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/, - PIPE_SHADER_VERTEX); -} - - -unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] - ALIGN16_ATTRIB; - -void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs) -{ - unsigned i; - - const uint64_t immediate_addr = vs->shader.immediates; - const unsigned immediate_size = - ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) - + 
(immediate_addr & 0x0f)); - - mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, - TAG_VERTEX_BUFFER, 0, 0); - - draw->machine.Instructions = (struct tgsi_full_instruction *) - vs->shader.instructions; - draw->machine.NumInstructions = vs->shader.num_instructions; - - draw->machine.Declarations = (struct tgsi_full_declaration *) - vs->shader.declarations; - draw->machine.NumDeclarations = vs->shader.num_declarations; - - draw->vertex_fetch.nr_attrs = vs->nr_attrs; - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], - sizeof(float) * 4 * vs->shader.num_immediates); - - spu_bind_vertex_shader(draw, vs->shader.uniforms, - vs->plane, vs->nr_planes, - vs->shader.num_outputs); - - for (i = 0; i < vs->num_elts; i += 4) { - const unsigned batch_size = MIN2(vs->num_elts - i, 4); - - run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); - } -} diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.h b/src/mesa/pipe/cell/spu/spu_vertex_shader.h deleted file mode 100644 index c96b93ff0a..0000000000 --- a/src/mesa/pipe/cell/spu/spu_vertex_shader.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef SPU_VERTEX_SHADER_H -#define SPU_VERTEX_SHADER_H - -#include "pipe/p_format.h" -#include "spu_exec.h" - -struct spu_vs_context; - -typedef qword (*spu_fetch_func)(const void *ptr); -typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count ); - -struct spu_vs_context { - struct pipe_viewport_state viewport; - - struct { - uint64_t src_ptr[PIPE_ATTRIB_MAX]; - unsigned pitch[PIPE_ATTRIB_MAX]; - enum pipe_format format[PIPE_ATTRIB_MAX]; - unsigned nr_attrs; - boolean dirty; - - spu_fetch_func fetch[PIPE_ATTRIB_MAX]; - spu_full_fetch_func fetch_func; - } vertex_fetch; - - /* Clip derived state: - */ - float plane[12][4]; - unsigned nr_planes; - - struct spu_exec_machine machine; - const float (*constants)[4]; - - unsigned 
num_vs_outputs; -}; - -extern void spu_update_vertex_fetch(struct spu_vs_context *draw); - -static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count) -{ - if (draw->vertex_fetch.dirty) { - spu_update_vertex_fetch(draw); - draw->vertex_fetch.dirty = 0; - } - - (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); -} - -struct cell_command_vs; - -extern void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs); - -#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/mesa/pipe/cell/spu/spu_ztest.h b/src/mesa/pipe/cell/spu/spu_ztest.h deleted file mode 100644 index ce8ad00339..0000000000 --- a/src/mesa/pipe/cell/spu/spu_ztest.h +++ /dev/null @@ -1,135 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Zbuffer/depth test code. - */ - - -#ifndef SPU_ZTEST_H -#define SPU_ZTEST_H - - -#ifdef __SPU__ -#include <spu_intrinsics.h> -#endif - - - -/** - * Perform a less-than Z test against a 16-bit/value Z buffer and update - * the buffer in place for the fragments that pass. - * - * A fragment passes when its Z, converted to a 16-bit integer, is less - * than the stored value (stored > fragment) and its inMask lane is set. - * - * \param zvals vector of four fragment zvalues as floats - * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this - * contains the Z values for 2 quads, 8 pixels. - * \param x x coordinate of quad (only lsbit is significant): odd x - * selects ushorts 4..7 of *zbuf, even x selects ushorts 0..3 - * \param inMask indicates which fragments in the quad are alive - * \return new mask indicating which fragments are alive after ztest - */ -static INLINE vector unsigned int -spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, - uint x, vector unsigned int inMask) -{ -#define ZERO 0x80 /* shuffle selector byte used below to zero-fill the upper half of each uint; NOTE(review): confirm 0x80 yields 0x00 against the SPU shufb encoding */ - vector unsigned int zvals_ui4, zbuf_ui4, mask; - - /* convert floats to uints in [0, 65535] */ - zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ - zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ - - /* XXX this conditional could be removed with a bit of work */ - if (x & 1) { - /* convert zbuffer values from ushorts to uints */ - /* gather ushorts 4..7 (bytes 8..15 of *zbuf) into bytes 2..3 of each uint */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, - ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); - /* mask = (zbuf_ui4 > zvals_ui4) ? ~0 : 0, i.e. fragment Z is less than stored Z */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? 
zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts; selectors 16..23 keep ushorts 0..3 (bytes 0..7) of the original *zbuf, the updated values land in ushorts 4..7 */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 16, 17, 18, 19, 20, 21, 22, 23, - 2, 3, 6, 7, 10, 11, 14, 15})); - } - else { - /* convert zbuffer values from ushorts to uints */ - /* gather ushorts 0..3 (bytes 0..7 of *zbuf) into bytes 2..3 of each uint */ - zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, - (vector unsigned int) *zbuf, - ((vector unsigned char) { - ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, - ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); - /* mask = (zbuf_ui4 > zvals_ui4) ? ~0 : 0, i.e. fragment Z is less than stored Z */ - mask = spu_cmpgt(zbuf_ui4, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); - /* convert zbuffer values from uints back to ushorts; the updated values land in ushorts 0..3, selectors 24..31 keep ushorts 4..7 (bytes 8..15) of the original *zbuf */ - *zbuf = (vector unsigned short) - spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, - ((vector unsigned char) { - 2, 3, 6, 7, 10, 11, 14, 15, - 24, 25, 26, 27, 28, 29, 30, 31})); - } - return mask; -#undef ZERO -} - - -/** - * As above, but Zbuffer values as 32-bit uints: all four values of - * *zbuf_ptr are tested against zvals (there is no x parameter) and - * *zbuf_ptr is updated in place for the fragments that pass. - */ -static INLINE vector unsigned int -spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, - vector unsigned int inMask) -{ - vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; - - /* convert floats to uints in [0, 0xffffffff] */ - zvals_ui4 = spu_convtu(zvals, 32); - /* mask = (zbuf > zvals_ui4) ? ~0 : 0, i.e. fragment Z is less than stored Z */ - mask = spu_cmpgt(zbuf, zvals_ui4); - /* mask &= inMask */ - mask = spu_and(mask, inMask); - /* zbuf = mask ? zval : zbuf */ - *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); - - return mask; -} - - -#endif /* SPU_ZTEST_H */ |