summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Brian Paul <brian.paul@tungstengraphics.com> 2008-10-09 19:48:53 -0600
committer Brian Paul <brian.paul@tungstengraphics.com> 2008-10-09 19:51:26 -0600
commit 583098e3cb602fd9810a7c65718155fd9b0b3fda (patch)
tree a2d61dcc36d8d40ac53b27c81119b9c49a80c236
parent f6e806a2b8c3e54ac694810616e79924dfd84826 (diff)
cell: implement basic TXP instruction in fragment shaders
Lots of restrictions for now (one 2D texture, no mipmaps, etc.), but basic texture demos work. TEX, TXD and TXP all do the same thing for the time being.
-rw-r--r-- src/gallium/drivers/cell/ppu/cell_gen_fp.c 109
-rw-r--r-- src/gallium/drivers/cell/spu/spu_funcs.c 51
-rw-r--r-- src/gallium/drivers/cell/spu/spu_tri.c 2
3 files changed, 138 insertions, 24 deletions
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 5647bb23e6..c8125a8a05 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -226,6 +226,11 @@ get_src_reg(struct codegen *gen,
spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
}
break;
+ case TGSI_FILE_SAMPLER:
+ {
+ reg = 3; /* XXX total hack */
+ }
+ break;
default:
assert(0);
}
@@ -1162,6 +1167,21 @@ print_functions(struct cell_context *cell)
#endif
+static uint
+lookup_function(struct cell_context *cell, const char *funcname)
+{
+ const struct cell_spu_function_info *funcs = &cell->spu_functions;
+ uint i, addr = 0;
+ for (i = 0; i < funcs->num; i++) {
+ if (strcmp(funcs->names[i], funcname) == 0) {
+ addr = funcs->addrs[i];
+ }
+ }
+ assert(addr && "spu function not found");
+ return addr / 4; /* discard 2 least significant bits */
+}
+
+
/**
* Emit code to call a SPU function.
* Used to implement instructions like SIN/COS/POW/TEX/etc.
@@ -1171,27 +1191,12 @@ emit_function_call(struct codegen *gen,
const struct tgsi_full_instruction *inst,
char *funcname, uint num_args)
{
- const struct cell_spu_function_info *funcs = &gen->cell->spu_functions;
+ const uint addr = lookup_function(gen->cell, funcname);
char comment[100];
- uint addr;
int ch;
assert(num_args <= 3);
- /* lookup function address */
- {
- uint i;
- addr = 0;
- for (i = 0; i < funcs->num; i++) {
- if (strcmp(funcs->names[i], funcname) == 0) {
- addr = funcs->addrs[i];
- }
- }
- assert(addr && "spu function not found");
- }
-
- addr /= 4; /* discard 2 least significant bits */
-
snprintf(comment, sizeof(comment), "CALL %s:", funcname);
spe_comment(gen->f, -4, comment);
@@ -1245,6 +1250,72 @@ emit_function_call(struct codegen *gen,
}
+static boolean
+emit_TXP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+{
+ const uint addr = lookup_function(gen->cell, "spu_txp");
+ int ch;
+ int coord_regs[4], d_regs[4];
+
+ spe_comment(gen->f, -4, "CALL txp:");
+
+ /* get src/dst reg info */
+ for (ch = 0; ch < 4; ch++) {
+ coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+ d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+ }
+
+ {
+ ubyte usedRegs[SPE_NUM_REGS];
+ uint i, numUsed;
+
+ numUsed = spe_get_registers_used(gen->f, usedRegs);
+ assert(numUsed < gen->frame_size / 16 - 32);
+
+ /* save registers to stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ int offset = 2 + i;
+ spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+
+ /* setup function arguments */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, 3 + i, coord_regs[i]);
+ }
+
+ /* branch to function, save return addr */
+ spe_brasl(gen->f, SPE_REG_RA, addr);
+
+ /* save function's return values (four pixels' colors) */
+ for (i = 0; i < 4; i++) {
+ spe_move(gen->f, d_regs[i], 3 + i);
+ }
+
+ /* restore registers from stack */
+ for (i = 0; i < numUsed; i++) {
+ uint reg = usedRegs[i];
+ if (reg != d_regs[0] &&
+ reg != d_regs[1] &&
+ reg != d_regs[2] &&
+ reg != d_regs[3]) {
+ int offset = 2 + i;
+ spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
+ }
+ }
+ }
+
+ for (ch = 0; ch < 4; ch++) {
+ if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
+ store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+ free_itemps(gen);
+ }
+ }
+
+ return TRUE;
+}
+
+
/**
* Emit max. See emit_SGT for comments.
*/
@@ -1483,6 +1554,12 @@ emit_instruction(struct codegen *gen,
return emit_function_call(gen, inst, "spu_exp2", 1);
case TGSI_OPCODE_LOGBASE2:
return emit_function_call(gen, inst, "spu_log2", 1);
+ case TGSI_OPCODE_TEX:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXD:
+ /* fall-through for now */
+ case TGSI_OPCODE_TXP:
+ return emit_TXP(gen, inst);
case TGSI_OPCODE_IF:
return emit_IF(gen, inst);
diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c
index 1adf9de0e8..c7bcb3de9d 100644
--- a/src/gallium/drivers/cell/spu/spu_funcs.c
+++ b/src/gallium/drivers/cell/spu/spu_funcs.c
@@ -38,12 +38,20 @@
#include <math.h>
#include <cos14_v.h>
#include <sin14_v.h>
+#include <transpose_matrix4x4.h>
#include "cell/common.h"
#include "spu_main.h"
#include "spu_funcs.h"
+/** For "return"-ing four vectors */
+struct vec_4x4
+{
+ vector float v[4];
+};
+
+
static vector float
spu_cos(vector float x)
{
@@ -92,16 +100,44 @@ spu_log2(vector float x)
return spu_mul(v, k);
}
+static struct vec_4x4
+spu_txp(vector float s, vector float t, vector float r, vector float q)
+{
+ const uint unit = 0;
+ struct vec_4x4 colors;
+ vector float coords[4];
+
+ coords[0] = s;
+ coords[1] = t;
+ coords[2] = r;
+ coords[3] = q;
+ _transpose_matrix4x4(coords, coords);
+
+ /* get four texture samples */
+ colors.v[0] = spu.sample_texture[unit](unit, coords[0]);
+ colors.v[1] = spu.sample_texture[unit](unit, coords[1]);
+ colors.v[2] = spu.sample_texture[unit](unit, coords[2]);
+ colors.v[3] = spu.sample_texture[unit](unit, coords[3]);
+
+ _transpose_matrix4x4(colors.v, colors.v);
+ return colors;
+}
+
+/**
+ * Add named function to list of "exported" functions that will be
+ * made available to the PPU-hosted code generator.
+ */
static void
-add_func(struct cell_spu_function_info *spu_functions,
- const char *name, void *addr)
+export_func(struct cell_spu_function_info *spu_functions,
+ const char *name, void *addr)
{
uint n = spu_functions->num;
ASSERT(strlen(name) < 16);
strcpy(spu_functions->names[n], name);
spu_functions->addrs[n] = (uint) addr;
spu_functions->num++;
+ ASSERT(spu_functions->num <= 16);
}
@@ -119,11 +155,12 @@ return_function_info(void)
ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
funcs.num = 0;
- add_func(&funcs, "spu_cos", &spu_cos);
- add_func(&funcs, "spu_sin", &spu_sin);
- add_func(&funcs, "spu_pow", &spu_pow);
- add_func(&funcs, "spu_exp2", &spu_exp2);
- add_func(&funcs, "spu_log2", &spu_log2);
+ export_func(&funcs, "spu_cos", &spu_cos);
+ export_func(&funcs, "spu_sin", &spu_sin);
+ export_func(&funcs, "spu_pow", &spu_pow);
+ export_func(&funcs, "spu_exp2", &spu_exp2);
+ export_func(&funcs, "spu_log2", &spu_log2);
+ export_func(&funcs, "spu_txp", &spu_txp);
/* Send the function info back to the PPU / main memory */
mfc_put((void *) &funcs, /* src in local store */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 6039cd80b2..87991c3136 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -286,7 +286,7 @@ emit_quad( int x, int y, mask_t mask)
spu.cur_ctile_status = TILE_STATUS_DIRTY;
spu.cur_ztile_status = TILE_STATUS_DIRTY;
- if (spu.texture[0].start) {
+ if (0/*spu.texture[0].start*/) {
/*
* Temporary texture mapping path
* This will go away when fragment programs support TEX inst.