summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristoph Bumiller <e0425955@student.tuwien.ac.at>2009-10-19 18:17:45 +0200
committerChristoph Bumiller <e0425955@student.tuwien.ac.at>2009-10-19 18:25:09 +0200
commitec5c23551cdb4c369d8f8f392208f4d4bf29911b (patch)
treecb4121abcd4a576aac94795c548cd9452fa00eb6
parenteb7ea97e7fff1ee39921ad81294c4963b5b3ded8 (diff)
nv50: add support for address regs
Allow indirect uniform access and increase the limit on parameters from 128 to 512.
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c178
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c10
2 files changed, 175 insertions, 13 deletions
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index bfd979ce0f..c7145bb9be 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -32,6 +32,7 @@
#include "nv50_context.h"
#define NV50_SU_MAX_TEMP 64
+#define NV50_SU_MAX_ADDR 7
//#define NV50_PROGRAM_DUMP
/* ARL - gallium craps itself on progs/vp/arl.txt
@@ -79,7 +80,8 @@ struct nv50_reg {
P_ATTR,
P_RESULT,
P_CONST,
- P_IMMD
+ P_IMMD,
+ P_ADDR
} type;
int index;
@@ -99,6 +101,7 @@ struct nv50_pc {
/* hw resources */
struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+ struct nv50_reg r_addr[NV50_SU_MAX_ADDR];
/* tgsi resources */
struct nv50_reg *temp;
@@ -112,6 +115,8 @@ struct nv50_pc {
struct nv50_reg *immd;
float *immd_buf;
int immd_nr;
+ struct nv50_reg **addr;
+ int addr_nr;
struct nv50_reg *temp_temp[16];
unsigned temp_temp_nr;
@@ -159,6 +164,17 @@ popcnt4(uint32_t val)
}
static void
+terminate_mbb(struct nv50_pc *pc)
+{
+ int i;
+
+ /* remove records of temporary address register values */
+ for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+ if (pc->r_addr[i].index < 0)
+ pc->r_addr[i].rhw = -1;
+}
+
+static void
alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
{
int i = 0;
@@ -454,9 +470,68 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
e->inst[1] |= (val >> 6) << 2;
}
+static void
+emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ assert(val <= 0xffff);
+ e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9);
+ e->inst[1] = 0x20000000;
+ e->inst[0] |= dst->hw << 2;
+ set_long(pc, e);
+
+ emit(pc, e);
+}
+
+static struct nv50_reg *
+alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
+{
+ int i;
+ struct nv50_reg *a = NULL;
+
+ if (!ref) {
+ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
+ if (pc->r_addr[i].index >= 0)
+ continue;
+ if (pc->r_addr[i].rhw >= 0 &&
+ pc->r_addr[i].acc == pc->insn_cur)
+ continue;
+
+ pc->r_addr[i].rhw = -1;
+ pc->r_addr[i].index = i;
+ return &pc->r_addr[i];
+ }
+ assert(0);
+ return NULL;
+ }
+
+ for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) {
+ if (pc->r_addr[i].index >= 0) /* occupied for TGSI */
+ continue;
+ if (pc->r_addr[i].rhw < 0) { /* unused */
+ a = &pc->r_addr[i];
+ continue;
+ }
+ if (!a && pc->r_addr[i].acc != pc->insn_cur)
+ a = &pc->r_addr[i];
+
+ if (ref->hw - pc->r_addr[i].rhw < 128) {
+ /* alloc'd & suitable */
+ pc->r_addr[i].acc = pc->insn_cur;
+ return &pc->r_addr[i];
+ }
+ }
+ assert(a);
+ emit_set_addr(pc, a, ref->hw * 4);
+
+ a->rhw = ref->hw % 128;
+ a->acc = pc->insn_cur;
+ return a;
+}
#define INTERP_LINEAR 0
-#define INTERP_FLAT 1
+#define INTERP_FLAT 1
#define INTERP_PERSPECTIVE 2
#define INTERP_CENTROID 4
@@ -488,6 +563,16 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
emit(pc, e);
}
+static INLINE void
+set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
+{
+ assert(!(e->inst[0] & 0x0c000000));
+ assert(!(e->inst[1] & 0x00000004));
+
+ e->inst[0] |= (a->hw & 3) << 26;
+ e->inst[1] |= (a->hw >> 2) << 2;
+}
+
static void
set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
struct nv50_program_exec *e)
@@ -498,6 +583,14 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
e->param.shift = s;
e->param.mask = m << (s % 32);
+ if (src->hw > 127)
+ set_addr(e, alloc_addr(pc, src));
+ else
+ if (src->acc < 0) {
+ assert(src->type == P_CONST);
+ set_addr(e, pc->addr[src->index]);
+ }
+
e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
}
@@ -632,7 +725,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
alloc_reg(pc, src);
- e->inst[0] |= (src->hw << 16);
+ e->inst[0] |= ((src->hw & 127) << 16);
}
static void
@@ -660,7 +753,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
}
alloc_reg(pc, src);
- e->inst[1] |= (src->hw << 14);
+ e->inst[1] |= ((src->hw & 127) << 14);
}
static void
@@ -723,6 +816,22 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
}
static void
+emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
+ uint8_t s)
+{
+ struct nv50_program_exec *e = exec(pc);
+
+ set_long(pc, e);
+ e->inst[1] |= 0xc0000000;
+
+ e->inst[0] |= dst->hw << 2;
+ e->inst[0] |= s << 16; /* shift left */
+ set_src_0_restricted(pc, src, e);
+
+ emit(pc, e);
+}
+
+static void
emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
struct nv50_reg *src0, struct nv50_reg *src1)
{
@@ -1403,6 +1512,16 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
return &pc->temp[dst->DstRegister.Index * 4 + c];
case TGSI_FILE_OUTPUT:
return &pc->result[dst->DstRegister.Index * 4 + c];
+ case TGSI_FILE_ADDRESS:
+ {
+ struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c];
+ if (!r) {
+ r = alloc_addr(pc, NULL);
+ pc->addr[dst->DstRegister.Index * 4 + c] = r;
+ }
+ assert(r);
+ return r;
+ }
case TGSI_FILE_NULL:
return NULL;
default:
@@ -1418,7 +1537,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
{
struct nv50_reg *r = NULL;
struct nv50_reg *temp;
- unsigned sgn, c;
+ unsigned sgn, c, swz;
+
+ if (src->SrcRegister.File != TGSI_FILE_CONSTANT)
+ assert(!src->SrcRegister.Indirect);
sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
@@ -1436,13 +1558,29 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
r = &pc->temp[src->SrcRegister.Index * 4 + c];
break;
case TGSI_FILE_CONSTANT:
- r = &pc->param[src->SrcRegister.Index * 4 + c];
+ if (!src->SrcRegister.Indirect) {
+ r = &pc->param[src->SrcRegister.Index * 4 + c];
+ break;
+ }
+ /* Indicate indirection by setting r->acc < 0 and
+ * use the index field to select the address reg.
+ */
+ r = MALLOC_STRUCT(nv50_reg);
+ swz = tgsi_util_get_src_register_swizzle(
+ &src->SrcRegisterInd, 0);
+ ctor_reg(r, P_CONST,
+ src->SrcRegisterInd.Index * 4 + swz, c);
+ r->acc = -1;
break;
case TGSI_FILE_IMMEDIATE:
r = &pc->immd[src->SrcRegister.Index * 4 + c];
break;
case TGSI_FILE_SAMPLER:
break;
+ case TGSI_FILE_ADDRESS:
+ r = pc->addr[src->SrcRegister.Index * 4 + c];
+ assert(r);
+ break;
default:
assert(0);
break;
@@ -1678,8 +1816,15 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_add(pc, dst[c], src[0][c], src[1][c]);
}
break;
+ case TGSI_OPCODE_ARL:
+ assert(src[0][0]);
+ temp = temp_temp(pc);
+ emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32);
+ emit_arl(pc, dst[0], temp, 4);
+ break;
case TGSI_OPCODE_BGNLOOP:
pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
+ terminate_mbb(pc);
break;
case TGSI_OPCODE_BRK:
emit_branch(pc, -1, 0, NULL);
@@ -1763,6 +1908,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_branch(pc, -1, 0, NULL);
pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ terminate_mbb(pc);
break;
case TGSI_OPCODE_ENDIF:
pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
@@ -1775,6 +1921,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
pc->br_join[pc->if_lvl] = NULL;
}
+ terminate_mbb(pc);
/* emit a NOP as join point, we could set it on the next
* one, but would have to make sure it is long and !immd
*/
@@ -1785,6 +1932,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
emit_branch(pc, -1, 0, NULL);
pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
+ terminate_mbb(pc);
break;
case TGSI_OPCODE_EX2:
emit_preex2(pc, temp, src[0][0]);
@@ -1812,6 +1960,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
set_pred_wr(pc, 1, 0, pc->if_cond);
emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+ terminate_mbb(pc);
break;
case TGSI_OPCODE_KIL:
emit_kil(pc, src[0][0]);
@@ -1989,6 +2138,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
src[i][c]->neg = 0;
if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
FREE(src[i][c]);
+ else
+ if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
+ FREE(src[i][c]); /* indirect constant */
}
}
@@ -2332,8 +2484,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
pc->interp_mode[i] = mode;
}
break;
+ case TGSI_FILE_ADDRESS:
case TGSI_FILE_CONSTANT:
- break;
case TGSI_FILE_SAMPLER:
break;
default:
@@ -2527,6 +2679,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
+ pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
+ assert(pc->addr_nr <= 2);
p->cfg.high_temp = 4;
@@ -2595,6 +2749,14 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
ctor_reg(&pc->param[rid], P_CONST, i, rid);
}
+ if (pc->addr_nr) {
+ pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *));
+ if (!pc->addr)
+ return FALSE;
+ }
+ for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+ ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
+
return TRUE;
}
@@ -2774,7 +2936,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
p->immd_nr, NV50_CB_PMISC);
}
- assert(p->param_nr <= 128);
+ assert(p->param_nr <= 512);
if (p->param_nr) {
unsigned cb;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 66361dc3ba..0bd5487695 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -301,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_data (so, 8);
/* constant buffers for immediates and VP/FP parameters */
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
&screen->constbuf_misc[0]);
if (ret) {
nv50_screen_destroy(pscreen);
@@ -309,7 +309,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
for (i = 0; i < 2; i++) {
- ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
&screen->constbuf_parm[i]);
if (ret) {
nv50_screen_destroy(pscreen);
@@ -318,8 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
}
if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
- nouveau_resource_init(&screen->parm_heap[0], 0, 128) ||
- nouveau_resource_init(&screen->parm_heap[1], 0, 128))
+ nouveau_resource_init(&screen->parm_heap[0], 0, 512) ||
+ nouveau_resource_init(&screen->parm_heap[1], 0, 512))
{
NOUVEAU_ERR("Error initialising constant buffers.\n");
nv50_screen_destroy(pscreen);
@@ -340,7 +340,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PMISC << 16) | 0x00000800);
+ so_data (so, (NV50_CB_PMISC << 16) | 0x00000200);
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
so_data (so, 0x00000001 | (NV50_CB_PMISC << 12));
so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);