summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/i965/brw_vs_emit.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vs_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c299
1 files changed, 245 insertions, 54 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 0d6c6ab9a8..d7f75e3685 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -38,14 +38,49 @@
#include "brw_vs.h"
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+ struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+ if (++c->last_tmp > c->prog_data.total_grf)
+ c->prog_data.total_grf = c->last_tmp;
+
+ return tmp;
+}
-/* Do things as simply as possible. Allocate and populate all regs
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+ if (tmp.nr == c->last_tmp-1)
+ c->last_tmp--;
+}
+
+static void release_tmps( struct brw_vs_compile *c )
+{
+ c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF registers before code emission.
+ * Do things as simply as possible. Allocate and populate all regs
* ahead of time.
*/
static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
- GLuint nr_params;
+
+ /* Determine whether to use a real constant buffer or use a block
+ * of GRF registers for constants. The latter is faster but only
+ * works if everything fits in the GRF.
+ * XXX this heuristic/check may need some fine tuning...
+ */
+ if (c->vp->program.Base.Parameters->NumParameters +
+ c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+ c->vp->use_const_buffer = GL_TRUE;
+ else
+ c->vp->use_const_buffer = GL_FALSE;
+
+ /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
/* r0 -- reserved as usual
*/
@@ -66,13 +101,22 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Vertex program parameters from curbe:
*/
- nr_params = c->vp->program.Base.Parameters->NumParameters;
- for (i = 0; i < nr_params; i++) {
- c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
- }
- reg += (nr_params + 1) / 2;
+ if (c->vp->use_const_buffer) {
+ /* get constants from a real constant buffer */
+ c->prog_data.curb_read_length = 0;
+ c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+ }
+ else {
+ /* use a section of the GRF for constants */
+ GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+ for (i = 0; i < nr_params; i++) {
+ c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+ }
+ reg += (nr_params + 1) / 2;
+ c->prog_data.curb_read_length = reg - 1;
- c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.nr_params = nr_params * 4;
+ }
/* Allocate input regs:
*/
@@ -133,6 +177,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
reg++;
}
+ if (c->vp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ c->current_const[i].reg = brw_vec8_grf(reg, 0);
+ reg++;
+ }
+ }
+
for (i = 0; i < 128; i++) {
if (c->output_regs[i].used_in_src) {
c->output_regs[i].reg = brw_vec8_grf(reg, 0);
@@ -165,28 +217,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
}
-static struct brw_reg get_tmp( struct brw_vs_compile *c )
-{
- struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
-
- if (++c->last_tmp > c->prog_data.total_grf)
- c->prog_data.total_grf = c->last_tmp;
-
- return tmp;
-}
-
-static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
-{
- if (tmp.nr == c->last_tmp-1)
- c->last_tmp--;
-}
-
-static void release_tmps( struct brw_vs_compile *c )
-{
- c->last_tmp = c->first_tmp;
-}
-
-
/**
* If an instruction uses a temp reg both as a src and the dest, we
* sometimes need to allocate an intermediate temporary.
@@ -633,6 +663,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
}
brw_ENDIF(p, if_insn);
+
+ release_tmp(c, tmp);
}
static void emit_lrp_noalias(struct brw_vs_compile *c,
@@ -673,13 +705,84 @@ static void emit_nrm( struct brw_vs_compile *c,
}
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex)
+{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
+ struct brw_compile *p = &c->func;
+ struct brw_reg const_reg;
+ struct brw_reg const2_reg;
+ const GLboolean relAddr = src->RelAddr;
+
+ assert(argIndex < 3);
+
+ if (c->current_const[argIndex].index != src->Index || relAddr) {
+ struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+ c->current_const[argIndex].index = src->Index;
+
+#if 0
+ printf(" fetch const[%d] for arg %d into reg %d\n",
+ src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+ /* need to fetch the constant now */
+ brw_dp_READ_4_vs(p,
+ c->current_const[argIndex].reg,/* writeback dest */
+ 0, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+ );
+
+ if (relAddr) {
+ /* second read */
+ const2_reg = get_tmp(c);
+
+ /* use upper half of address reg for second read */
+ addrReg = stride(addrReg, 0, 4, 0);
+ addrReg.subnr = 16;
+
+ brw_dp_READ_4_vs(p,
+ const2_reg, /* writeback dest */
+ 1, /* oword */
+ relAddr, /* relative indexing? */
+ addrReg, /* address register */
+ 16 * src->Index, /* byte offset */
+ SURF_INDEX_VERT_CONST_BUFFER
+ );
+ }
+ }
+
+ const_reg = c->current_const[argIndex].reg;
+
+ if (relAddr) {
+ /* merge the two Owords into the constant register */
+ /* const_reg[7..4] = const2_reg[7..4] */
+ brw_MOV(p,
+ suboffset(stride(const_reg, 0, 4, 1), 4),
+ suboffset(stride(const2_reg, 0, 4, 1), 4));
+ release_tmp(c, const2_reg);
+ }
+ else {
+ /* replicate lower four floats into upper half (to get XYZWXYZW) */
+ const_reg = stride(const_reg, 0, 4, 0);
+ const_reg.subnr = 0;
+ }
+
+ return const_reg;
+}
+
+
+
/* TODO: relative addressing!
*/
static struct brw_reg get_reg( struct brw_vs_compile *c,
gl_register_file file,
GLuint index )
{
-
switch (file) {
case PROGRAM_TEMPORARY:
case PROGRAM_INPUT:
@@ -708,13 +811,17 @@ static struct brw_reg get_reg( struct brw_vs_compile *c,
}
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
static struct brw_reg deref( struct brw_vs_compile *c,
struct brw_reg arg,
GLint offset)
{
struct brw_compile *p = &c->func;
struct brw_reg tmp = vec4(get_tmp(c));
- struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
+ struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+ struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
struct brw_reg indirect = brw_vec4_indirect(0,0);
@@ -735,10 +842,67 @@ static struct brw_reg deref( struct brw_vs_compile *c,
brw_pop_insn_state(p);
}
+ /* NOTE: tmp not released */
return vec8(tmp);
}
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+ const struct prog_instruction *inst,
+ GLuint argIndex )
+{
+ const GLuint file = inst->SrcReg[argIndex].File;
+ const GLint index = inst->SrcReg[argIndex].Index;
+ const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+
+ switch (file) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_INPUT:
+ case PROGRAM_OUTPUT:
+ if (relAddr) {
+ return deref(c, c->regs[file][0], index);
+ }
+ else {
+ assert(c->regs[file][index].nr != 0);
+ return c->regs[file][index];
+ }
+
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM:
+ if (c->vp->use_const_buffer) {
+ return get_constant(c, inst, argIndex);
+ }
+ else if (relAddr) {
+ return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+ }
+ else {
+ assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+ return c->regs[PROGRAM_STATE_VAR][index];
+ }
+ case PROGRAM_ADDRESS:
+ assert(index == 0);
+ return c->regs[file][index];
+
+ case PROGRAM_UNDEFINED:
+ /* this is a normal case since we loop over all three src args */
+ return brw_null_reg();
+
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_WRITE_ONLY:
+ default:
+ assert(0);
+ return brw_null_reg();
+ }
+}
+
+
static void emit_arl( struct brw_vs_compile *c,
struct brw_reg dst,
struct brw_reg arg0 )
@@ -750,30 +914,31 @@ static void emit_arl( struct brw_vs_compile *c,
if (need_tmp)
tmp = get_tmp(c);
- brw_RNDD(p, tmp, arg0);
- brw_MUL(p, dst, tmp, brw_imm_d(16));
+ brw_RNDD(p, tmp, arg0); /* tmp = floor(arg0) */
+ brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
if (need_tmp)
release_tmp(c, tmp);
}
-/* Will return mangled results for SWZ op. The emit_swz() function
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op. The emit_swz() function
* ignores this result and recalculates taking extended swizzles into
* account.
*/
static struct brw_reg get_arg( struct brw_vs_compile *c,
- struct prog_src_register *src )
+ const struct prog_instruction *inst,
+ GLuint argIndex )
{
+ const struct prog_src_register *src = &inst->SrcReg[argIndex];
struct brw_reg reg;
if (src->File == PROGRAM_UNDEFINED)
return brw_null_reg();
- if (src->RelAddr)
- reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
- else
- reg = get_reg(c, src->File, src->Index);
+ reg = get_src_reg(c, inst, argIndex);
/* Convert 3-bit swizzle to 2-bit.
*/
@@ -784,16 +949,38 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
/* Note this is ok for non-swizzle instructions:
*/
- reg.negate = src->NegateBase ? 1 : 0;
+ reg.negate = src->Negate ? 1 : 0;
return reg;
}
+/**
+ * Get brw register for the given program dest register.
+ */
static struct brw_reg get_dst( struct brw_vs_compile *c,
struct prog_dst_register dst )
{
- struct brw_reg reg = get_reg(c, dst.File, dst.Index);
+ struct brw_reg reg;
+
+ switch (dst.File) {
+ case PROGRAM_TEMPORARY:
+ case PROGRAM_OUTPUT:
+ assert(c->regs[dst.File][dst.Index].nr != 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_ADDRESS:
+ assert(dst.Index == 0);
+ reg = c->regs[dst.File][dst.Index];
+ break;
+ case PROGRAM_UNDEFINED:
+ /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+ reg = brw_null_reg();
+ break;
+ default:
+ assert(0);
+ reg = brw_null_reg();
+ }
reg.dw1.bits.writemask = dst.WriteMask;
@@ -803,14 +990,16 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
static void emit_swz( struct brw_vs_compile *c,
struct brw_reg dst,
- struct prog_src_register src )
+ const struct prog_instruction *inst)
{
+ const GLuint argIndex = 0;
+ const struct prog_src_register src = inst->SrcReg[argIndex];
struct brw_compile *p = &c->func;
GLuint zeros_mask = 0;
GLuint ones_mask = 0;
GLuint src_mask = 0;
GLubyte src_swz[4];
- GLboolean need_tmp = (src.NegateBase &&
+ GLboolean need_tmp = (src.Negate &&
dst.file != BRW_GENERAL_REGISTER_FILE);
struct brw_reg tmp = dst;
GLuint i;
@@ -844,10 +1033,7 @@ static void emit_swz( struct brw_vs_compile *c,
if (src_mask) {
struct brw_reg arg0;
- if (src.RelAddr)
- arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
- else
- arg0 = get_reg(c, src.File, src.Index);
+ arg0 = get_src_reg(c, inst, argIndex);
arg0 = brw_swizzle(arg0,
src_swz[0], src_swz[1],
@@ -862,8 +1048,8 @@ static void emit_swz( struct brw_vs_compile *c,
if (ones_mask)
brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
- if (src.NegateBase)
- brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp));
+ if (src.Negate)
+ brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
if (need_tmp) {
brw_MOV(p, dst, tmp);
@@ -1039,21 +1225,26 @@ void brw_vs_emit(struct brw_vs_compile *c )
for (insn = 0; insn < nr_insns; insn++) {
- struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+ const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
struct brw_reg args[3], dst;
GLuint i;
+#if 0
+ printf("%d: ", insn);
+ _mesa_print_instruction(inst);
+#endif
+
/* Get argument regs. SWZ is special and does this itself.
*/
if (inst->Opcode != OPCODE_SWZ)
for (i = 0; i < 3; i++) {
- struct prog_src_register *src = &inst->SrcReg[i];
+ const struct prog_src_register *src = &inst->SrcReg[i];
index = src->Index;
file = src->File;
if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
args[i] = c->output_regs[index].reg;
else
- args[i] = get_arg(c, src);
+ args[i] = get_arg(c, inst, i);
}
/* Get dest regs. Note that it is possible for a reg to be both
@@ -1181,7 +1372,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
/* The args[0] value can't be used here as it won't have
* correctly encoded the full swizzle:
*/
- emit_swz(c, dst, inst->SrcReg[0] );
+ emit_swz(c, dst, inst);
break;
case OPCODE_TRUNC:
/* round toward zero */