summary | refs | log | tree | commit | diff
path: root/src/gallium
diff options
context:
space:
mode:
author Keith Whitwell <keith@tungstengraphics.com> 2008-05-28 23:54:18 +0100
committer Keith Whitwell <keith@tungstengraphics.com> 2008-05-28 23:54:18 +0100
commit 728d1f7f43b6db9f4f42c2d16ba223c492d1147d (patch)
tree d7cc98010797d0fde137aa2d065a75198cacc24c /src/gallium
parent 648da5158e5f418bf859aee6aa4532b6899b0d94 (diff)
draw: enable FSE by default
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/draw/draw_pt.c 11
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs.h 4
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.c 105
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos.h 34
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_aos_io.c 25
-rw-r--r-- src/gallium/auxiliary/draw/draw_vs_sse.c 14
6 files changed, 129 insertions, 64 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 75f44d503e..d48c6c220d 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw,
if (opt == 0)
middle = draw->pt.middle.fetch_emit;
- else if (opt == PT_SHADE && draw->pt.test_fse)
+ else if (opt == PT_SHADE)
middle = draw->pt.middle.fetch_shade_emit;
else
middle = draw->pt.middle.general;
@@ -118,12 +118,9 @@ boolean draw_pt_init( struct draw_context *draw )
if (!draw->pt.middle.fetch_emit)
return FALSE;
- if (draw->pt.test_fse) {
- draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
- if (!draw->pt.middle.fetch_shade_emit)
- return FALSE;
- }
-
+ draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw );
+ if (!draw->pt.middle.fetch_shade_emit)
+ return FALSE;
draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw );
if (!draw->pt.middle.general)
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 01171bc23d..7aa0415baf 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -123,6 +123,10 @@ struct draw_vertex_shader {
struct tgsi_shader_info info;
+ /* Extracted from shader:
+ */
+ const float (*immediates)[4];
+
/*
*/
struct draw_vs_varient *varient[16];
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 0cd82ff599..9056785e7a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a,
a.disp == b.disp);
}
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned value )
+{
+ if (cp->ebp != value) {
+ unsigned offset;
+
+ switch (value) {
+ case X86_IMMEDIATES:
+ offset = Offset(struct aos_machine, immediates);
+ break;
+ case X86_CONSTANTS:
+ offset = Offset(struct aos_machine, constants);
+ break;
+ case X86_ATTRIBS:
+ offset = Offset(struct aos_machine, attrib);
+ break;
+ default:
+ assert(0);
+ offset = 0;
+ }
+
+ x86_mov(cp->func, cp->temp_EBP,
+ x86_make_disp(cp->machine_EDX, offset));
+ /* x86_deref(x86_make_disp(cp->machine_EDX, offset))); */
+
+ cp->ebp = value;
+ }
+
+ return cp->temp_EBP;
+}
+
static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
unsigned file,
@@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
case TGSI_FILE_TEMPORARY:
return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
- case TGSI_FILE_IMMEDIATE:
- return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx]));
-
- case TGSI_FILE_CONSTANT:
- return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx]));
-
case AOS_FILE_INTERNAL:
return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
+ case TGSI_FILE_IMMEDIATE:
+ return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float));
+
+ case TGSI_FILE_CONSTANT:
+ return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float));
+
default:
ERROR(cp, "unknown reg file");
return x86_make_reg(0,0);
@@ -1865,6 +1896,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp )
}
+#if 0
static boolean note_immediate( struct aos_compilation *cp,
struct tgsi_full_immediate *imm )
{
@@ -1877,6 +1909,7 @@ static boolean note_immediate( struct aos_compilation *cp,
return TRUE;
}
+#endif
@@ -1939,6 +1972,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
cp.machine_EDX = x86_make_reg(file_REG32, reg_DX);
cp.count_ESI = x86_make_reg(file_REG32, reg_SI);
+ cp.temp_EBP = x86_make_reg(file_REG32, reg_BP);
x86_init_func(cp.func);
@@ -1946,6 +1980,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
x86_push(cp.func, cp.idx_EBX);
x86_push(cp.func, cp.count_ESI);
+ x86_push(cp.func, cp.temp_EBP);
/* Load arguments into regs:
@@ -1988,8 +2023,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
switch (parse.FullToken.Token.Type) {
case TGSI_TOKEN_TYPE_IMMEDIATE:
+#if 0
if (!note_immediate( &cp, &parse.FullToken.FullImmediate ))
goto fail;
+#endif
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
@@ -2072,6 +2109,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient,
if (cp.func->need_emms)
mmx_emms(cp.func);
+ x86_pop(cp.func, cp.temp_EBP);
x86_pop(cp.func, cp.count_ESI);
x86_pop(cp.func, cp.idx_EBX);
@@ -2098,26 +2136,14 @@ static void vaos_set_buffer( struct draw_vs_varient *varient,
for (i = 0; i < vaos->base.key.nr_inputs; i++) {
if (vaos->base.key.element[i].in.buffer == buf) {
- vaos->machine->attrib[i].input_ptr = ((char *)ptr +
- vaos->base.key.element[i].in.offset);
- vaos->machine->attrib[i].input_stride = stride;
+ vaos->attrib[i].input_ptr = ((char *)ptr +
+ vaos->base.key.element[i].in.offset);
+ vaos->attrib[i].input_stride = stride;
}
}
}
-static void vaos_destroy( struct draw_vs_varient *varient )
-{
- struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
-
- if (vaos->machine)
- align_free( vaos->machine );
-
- x86_release_func( &vaos->func[0] );
- x86_release_func( &vaos->func[1] );
-
- FREE(vaos);
-}
static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
const unsigned *elts,
@@ -2127,6 +2153,10 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ vaos->machine->constants = vaos->draw->pt.user.constants;
+ vaos->machine->immediates = vaos->base.vs->immediates;
+ vaos->machine->attrib = vaos->attrib;
+
vaos->gen_run_elts( varient,
elts,
count,
@@ -2141,6 +2171,10 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient,
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size;
+ vaos->machine->constants = vaos->draw->pt.user.constants;
+ vaos->machine->immediates = vaos->base.vs->immediates;
+ vaos->machine->attrib = vaos->attrib;
+
vaos->gen_run_linear( varient,
start,
count,
@@ -2153,10 +2187,6 @@ static void vaos_set_constants( struct draw_vs_varient *varient,
{
struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
- memcpy(vaos->machine->constant,
- constants,
- (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float));
-
#if 0
unsigned i;
for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++)
@@ -2187,6 +2217,21 @@ static void vaos_set_viewport( struct draw_vs_varient *varient,
memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float));
}
+static void vaos_destroy( struct draw_vs_varient *varient )
+{
+ struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient;
+
+ if (vaos->machine)
+ align_free( vaos->machine );
+
+ FREE( vaos->attrib );
+
+ x86_release_func( &vaos->func[0] );
+ x86_release_func( &vaos->func[1] );
+
+ FREE(vaos);
+}
+
static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
@@ -2207,6 +2252,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
vaos->base.run_elts = vaos_run_elts;
vaos->draw = vs->draw;
+
+ vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) );
+ if (!vaos->attrib)
+ goto fail;
+
vaos->machine = align_malloc( sizeof(struct aos_machine), 16 );
if (!vaos->machine)
goto fail;
@@ -2233,7 +2283,10 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs,
return &vaos->base;
fail:
- if (vaos->machine)
+ if (vaos && vaos->attrib)
+ FREE(vaos->attrib);
+
+ if (vaos && vaos->machine)
align_free( vaos->machine );
if (vaos)
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h
index 837b32794f..295d2cb3fe 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.h
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.h
@@ -78,6 +78,14 @@ struct lit_info {
#define MAX_SHINE_TAB 4
#define MAX_LIT_INFO 16
+struct aos_attrib {
+ const void *input_ptr;
+ unsigned input_stride;
+};
+
+
+
+
/* This is the temporary storage used by all the aos_sse vs varients.
* Create one per context and reuse by passing a pointer in at
* vs_varient creation??
@@ -86,8 +94,6 @@ struct aos_machine {
float input [MAX_INPUTS ][4];
float output [MAX_OUTPUTS ][4];
float temp [MAX_TEMPS ][4];
- float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */
- float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */
float internal [MAX_INTERNALS ][4];
float scale[4]; /* viewport */
@@ -105,12 +111,10 @@ struct aos_machine {
ushort fpu_restore;
ushort fpucntl; /* one of FPU_* above */
- struct {
- const void *input_ptr;
- unsigned input_stride;
+ const float (*immediates)[4]; /* points to shader data */
+ const float (*constants)[4]; /* points to draw data */
- unsigned output_offset;
- } attrib[PIPE_MAX_ATTRIBS];
+ const struct aos_attrib *attrib; /* points to ? */
};
@@ -132,6 +136,7 @@ struct aos_compilation {
unsigned last_used;
} xmm[8];
+ unsigned ebp; /* one of X86_* */
boolean input_fetched[PIPE_MAX_ATTRIBS];
unsigned output_last_write[PIPE_MAX_ATTRIBS];
@@ -148,6 +153,7 @@ struct aos_compilation {
struct x86_reg outbuf_ECX;
struct x86_reg machine_EDX;
struct x86_reg count_ESI; /* decrements to zero */
+ struct x86_reg temp_EBP;
};
struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
@@ -192,20 +198,20 @@ do { \
} while (0)
+#define X86_NULL 0
+#define X86_IMMEDIATES 1
+#define X86_CONSTANTS 2
+#define X86_ATTRIBS 3
-
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+ unsigned value );
struct draw_vs_varient_aos_sse {
struct draw_vs_varient base;
struct draw_context *draw;
-#if 0
- struct {
- const void *ptr;
- unsigned stride;
- } attrib[PIPE_MAX_ATTRIBS];
-#endif
+ struct aos_attrib *attrib;
struct aos_machine *machine; /* XXX: temporarily unshared */
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
index 836110f382..45e2092209 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c
@@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
-static void get_src_ptr( struct x86_function *func,
+static void get_src_ptr( struct aos_compilation *cp,
struct x86_reg src,
- struct x86_reg machine,
struct x86_reg elt,
unsigned a )
{
- struct x86_reg input_ptr =
- x86_make_disp(machine,
- Offset(struct aos_machine, attrib[a].input_ptr));
+ struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ),
+ a * sizeof(struct aos_attrib));
- struct x86_reg input_stride =
- x86_make_disp(machine,
- Offset(struct aos_machine, attrib[a].input_stride));
+ struct x86_reg input_ptr = x86_make_disp(attrib,
+ Offset(struct aos_attrib, input_ptr));
+
+ struct x86_reg input_stride = x86_make_disp(attrib,
+ Offset(struct aos_attrib, input_stride));
/* Calculate pointer to current attrib:
*/
- x86_mov(func, src, input_stride);
- x86_imul(func, src, elt);
- x86_add(func, src, input_ptr);
+ x86_mov(cp->func, src, input_stride);
+ x86_imul(cp->func, src, elt);
+ x86_add(cp->func, src, input_ptr);
}
@@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp,
/* Figure out source pointer address:
*/
- get_src_ptr(cp->func,
+ get_src_ptr(cp,
src,
- cp->machine_EDX,
linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
idx);
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 7781782ae8..24f619a278 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -68,8 +68,6 @@ struct draw_sse_vertex_shader {
codegen_function func;
struct tgsi_exec_machine *machine;
-
- float immediates[TGSI_EXEC_NUM_IMMEDIATES][4];
};
@@ -107,7 +105,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Outputs,
(float (*)[4])constants,
machine->Temps,
- shader->immediates,
+ (float (*)[4])shader->base.immediates,
input,
base->info.num_inputs,
input_stride,
@@ -130,6 +128,8 @@ vs_sse_delete( struct draw_vertex_shader *base )
x86_release_func( &shader->sse2_program );
+ align_free(shader->base.immediates);
+
FREE( (void*) shader->base.state.tokens );
FREE( shader );
}
@@ -161,12 +161,18 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.prepare = vs_sse_prepare;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
+
+ vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
+ sizeof(float), 16);
+
vs->machine = &draw->vs.machine;
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
- &vs->sse2_program, vs->immediates, TRUE ))
+ &vs->sse2_program,
+ (float (*)[4])vs->base.immediates,
+ TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );