summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/draw
diff options
context:
space:
mode:
authorKeith Whitwell <keith@tungstengraphics.com>2008-04-17 23:44:32 +0100
committerKeith Whitwell <keith@tungstengraphics.com>2008-04-18 10:48:54 +0100
commita773f06e969a3992451dd7fe6fd55ea96b2774fa (patch)
tree7b8f2ef0bf53da7312c8a89774a5159a87c90e76 /src/gallium/auxiliary/draw
parent01b6354e72a84f8c3c22be1f77eab8d9c05920a3 (diff)
draw: split off all the extra functionality in the vertex shader
This will at least allow us to make the initial gains to get decent vertex performance much more quickly & with higher confidence of getting it right. At some later point can look again at code-generating all the fetch/cliptest/viewport extras in the same block as the vertex shader. For now, just need to get some decent baseline performance.
Diffstat (limited to 'src/gallium/auxiliary/draw')
-rw-r--r--src/gallium/auxiliary/draw/Makefile4
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c26
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h51
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c16
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch.c175
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c109
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_pipeline.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c202
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c82
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c39
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c38
13 files changed, 635 insertions, 118 deletions
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 836e98f086..154c8a99b5 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -20,8 +20,10 @@ C_SOURCES = \
draw_pt_fetch_emit.c \
draw_pt_fetch_pipeline.c \
draw_pt_fetch_shade_pipeline.c \
- draw_pt_pipeline.c \
+ draw_pt_fetch.c \
+ draw_pt_post_vs.c \
draw_pt_emit.c \
+ draw_pt_pipeline.c \
draw_pt_elts.c \
draw_prim.c \
draw_pstipple.c \
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 9b2dcc0b57..4838b68ed1 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -110,6 +110,12 @@ struct draw_context *draw_create( void )
tgsi_exec_machine_init(&draw->machine);
+ /* FIXME: give this machine thing a proper constructor:
+ */
+ draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+ draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
+
+
if (!draw_pt_init( draw ))
goto fail;
@@ -155,8 +161,13 @@ void draw_destroy( struct draw_context *draw )
if (draw->pipeline.rasterize)
draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
+ if (draw->machine.Inputs)
+ align_free(draw->machine.Inputs);
+ if (draw->machine.Outputs)
+ align_free(draw->machine.Outputs);
tgsi_exec_machine_free_data(&draw->machine);
-
+
+
if (draw->vs.vertex_cache)
align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */
@@ -265,6 +276,7 @@ draw_set_vertex_elements(struct draw_context *draw,
draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
memcpy(draw->vertex_element, elements, count * sizeof(elements[0]));
+ draw->nr_vertex_elements = count;
}
@@ -463,15 +475,3 @@ boolean draw_get_edgeflag( struct draw_context *draw,
return 1;
}
-
-#if 0
-/* Crufty init function. Fix me.
- */
-boolean draw_init_machine( struct draw_context *draw )
-{
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- machine->Outputs = ALIGN16_ASSIGN(outputs);
-}
-#endif
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 9407217fd3..da94e69781 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -224,6 +224,8 @@ struct draw_context
unsigned nr_vertex_buffers;
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+ unsigned nr_vertex_elements;
+
struct draw_vertex_shader *vertex_shader;
boolean identity_viewport;
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 31d18ec62b..316289969b 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -112,6 +112,7 @@ struct draw_pt_middle_end {
* mode...
*/
struct vbuf_render;
+struct vertex_header;
/* Helper functions.
@@ -132,25 +133,25 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *d
*/
void draw_pt_run_pipeline( struct draw_context *draw,
unsigned prim,
- char *verts,
- unsigned vertex_stride,
+ struct vertex_header *verts,
unsigned vertex_count,
+ unsigned vertex_stride,
const ushort *elts,
unsigned count );
-/* HW vertex emit:
+/*******************************************************************************
+ * HW vertex emit:
*/
struct pt_emit;
void draw_pt_emit_prepare( struct pt_emit *emit,
- unsigned prim,
- unsigned opt );
+ unsigned prim );
void draw_pt_emit( struct pt_emit *emit,
- char *verts,
- unsigned stride,
+ const float (*vertex_data)[4],
unsigned vertex_count,
+ unsigned stride,
const ushort *elts,
unsigned count );
@@ -159,6 +160,42 @@ void draw_pt_emit_destroy( struct pt_emit *emit );
struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
+/*******************************************************************************
+ * API vertex fetch:
+ */
+
+struct pt_fetch;
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+ boolean emit_header,
+ unsigned vertex_size );
+
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+ const unsigned *elts,
+ unsigned count,
+ char *verts );
+
+void draw_pt_fetch_destroy( struct pt_fetch *fetch );
+
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );
+
+/*******************************************************************************
+ * Post-VS: cliptest, rhw, viewport
+ */
+struct pt_post_vs;
+
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+ struct vertex_header *pipeline_verts,
+ unsigned stride,
+ unsigned count );
+
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+ boolean bypass_clipping,
+ boolean identity_viewport,
+ boolean opengl );
+
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
+
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index e9ed29450a..ef9db70a02 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -38,16 +38,11 @@ struct pt_emit {
struct draw_context *draw;
struct translate *translate;
-
- unsigned pipeline_vertex_size;
- unsigned prim;
- unsigned opt;
};
void draw_pt_emit_prepare( struct pt_emit *emit,
- unsigned prim,
- unsigned opt )
+ unsigned prim )
{
struct draw_context *draw = emit->draw;
const struct vertex_info *vinfo;
@@ -75,8 +70,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
unsigned emit_sz = 0;
unsigned src_buffer = 0;
unsigned output_format;
- unsigned src_offset = (sizeof(struct vertex_header) +
- vinfo->src_index[i] * 4 * sizeof(float) );
+ unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) );
@@ -139,9 +133,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit,
void draw_pt_emit( struct pt_emit *emit,
- char *verts,
- unsigned stride,
+ const float (*vertex_data)[4],
unsigned vertex_count,
+ unsigned stride,
const ushort *elts,
unsigned count )
{
@@ -164,7 +158,7 @@ void draw_pt_emit( struct pt_emit *emit,
translate->set_buffer(translate,
0,
- verts,
+ vertex_data,
stride );
translate->set_buffer(translate,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
new file mode 100644
index 0000000000..a7553023b8
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -0,0 +1,175 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+#include "translate/translate.h"
+
+
+struct pt_fetch {
+ struct draw_context *draw;
+
+ struct translate *translate;
+
+ unsigned vertex_size;
+};
+
+
+
+/* Perform the fetch from API vertex elements & vertex buffers, to a
+ * contiguous set of float[4] attributes as required for the
+ * vertex_shader->run_linear() method.
+ *
+ * This is used in all cases except pure passthrough
+ * (draw_pt_fetch_emit.c) which has its own version to translate
+ * directly to hw vertices.
+ *
+ */
+void draw_pt_fetch_prepare( struct pt_fetch *fetch,
+ boolean emit_header,
+ unsigned vertex_size )
+{
+ struct draw_context *draw = fetch->draw;
+ unsigned i, nr = 0;
+ unsigned dst_offset = 0;
+ struct translate_key key;
+
+ fetch->vertex_size = vertex_size;
+
+ memset(&key, 0, sizeof(key));
+
+ /* If PT_SHADE is not set, then we are creating post-shader
+ * vertices, meaning that we need to emit/leave space for a vertex
+ * header.
+ *
+ * It's worth considering whether the vertex headers should contain
+ * a pointer to the 'data', rather than having it inline.
+ * Something to look at after we've fully switched over to the pt
+ * paths.
+ */
+ if (emit_header)
+ {
+ /* Need to set header->vertex_id = 0xffff somehow.
+ */
+ key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT;
+ key.element[nr].input_buffer = draw->nr_vertex_buffers;
+ key.element[nr].input_offset = 0;
+ key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT;
+ key.element[nr].output_offset = dst_offset;
+ dst_offset += 1 * sizeof(float);
+ nr++;
+
+
+ /* Just leave the clip[] array untouched.
+ */
+ dst_offset += 4 * sizeof(float);
+ }
+
+
+ for (i = 0; i < draw->nr_vertex_elements; i++) {
+ key.element[nr].input_format = draw->vertex_element[i].src_format;
+ key.element[nr].input_buffer = draw->vertex_element[i].vertex_buffer_index;
+ key.element[nr].input_offset = draw->vertex_element[i].src_offset;
+ key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ key.element[nr].output_offset = dst_offset;
+
+ dst_offset += 4 * sizeof(float);
+ nr++;
+ }
+
+ assert(dst_offset <= vertex_size);
+
+ key.nr_elements = nr;
+ key.output_stride = vertex_size;
+
+
+ /* Don't bother with caching at this stage:
+ */
+ if (!fetch->translate ||
+ memcmp(&fetch->translate->key, &key, sizeof(key)) != 0)
+ {
+ if (fetch->translate)
+ fetch->translate->release(fetch->translate);
+
+ fetch->translate = translate_generic_create( &key );
+
+ if (emit_header) {
+ static struct vertex_header vh = { 0, 0, 0, 0xffff };
+ fetch->translate->set_buffer(fetch->translate,
+ draw->nr_vertex_buffers,
+ &vh,
+ 0);
+ }
+ }
+}
+
+
+
+
+void draw_pt_fetch_run( struct pt_fetch *fetch,
+ const unsigned *elts,
+ unsigned count,
+ char *verts )
+{
+ struct draw_context *draw = fetch->draw;
+ struct translate *translate = fetch->translate;
+ unsigned i;
+
+ for (i = 0; i < draw->nr_vertex_buffers; i++) {
+ translate->set_buffer(translate,
+ i,
+ ((char *)draw->user.vbuffer[i] +
+ draw->vertex_buffer[i].buffer_offset),
+ draw->vertex_buffer[i].pitch );
+ }
+
+ translate->run_elts( translate,
+ elts,
+ count,
+ verts );
+}
+
+
+struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw )
+{
+ struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch);
+ if (!fetch)
+ return NULL;
+
+ fetch->draw = draw;
+ return fetch;
+}
+
+void draw_pt_fetch_destroy( struct pt_fetch *fetch )
+{
+ FREE(fetch);
+}
+
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c
index 79548d4156..26d0b37286 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c
@@ -286,9 +286,9 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
*/
draw_pt_run_pipeline( fpme->draw,
fpme->prim,
- pipeline_verts,
- fpme->pipeline_vertex_size,
+ (struct vertex_header *)pipeline_verts,
fetch_count,
+ fpme->pipeline_vertex_size,
draw_elts,
draw_count );
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 8db9e31e2d..0b9e8d15ba 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -39,8 +39,11 @@ struct fetch_pipeline_middle_end {
struct draw_context *draw;
struct pt_emit *emit;
+ struct pt_fetch *fetch;
+ struct pt_post_vs *post_vs;
- unsigned pipeline_vertex_size;
+ unsigned vertex_data_offset;
+ unsigned vertex_size;
unsigned prim;
unsigned opt;
};
@@ -51,15 +54,43 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
unsigned opt )
{
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+ struct draw_context *draw = fpme->draw;
+ struct draw_vertex_shader *vs = draw->vertex_shader;
+ unsigned nr = MAX2( vs->info.num_inputs,
+ vs->info.num_outputs );
fpme->prim = prim;
fpme->opt = opt;
+ /* Always leave room for the vertex header whether we need it or
+ * not. It's hard to get rid of it in particular because of the
+ * viewport code in draw_pt_post_vs.c.
+ */
+ fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
+
+
+
+ draw_pt_fetch_prepare( fpme->fetch,
+ (opt & (PT_CLIPTEST | PT_PIPELINE)) != 0,
+ fpme->vertex_size );
+
+ /* XXX: it's not really gl rasterization rules we care about here,
+ * but gl vs dx9 clip spaces.
+ */
+ draw_pt_post_vs_prepare( fpme->post_vs,
+ draw->rasterizer->bypass_clipping,
+ draw->identity_viewport,
+ draw->rasterizer->gl_rasterization_rules );
+
+
if (!(opt & PT_PIPELINE))
- draw_pt_emit_prepare( fpme->emit, prim, opt );
+ draw_pt_emit_prepare( fpme->emit,
+ prim );
+
+ /* No need to prepare the shader.
+ */
+ vs->prepare(vs, draw);
- //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float);
- fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION;
}
@@ -74,44 +105,63 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
struct draw_context *draw = fpme->draw;
struct draw_vertex_shader *shader = draw->vertex_shader;
- char *pipeline_verts;
- unsigned pipeline = PT_PIPELINE;
+ unsigned opt = fpme->opt;
- pipeline_verts = MALLOC(fpme->pipeline_vertex_size *
- fetch_count);
+ struct vertex_header *pipeline_verts =
+ (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count);
if (!pipeline_verts) {
assert(0);
return;
}
-
- /* Shade
+ /* Fetch into our vertex buffer
*/
- shader->prepare(shader, draw);
-
- if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts,
- fpme->pipeline_vertex_size))
+ draw_pt_fetch_run( fpme->fetch,
+ fetch_elts,
+ fetch_count,
+ (char *)pipeline_verts );
+
+ /* Run the shader, note that this overwrites the data[] parts of
+ * the pipeline verts. If there is no shader, ie a bypass shader,
+ * then the inputs == outputs, and are already in the correct
+ * place.
+ */
+ if (opt & PT_SHADE)
{
- pipeline |= PT_CLIPTEST;
+ shader->run_linear(shader,
+ (const float (*)[4])pipeline_verts->data,
+ ( float (*)[4])pipeline_verts->data,
+ (const float (*)[4])draw->user.constants,
+ fetch_count,
+ fpme->vertex_size,
+ fpme->vertex_size);
}
+ if (draw_pt_post_vs_run( fpme->post_vs,
+ pipeline_verts,
+ fetch_count,
+ fpme->vertex_size ))
+ {
+ opt |= PT_PIPELINE;
+ }
/* Do we need to run the pipeline?
*/
- if (fpme->opt & pipeline) {
+ if (opt & PT_PIPELINE) {
draw_pt_run_pipeline( fpme->draw,
fpme->prim,
pipeline_verts,
- fpme->pipeline_vertex_size,
fetch_count,
+ fpme->vertex_size,
draw_elts,
draw_count );
- } else {
+ }
+ else {
draw_pt_emit( fpme->emit,
- pipeline_verts,
- fpme->pipeline_vertex_size,
+ (const float (*)[4])pipeline_verts->data,
fetch_count,
+ fpme->vertex_size,
draw_elts,
draw_count );
}
@@ -129,6 +179,17 @@ static void fetch_pipeline_finish( struct draw_pt_middle_end *middle )
static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle )
{
+ struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
+
+ if (fpme->fetch)
+ draw_pt_fetch_destroy( fpme->fetch );
+
+ if (fpme->emit)
+ draw_pt_emit_destroy( fpme->emit );
+
+ if (fpme->post_vs)
+ draw_pt_post_vs_destroy( fpme->post_vs );
+
FREE(middle);
}
@@ -146,6 +207,14 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *
fpme->draw = draw;
+ fpme->fetch = draw_pt_fetch_create( draw );
+ if (!fpme->fetch)
+ goto fail;
+
+ fpme->post_vs = draw_pt_post_vs_create( draw );
+ if (!fpme->post_vs)
+ goto fail;
+
fpme->emit = draw_pt_emit_create( draw );
if (!fpme->emit)
goto fail;
diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c
index 17ce6febec..1a9a3adb03 100644
--- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c
@@ -117,12 +117,13 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw )
*/
void draw_pt_run_pipeline( struct draw_context *draw,
unsigned prim,
- char *verts,
- unsigned stride,
+ struct vertex_header *pipeline_verts,
unsigned vertex_count,
+ unsigned stride,
const ushort *elts,
unsigned count )
{
+ char *verts = (char *)pipeline_verts;
unsigned i;
draw->pt.pipeline.verts = verts;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
new file mode 100644
index 0000000000..315b02f4ee
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -0,0 +1,202 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "pipe/p_context.h"
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+#include "draw/draw_vbuf.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_pt.h"
+
+struct pt_post_vs {
+ struct draw_context *draw;
+
+ boolean (*run)( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride );
+};
+
+
+
+static INLINE unsigned
+compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+ unsigned mask = 0x0;
+ unsigned i;
+
+ /* Do the hardwired planes first:
+ */
+ if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
+ if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
+ if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
+ if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
+ if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
+ if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
+
+ /* Followed by any remaining ones:
+ */
+ for (i = 6; i < nr; i++) {
+ if (dot4(clip, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+
+ return mask;
+}
+
+
+/* The normal case - cliptest, rhw divide, viewport transform.
+ *
+ * Also handle identity viewport here at the expense of a few wasted
+ * instructions
+ */
+static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ struct vertex_header *out = vertices;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ unsigned j;
+ unsigned clipped = 0;
+
+ for (j = 0; j < count; j++) {
+ out->clip[0] = out->data[0][0];
+ out->clip[1] = out->data[0][1];
+ out->clip[2] = out->data[0][2];
+ out->clip[3] = out->data[0][3];
+
+ out->vertex_id = 0xffff;
+ out->edgeflag = 1;
+ out->clipmask = compute_clipmask_gl(out->clip,
+ pvs->draw->plane,
+ pvs->draw->nr_planes);
+ clipped += out->clipmask;
+
+ if (out->clipmask == 0)
+ {
+ /* divide by w */
+ float w = 1.0f / out->data[0][3];
+
+ /* Viewport mapping */
+ out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0];
+ out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1];
+ out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2];
+ out->data[0][3] = w;
+ }
+
+ out = (struct vertex_header *)( (char *)out + stride );
+ }
+
+ return clipped != 0;
+}
+
+
+
+/* If bypass_clipping is set, skip cliptest and rhw divide.
+ */
+static boolean post_vs_viewport( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ struct vertex_header *out = vertices;
+ const float *scale = pvs->draw->viewport.scale;
+ const float *trans = pvs->draw->viewport.translate;
+ unsigned j;
+
+ debug_printf("%s\n", __FUNCTION__);
+ for (j = 0; j < count; j++) {
+ /* Viewport mapping only, no cliptest/rhw divide
+ */
+ out->data[0][0] = out->data[0][0] * scale[0] + trans[0];
+ out->data[0][1] = out->data[0][1] * scale[1] + trans[1];
+ out->data[0][2] = out->data[0][2] * scale[2] + trans[2];
+
+ out = (struct vertex_header *)((char *)out + stride);
+ }
+
+ return FALSE;
+}
+
+
+/* If bypass_clipping is set and we have an identity viewport, nothing
+ * to do.
+ */
+static boolean post_vs_none( struct pt_post_vs *pvs,
+ struct vertex_header *vertices,
+ unsigned count,
+ unsigned stride )
+{
+ debug_printf("%s\n", __FUNCTION__);
+ return FALSE;
+}
+
+boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
+ struct vertex_header *pipeline_verts,
+ unsigned count,
+ unsigned stride )
+{
+ return pvs->run( pvs, pipeline_verts, count, stride );
+}
+
+
+void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
+ boolean bypass_clipping,
+ boolean identity_viewport,
+ boolean opengl )
+{
+ if (bypass_clipping) {
+ if (identity_viewport)
+ pvs->run = post_vs_none;
+ else
+ pvs->run = post_vs_viewport;
+ }
+ else {
+ //if (opengl)
+ pvs->run = post_vs_cliptest_viewport_gl;
+ }
+}
+
+
+struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw )
+{
+ struct pt_post_vs *pvs = CALLOC_STRUCT( pt_post_vs );
+ if (!pvs)
+ return NULL;
+
+ pvs->draw = draw;
+
+ return pvs;
+}
+
+void draw_pt_post_vs_destroy( struct pt_post_vs *pvs )
+{
+ FREE(pvs);
+}
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index 0e05b79715..184151b9b1 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -58,8 +58,10 @@ static void
vs_exec_prepare( struct draw_vertex_shader *shader,
struct draw_context *draw )
{
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+
/* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(&draw->machine,
+ tgsi_exec_machine_bind_shader(evs->machine,
shader->state.tokens,
PIPE_MAX_SAMPLERS,
NULL /*samplers*/ );
@@ -84,31 +86,45 @@ vs_exec_run( struct draw_vertex_shader *shader,
void *vOut,
unsigned vertex_size)
{
- struct tgsi_exec_machine *machine = &draw->machine;
+ struct exec_vertex_shader *evs = exec_vertex_shader(shader);
+ struct tgsi_exec_machine *machine = evs->machine;
unsigned int i, j;
unsigned int clipped = 0;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
+ struct tgsi_exec_vector *outputs = 0;
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
assert(shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION);
machine->Consts = (const float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
+
if (draw->rasterizer->bypass_vs) {
/* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
+ outputs = machine->Inputs;
}
else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
+ outputs = machine->Outputs;
}
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices );
+#if 0
+ for (j = 0; j < max_vertices; j++) {
+ unsigned slot;
+ debug_printf("%d) Input vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ machine->Inputs[slot].xyzw[0].f[j],
+ machine->Inputs[slot].xyzw[1].f[j],
+ machine->Inputs[slot].xyzw[2].f[j],
+ machine->Inputs[slot].xyzw[3].f[j]);
+ }
+ }
+#endif
+
+
if (!draw->rasterizer->bypass_vs) {
/* run interpreter */
tgsi_exec_machine_run( machine );
@@ -127,10 +143,10 @@ vs_exec_run( struct draw_vertex_shader *shader,
* program as a set of DP4 instructions appended to the
* user-provided code.
*/
- x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+ x = out->clip[0] = outputs[0].xyzw[0].f[j];
+ y = out->clip[1] = outputs[0].xyzw[1].f[j];
+ z = out->clip[2] = outputs[0].xyzw[2].f[j];
+ w = out->clip[3] = outputs[0].xyzw[3].f[j];
if (!draw->rasterizer->bypass_clipping) {
out->clipmask = compute_clipmask(out->clip, draw->plane,
@@ -156,7 +172,8 @@ vs_exec_run( struct draw_vertex_shader *shader,
out->data[0][2] = z * scale[2] + trans[2];
out->data[0][3] = w;
}
- else {
+ else
+ {
out->data[0][0] = x;
out->data[0][1] = y;
out->data[0][2] = z;
@@ -167,10 +184,10 @@ vs_exec_run( struct draw_vertex_shader *shader,
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ out->data[slot][0] = outputs[slot].xyzw[0].f[j];
+ out->data[slot][1] = outputs[slot].xyzw[1].f[j];
+ out->data[slot][2] = outputs[slot].xyzw[2].f[j];
+ out->data[slot][3] = outputs[slot].xyzw[3].f[j];
}
#if 0 /*DEBUG*/
@@ -216,12 +233,25 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
/* Swizzle inputs.
*/
for (j = 0; j < max_vertices; j++) {
+#if 0
+ debug_printf("%d) Input vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_inputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ input[slot][0],
+ input[slot][1],
+ input[slot][2],
+ input[slot][3]);
+ }
+#endif
+
for (slot = 0; slot < shader->info.num_inputs; slot++) {
machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
+
+ input = (const float (*)[4])((const char *)input + input_stride);
}
/* run interpreter */
@@ -235,13 +265,23 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+
}
+
+#if 0
+ debug_printf("%d) Post xform vert:\n", i + j);
+ for (slot = 0; slot < shader->info.num_outputs; slot++) {
+ debug_printf("\t%d: %f %f %f %f\n", slot,
+ output[slot][0],
+ output[slot][1],
+ output[slot][2],
+ output[slot][3]);
+ }
+#endif
+
+ output = (float (*)[4])((char *)output + output_stride);
}
- /* Advance input, output pointers:
- */
- input = (const float (*)[4])((const char *)input + input_stride);
- output = (float (*)[4])((char *)output + output_stride);
}
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index d0baca715e..4dbfa955a4 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -47,6 +47,7 @@
struct draw_llvm_vertex_shader {
struct draw_vertex_shader base;
struct gallivm_prog *llvm_prog;
+ struct tgsi_exec_machine *machine;
};
@@ -77,12 +78,9 @@ vs_llvm_run( struct draw_vertex_shader *base,
struct draw_llvm_vertex_shader *shader =
(struct draw_llvm_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
+ struct tgsi_exec_machine *machine = shader->machine;
unsigned int j;
unsigned int clipped = 0;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
@@ -93,13 +91,12 @@ vs_llvm_run( struct draw_vertex_shader *base,
/* Consts does not require 16 byte alignment. */
machine->Consts = (float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
if (draw->rasterizer->bypass_vs) {
/* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
+ outputs = machine->Inputs;
}
else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
+ outputs = machine->Outputs;
}
@@ -119,10 +116,10 @@ vs_llvm_run( struct draw_vertex_shader *base,
unsigned slot;
float x, y, z, w;
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+ x = vOut[j]->clip[0] = outputs[0].xyzw[0].f[j];
+ y = vOut[j]->clip[1] = outputs[0].xyzw[1].f[j];
+ z = vOut[j]->clip[2] = outputs[0].xyzw[2].f[j];
+ w = vOut[j]->clip[3] = outputs[0].xyzw[3].f[j];
if (!draw->rasterizer->bypass_clipping) {
vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane,
@@ -159,10 +156,10 @@ vs_llvm_run( struct draw_vertex_shader *base,
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ vOut[j]->data[slot][0] = outputs[slot].xyzw[0].f[j];
+ vOut[j]->data[slot][1] = outputs[slot].xyzw[1].f[j];
+ vOut[j]->data[slot][2] = outputs[slot].xyzw[2].f[j];
+ vOut[j]->data[slot][3] = outputs[slot].xyzw[3].f[j];
}
} /* loop over vertices */
return clipped != 0;
@@ -183,7 +180,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
struct draw_llvm_vertex_shader *shader =
(struct draw_llvm_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
+ struct tgsi_exec_machine *machine = shader->machine;
unsigned int j;
@@ -199,6 +196,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
+
+ input = (const float (*)[4])((const char *)input + input_stride);
}
/* run shader */
@@ -216,12 +215,9 @@ vs_llvm_run_linear( struct draw_vertex_shader *base,
output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
- /* Advance input, output pointers:
- */
- input = (const float (*)[4])((const char *)input + input_stride);
- output = (float (*)[4])((char *)output + output_stride);
+ output = (float (*)[4])((char *)output + output_stride);
+ }
}
}
@@ -263,6 +259,7 @@ draw_create_vs_llvm(struct draw_context *draw,
vs->base.run = vs_llvm_run;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
+ vs->machine = &draw->machine;
{
struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 873ecfdc5d..a763f3845c 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -91,12 +91,10 @@ vs_sse_run( struct draw_vertex_shader *base,
unsigned vertex_size )
{
struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
- struct tgsi_exec_machine *machine = &draw->machine;
+ struct tgsi_exec_machine *machine = shader->machine;
unsigned int i, j;
unsigned int clipped = 0;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS);
+ struct tgsi_exec_vector *outputs = 0;
const float *scale = draw->viewport.scale;
const float *trans = draw->viewport.translate;
@@ -104,13 +102,13 @@ vs_sse_run( struct draw_vertex_shader *base,
/* Consts does not require 16 byte alignment. */
machine->Consts = (const float (*)[4]) draw->user.constants;
- machine->Inputs = ALIGN16_ASSIGN(inputs);
+
if (draw->rasterizer->bypass_vs) {
/* outputs are just the inputs */
- machine->Outputs = machine->Inputs;
+ outputs = machine->Inputs;
}
else {
- machine->Outputs = ALIGN16_ASSIGN(outputs);
+ outputs = machine->Outputs;
}
for (i = 0; i < count; i += SSE_MAX_VERTICES) {
@@ -142,10 +140,10 @@ vs_sse_run( struct draw_vertex_shader *base,
struct vertex_header *out =
draw_header_from_block(vOut, vertex_size, i + j);
- x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+ x = out->clip[0] = outputs[0].xyzw[0].f[j];
+ y = out->clip[1] = outputs[0].xyzw[1].f[j];
+ z = out->clip[2] = outputs[0].xyzw[2].f[j];
+ w = out->clip[3] = outputs[0].xyzw[3].f[j];
if (!draw->rasterizer->bypass_clipping) {
out->clipmask = compute_clipmask(out->clip, draw->plane,
@@ -182,10 +180,10 @@ vs_sse_run( struct draw_vertex_shader *base,
* vertex attrib slots.
*/
for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ out->data[slot][0] = outputs[slot].xyzw[0].f[j];
+ out->data[slot][1] = outputs[slot].xyzw[1].f[j];
+ out->data[slot][2] = outputs[slot].xyzw[2].f[j];
+ out->data[slot][3] = outputs[slot].xyzw[3].f[j];
}
#if 0 /*DEBUG*/
printf("%d) Post xform vert:\n", i + j);
@@ -233,6 +231,8 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
}
+
+ input = (const float (*)[4])((const char *)input + input_stride);
}
/* run compiled shader
@@ -253,12 +253,9 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
}
- }
- /* Advance input, output pointers:
- */
- input = (const float (*)[4])((const char *)input + input_stride);
- output = (float (*)[4])((char *)output + output_stride);
+ output = (float (*)[4])((char *)output + output_stride);
+ }
}
}
@@ -300,6 +297,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.run = vs_sse_run;
vs->base.run_linear = vs_sse_run_linear;
vs->base.delete = vs_sse_delete;
+ vs->machine = &draw->machine;
x86_init_func( &vs->sse2_program );