summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Skeggs <skeggsb@gmail.com>2008-03-13 18:29:56 +1100
committerBen Skeggs <skeggsb@gmail.com>2008-03-16 18:13:34 +1100
commite1cf3f00e546f814effd25e9ccd072c941366444 (patch)
tree0aac96d8bdff0b7e12c16d93ba1a1df8f30360ca
parent7d2c63e90983088f1e2f49543caf0468aa91111f (diff)
nv40: simple swtnl path (half broken, but getting there)
-rw-r--r--src/gallium/drivers/nv40/nv40_context.c6
-rw-r--r--src/gallium/drivers/nv40/nv40_context.h25
-rw-r--r--src/gallium/drivers/nv40/nv40_draw.c334
-rw-r--r--src/gallium/drivers/nv40/nv40_fragprog.c3
-rw-r--r--src/gallium/drivers/nv40/nv40_shader.h2
-rw-r--r--src/gallium/drivers/nv40/nv40_state.c27
-rw-r--r--src/gallium/drivers/nv40/nv40_state.h4
-rw-r--r--src/gallium/drivers/nv40/nv40_state_clip.c8
-rw-r--r--src/gallium/drivers/nv40/nv40_state_emit.c130
-rw-r--r--src/gallium/drivers/nv40/nv40_state_viewport.c45
-rw-r--r--src/gallium/drivers/nv40/nv40_vbo.c22
-rw-r--r--src/gallium/drivers/nv40/nv40_vertprog.c16
12 files changed, 531 insertions, 91 deletions
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index 203c843a01..58627443b8 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -74,8 +74,12 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
nv40_init_state_functions(nv40);
nv40_init_miptree_functions(nv40);
+ /* Create, configure, and install fallback swtnl path */
nv40->draw = draw_create();
- assert(nv40->draw);
+ draw_wide_point_threshold(nv40->draw, 9999999.0);
+ draw_wide_line_threshold(nv40->draw, 9999999.0);
+ draw_enable_line_stipple(nv40->draw, FALSE);
+ draw_enable_point_sprites(nv40->draw, FALSE);
draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40));
return &nv40->pipe;
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index 100c678187..02ca20b801 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -116,7 +116,20 @@ struct nv40_context {
/* HW state derived from pipe states */
struct nv40_state state;
- unsigned fallback;
+ struct {
+ struct nv40_vertex_program *vertprog;
+
+ unsigned nr_attribs;
+ unsigned hw[PIPE_MAX_SHADER_INPUTS];
+ unsigned draw[PIPE_MAX_SHADER_INPUTS];
+ unsigned emit[PIPE_MAX_SHADER_INPUTS];
+ } swtnl;
+
+ enum {
+ HW, SWTNL, SWRAST
+ } render_mode;
+ unsigned fallback_swtnl;
+ unsigned fallback_swrast;
/* Context state */
unsigned dirty;
@@ -166,6 +179,10 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
/* nv40_draw.c */
extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
+extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf,
+ unsigned ib_size, unsigned mode,
+ unsigned start, unsigned count);
/* nv40_vertprog.c */
extern void nv40_vertprog_destroy(struct nv40_context *,
@@ -179,8 +196,9 @@ extern void nv40_fragprog_destroy(struct nv40_context *,
extern void nv40_fragtex_bind(struct nv40_context *);
/* nv40_state.c and friends */
-extern void nv40_emit_hw_state(struct nv40_context *nv40);
-extern void nv40_state_tex_update(struct nv40_context *nv40);
+extern boolean nv40_state_validate(struct nv40_context *nv40);
+extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
+extern void nv40_state_emit(struct nv40_context *nv40);
extern struct nv40_state_entry nv40_state_clip;
extern struct nv40_state_entry nv40_state_rasterizer;
extern struct nv40_state_entry nv40_state_scissor;
@@ -194,6 +212,7 @@ extern struct nv40_state_entry nv40_state_viewport;
extern struct nv40_state_entry nv40_state_framebuffer;
extern struct nv40_state_entry nv40_state_fragtex;
extern struct nv40_state_entry nv40_state_vbo;
+extern struct nv40_state_entry nv40_state_vtxfmt;
/* nv40_vbo.c */
extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index a39bb85e99..ce0e0bc6f2 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -1,62 +1,350 @@
-#include "draw/draw_private.h"
#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+#include "draw/draw_private.h"
#include "nv40_context.h"
+#define NV40_SHADER_NO_FUCKEDNESS
+#include "nv40_shader.h"
+
+/* Simple, but crappy, swtnl path, hopefully we won't need to hit this very
+ * often at all. Uses "quadro style" vertex submission + a fixed vertex
+ * layout to avoid the need to generate a vertex program or vtxfmt.
+ */
-struct nv40_draw_stage {
- struct draw_stage draw;
+struct nv40_render_stage {
+ struct draw_stage stage;
struct nv40_context *nv40;
+ unsigned prim;
};
+static INLINE struct nv40_render_stage *
+nv40_render_stage(struct draw_stage *stage)
+{
+ return (struct nv40_render_stage *)stage;
+}
+
+static INLINE void
+nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
+{
+ unsigned i;
+
+ for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
+ unsigned idx = nv40->swtnl.draw[i];
+ unsigned hw = nv40->swtnl.hw[i];
+
+ switch (nv40->swtnl.emit[i]) {
+ case EMIT_OMIT:
+ break;
+ case EMIT_1F:
+ BEGIN_RING(curie, 0x1e40 + (hw * 4), 1);
+ OUT_RING (fui(v->data[idx][0]));
+ break;
+ case EMIT_2F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ break;
+ case EMIT_3F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ OUT_RING (fui(v->data[idx][2]));
+ break;
+ case EMIT_4F:
+ BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
+ OUT_RING (fui(v->data[idx][0]));
+ OUT_RING (fui(v->data[idx][1]));
+ OUT_RING (fui(v->data[idx][2]));
+ OUT_RING (fui(v->data[idx][3]));
+ break;
+ case EMIT_4UB:
+ BEGIN_RING(curie, 0x1940 + (hw * 4), 1);
+ OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]),
+ float_to_ubyte(v->data[idx][1]),
+ float_to_ubyte(v->data[idx][2]),
+ float_to_ubyte(v->data[idx][3])));
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+}
+
+static INLINE void
+nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
+ unsigned mode, unsigned count)
+{
+ struct nv40_render_stage *rs = nv40_render_stage(stage);
+ struct nv40_context *nv40 = rs->nv40;
+ struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf;
+ unsigned i;
+
+ /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
+ if (pb->remaining < ((count * 20) + 6)) {
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ NOUVEAU_ERR("AIII, missed flush\n");
+ assert(0);
+ }
+ FIRE_RING();
+ nv40_state_emit(nv40);
+ }
+
+ /* Switch primitive modes if necessary */
+ if (rs->prim != mode) {
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ }
+
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (mode);
+ rs->prim = mode;
+ }
+
+ /* Emit vertex data */
+ for (i = 0; i < count; i++)
+ nv40_render_vertex(nv40, prim->v[i]);
+
+ /* If it's likely we'll need to empty the push buffer soon, finish
+ * off the primitive now.
+ */
+ if (pb->remaining < ((count * 20) + 6)) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ rs->prim = NV40TCL_BEGIN_END_STOP;
+ }
+}
+
static void
-nv40_draw_point(struct draw_stage *draw, struct prim_header *prim)
+nv40_render_point(struct draw_stage *draw, struct prim_header *prim)
{
- NOUVEAU_ERR("\n");
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1);
}
static void
-nv40_draw_line(struct draw_stage *draw, struct prim_header *prim)
+nv40_render_line(struct draw_stage *draw, struct prim_header *prim)
{
- NOUVEAU_ERR("\n");
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2);
}
static void
-nv40_draw_tri(struct draw_stage *draw, struct prim_header *prim)
+nv40_render_tri(struct draw_stage *draw, struct prim_header *prim)
{
- NOUVEAU_ERR("\n");
+ nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3);
}
static void
-nv40_draw_flush(struct draw_stage *draw, unsigned flags)
+nv40_render_flush(struct draw_stage *draw, unsigned flags)
{
+ struct nv40_render_stage *rs = nv40_render_stage(draw);
+ struct nv40_context *nv40 = rs->nv40;
+
+ if (rs->prim != NV40TCL_BEGIN_END_STOP) {
+ BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
+ OUT_RING (NV40TCL_BEGIN_END_STOP);
+ rs->prim = NV40TCL_BEGIN_END_STOP;
+ }
}
static void
-nv40_draw_reset_stipple_counter(struct draw_stage *draw)
+nv40_render_reset_stipple_counter(struct draw_stage *draw)
{
- NOUVEAU_ERR("\n");
}
static void
-nv40_draw_destroy(struct draw_stage *draw)
+nv40_render_destroy(struct draw_stage *draw)
{
free(draw);
}
+static INLINE void
+emit_mov(struct nv40_vertex_program *vp,
+ unsigned dst, unsigned src, unsigned vor, unsigned mask)
+{
+ struct nv40_vertex_program_exec *inst;
+
+ vp->insns = realloc(vp->insns,
+ sizeof(struct nv40_vertex_program_exec) *
+ ++vp->nr_insns);
+ inst = &vp->insns[vp->nr_insns - 1];
+
+ inst->data[0] = 0x401f9c6c;
+ inst->data[1] = 0x0040000d | (src << 8);
+ inst->data[2] = 0x8106c083;
+ inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13);
+ inst->const_index = -1;
+ inst->has_branch_offset = FALSE;
+
+ vp->ir |= (1 << src);
+ if (vor != ~0)
+ vp->or |= (1 << vor);
+}
+
+static struct nv40_vertex_program *
+create_drawvp(struct nv40_context *nv40)
+{
+ struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program);
+ unsigned i;
+
+ emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf);
+ emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8);
+ for (i = 0; i < 8; i++)
+ emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf);
+
+ vp->insns[vp->nr_insns - 1].data[3] |= 1;
+ vp->translated = TRUE;
+ return vp;
+}
+
struct draw_stage *
nv40_draw_render_stage(struct nv40_context *nv40)
{
- struct nv40_draw_stage *nv40draw = CALLOC_STRUCT(nv40_draw_stage);
+ struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage);
+
+ if (!nv40->swtnl.vertprog)
+ nv40->swtnl.vertprog = create_drawvp(nv40);
+
+ render->nv40 = nv40;
+ render->stage.draw = nv40->draw;
+ render->stage.point = nv40_render_point;
+ render->stage.line = nv40_render_line;
+ render->stage.tri = nv40_render_tri;
+ render->stage.flush = nv40_render_flush;
+ render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter;
+ render->stage.destroy = nv40_render_destroy;
+
+ return &render->stage;
+}
+
+boolean
+nv40_draw_elements_swtnl(struct pipe_context *pipe,
+ struct pipe_buffer *idxbuf, unsigned idxbuf_size,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct nv40_context *nv40 = nv40_context(pipe);
+ struct pipe_winsys *ws = pipe->winsys;
+ unsigned i;
+ void *map;
+
+ if (!nv40_state_validate_swtnl(nv40))
+ return FALSE;
+ nv40_state_emit(nv40);
- nv40draw->nv40 = nv40;
- nv40draw->draw.draw = nv40->draw;
- nv40draw->draw.point = nv40_draw_point;
- nv40draw->draw.line = nv40_draw_line;
- nv40draw->draw.tri = nv40_draw_tri;
- nv40draw->draw.flush = nv40_draw_flush;
- nv40draw->draw.reset_stipple_counter = nv40_draw_reset_stipple_counter;
- nv40draw->draw.destroy = nv40_draw_destroy;
+ for (i = 0; i < PIPE_ATTRIB_MAX; i++) {
+ if (!nv40->vtxbuf[i].buffer)
+ continue;
+ map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer,
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_vertex_buffer(nv40->draw, i, map);
+ }
- return &nv40draw->draw;
+ if (idxbuf) {
+ map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map);
+ } else {
+ draw_set_mapped_element_buffer(nv40->draw, 0, NULL);
+ }
+
+ if (nv40->constbuf[PIPE_SHADER_VERTEX]) {
+ map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX],
+ PIPE_BUFFER_USAGE_CPU_READ);
+ draw_set_mapped_constant_buffer(nv40->draw, map);
+ }
+
+ draw_arrays(nv40->draw, mode, start, count);
+
+ for (i = 0; i < PIPE_ATTRIB_MAX; i++) {
+ if (!nv40->vtxbuf[i].buffer)
+ continue;
+ ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer);
+ }
+
+ if (idxbuf)
+ ws->buffer_unmap(ws, idxbuf);
+
+ if (nv40->constbuf[PIPE_SHADER_VERTEX])
+ ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]);
+
+ draw_flush(nv40->draw);
+ pipe->flush(pipe, 0);
+
+ return TRUE;
}
+static INLINE void
+emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
+ unsigned semantic, unsigned index)
+{
+ unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
+ unsigned a = nv40->swtnl.nr_attribs++;
+
+ nv40->swtnl.hw[a] = hw;
+ nv40->swtnl.emit[a] = emit;
+ nv40->swtnl.draw[a] = draw_out;
+}
+
+static boolean
+nv40_state_vtxfmt_validate(struct nv40_context *nv40)
+{
+ struct nv40_fragment_program *fp = nv40->fragprog;
+ unsigned colour = 0, texcoords = 0, fog = 0, i;
+
+ /* Determine needed fragprog inputs */
+ for (i = 0; i < fp->info.num_inputs; i++) {
+ switch (fp->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ break;
+ case TGSI_SEMANTIC_COLOR:
+ colour |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_GENERIC:
+ texcoords |= (1 << fp->info.input_semantic_index[i]);
+ break;
+ case TGSI_SEMANTIC_FOG:
+ fog = 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ nv40->swtnl.nr_attribs = 0;
+
+ /* Map draw vtxprog output to hw attribute IDs */
+ for (i = 0; i < 2; i++) {
+ if (!(colour & (1 << i)))
+ continue;
+ emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i);
+ }
+
+ for (i = 0; i < 8; i++) {
+ if (!(texcoords & (1 << i)))
+ continue;
+ emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i);
+ }
+
+ if (fog) {
+ emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0);
+ }
+
+ emit_attrib(nv40, 0, EMIT_4F, TGSI_SEMANTIC_POSITION, 0);
+
+ return FALSE;
+}
+
+struct nv40_state_entry nv40_state_vtxfmt = {
+ .validate = nv40_state_vtxfmt_validate,
+ .dirty = {
+ .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG,
+ .hw = 0
+ }
+};
+
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 953f9cd908..82dbcd3eef 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -797,9 +797,10 @@ nv40_fragprog_validate(struct nv40_context *nv40)
if (fp->translated)
goto update_constants;
+ nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG;
nv40_fragprog_translate(nv40, fp);
if (!fp->translated) {
- nv40->fallback |= NV40_FALLBACK_RAST;
+ nv40->fallback_swrast |= NV40_NEW_FRAGPROG;
return FALSE;
}
diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h
index 5909c70713..854dccf548 100644
--- a/src/gallium/drivers/nv40/nv40_shader.h
+++ b/src/gallium/drivers/nv40/nv40_shader.h
@@ -476,6 +476,7 @@
# define NV40_FP_SWIZZLE_W 3
#define NV40_FP_REG_NEGATE (1 << 17)
+#ifndef NV40_SHADER_NO_FUCKEDNESS
#define NV40SR_NONE 0
#define NV40SR_OUTPUT 1
#define NV40SR_INPUT 2
@@ -550,5 +551,6 @@ nv40_sr_scale(struct nv40_sreg src, int scale)
src.dst_scale = scale;
return src;
}
+#endif
#endif
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index 321d5de041..3eafbece30 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -3,6 +3,8 @@
#include "pipe/p_util.h"
#include "pipe/p_inlines.h"
+#include "draw/draw_context.h"
+
#include "nv40_context.h"
#include "nv40_state.h"
@@ -345,7 +347,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe,
so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
- so_data(so, 0);
+ so_data(so, NV40TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV40TCL_FRONT_FACE_CCW);
@@ -363,13 +365,13 @@ nv40_rasterizer_state_create(struct pipe_context *pipe,
so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK);
break;
default:
- so_data(so, 0);
+ so_data(so, NV40TCL_CULL_FACE_BACK);
break;
}
so_data(so, NV40TCL_FRONT_FACE_CW);
}
so_data(so, cso->poly_smooth ? 1 : 0);
- so_data(so, cso->cull_mode != PIPE_WINDING_NONE ? 1 : 0);
+ so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0);
so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
so_data (so, cso->poly_stipple_enable ? 1 : 0);
@@ -419,6 +421,9 @@ static void
nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_rasterizer_state *rsso = hwcso;
+
+ draw_set_rasterizer_state(nv40->draw, &rsso->pipe);
nv40->rasterizer = hwcso;
nv40->dirty |= NV40_NEW_RAST;
@@ -508,10 +513,12 @@ static void *
nv40_vp_state_create(struct pipe_context *pipe,
const struct pipe_shader_state *cso)
{
+ struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_vertex_program *vp;
vp = CALLOC(1, sizeof(struct nv40_vertex_program));
vp->pipe = *cso;
+ vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe);
return (void *)vp;
}
@@ -520,6 +527,9 @@ static void
nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso)
{
struct nv40_context *nv40 = nv40_context(pipe);
+ struct nv40_vertex_program *vp = hwcso;
+
+ draw_bind_vertex_shader(nv40->draw, vp ? vp->draw : NULL);
nv40->vertprog = hwcso;
nv40->dirty |= NV40_NEW_VERTPROG;
@@ -531,6 +541,7 @@ nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso)
struct nv40_context *nv40 = nv40_context(pipe);
struct nv40_vertex_program *vp = hwcso;
+ draw_delete_vertex_shader(nv40->draw, vp->draw);
nv40_vertprog_destroy(nv40, vp);
FREE(vp);
}
@@ -544,6 +555,8 @@ nv40_fp_state_create(struct pipe_context *pipe,
fp = CALLOC(1, sizeof(struct nv40_fragment_program));
fp->pipe = *cso;
+ tgsi_scan_shader(fp->pipe.tokens, &fp->info);
+
return (void *)fp;
}
@@ -582,6 +595,8 @@ nv40_set_clip_state(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
+ draw_set_clip_state(nv40->draw, clip);
+
nv40->clip = *clip;
nv40->dirty |= NV40_NEW_UCP;
}
@@ -638,6 +653,8 @@ nv40_set_viewport_state(struct pipe_context *pipe,
{
struct nv40_context *nv40 = nv40_context(pipe);
+ draw_set_viewport_state(nv40->draw, vpt);
+
nv40->viewport = *vpt;
nv40->dirty |= NV40_NEW_VIEWPORT;
}
@@ -648,6 +665,8 @@ nv40_set_vertex_buffer(struct pipe_context *pipe, unsigned index,
{
struct nv40_context *nv40 = nv40_context(pipe);
+ draw_set_vertex_buffer(nv40->draw, index, vb);
+
nv40->vtxbuf[index] = *vb;
nv40->dirty |= NV40_NEW_ARRAYS;
}
@@ -658,6 +677,8 @@ nv40_set_vertex_element(struct pipe_context *pipe, unsigned index,
{
struct nv40_context *nv40 = nv40_context(pipe);
+ draw_set_vertex_element(nv40->draw, index, ve);
+
nv40->vtxelt[index] = *ve;
nv40->dirty |= NV40_NEW_ARRAYS;
}
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
index a02ea0c878..ab2866eb7a 100644
--- a/src/gallium/drivers/nv40/nv40_state.h
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -2,6 +2,7 @@
#define __NV40_STATE_H__
#include "pipe/p_state.h"
+#include "tgsi/util/tgsi_scan.h"
struct nv40_sampler_state {
uint32_t fmt;
@@ -25,6 +26,8 @@ struct nv40_vertex_program_data {
struct nv40_vertex_program {
struct pipe_shader_state pipe;
+ struct draw_vertex_shader *draw;
+
boolean translated;
struct nv40_vertex_program_exec *insns;
unsigned nr_insns;
@@ -49,6 +52,7 @@ struct nv40_fragment_program_data {
struct nv40_fragment_program {
struct pipe_shader_state pipe;
+ struct tgsi_shader_info info;
boolean translated;
unsigned samplers;
diff --git a/src/gallium/drivers/nv40/nv40_state_clip.c b/src/gallium/drivers/nv40/nv40_state_clip.c
index 93e690161f..c52390f9ed 100644
--- a/src/gallium/drivers/nv40/nv40_state_clip.c
+++ b/src/gallium/drivers/nv40/nv40_state_clip.c
@@ -3,8 +3,12 @@
static boolean
nv40_state_clip_validate(struct nv40_context *nv40)
{
- if (nv40->clip.nr)
- nv40->fallback |= NV40_FALLBACK_TNL;
+
+ if (nv40->render_mode == HW) {
+ nv40->fallback_swtnl &= ~NV40_NEW_UCP;
+ if (nv40->clip.nr)
+ nv40->fallback_swtnl |= NV40_NEW_UCP;
+ }
return FALSE;
}
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 9f268640e0..056238cc83 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -1,5 +1,6 @@
#include "nv40_context.h"
#include "nv40_state.h"
+#include "draw/draw_context.h"
static struct nv40_state_entry *render_states[] = {
&nv40_state_framebuffer,
@@ -18,15 +19,27 @@ static struct nv40_state_entry *render_states[] = {
NULL
};
+static struct nv40_state_entry *swtnl_states[] = {
+ &nv40_state_framebuffer,
+ &nv40_state_rasterizer,
+ &nv40_state_clip,
+ &nv40_state_scissor,
+ &nv40_state_stipple,
+ &nv40_state_fragprog,
+ &nv40_state_fragtex,
+ &nv40_state_vertprog,
+ &nv40_state_blend,
+ &nv40_state_blend_colour,
+ &nv40_state_zsa,
+ &nv40_state_viewport,
+ &nv40_state_vtxfmt,
+ NULL
+};
+
static void
-nv40_state_validate(struct nv40_context *nv40)
+nv40_state_do_validate(struct nv40_context *nv40,
+ struct nv40_state_entry **states)
{
- struct nv40_state_entry **states = render_states;
- unsigned last_fallback;
-
- last_fallback = nv40->fallback;
- nv40->fallback = 0;
-
while (*states) {
struct nv40_state_entry *e = *states;
@@ -38,32 +51,15 @@ nv40_state_validate(struct nv40_context *nv40)
states++;
}
nv40->dirty = 0;
-
- if (nv40->fallback & NV40_FALLBACK_TNL &&
- !(last_fallback & NV40_FALLBACK_TNL)) {
- NOUVEAU_ERR("XXX: hwtnl->swtnl\n");
- } else
- if (last_fallback & NV40_FALLBACK_TNL &&
- !(nv40->fallback & NV40_FALLBACK_TNL)) {
- NOUVEAU_ERR("XXX: swtnl->hwtnl\n");
- }
-
- if (nv40->fallback & NV40_FALLBACK_RAST &&
- !(last_fallback & NV40_FALLBACK_RAST)) {
- NOUVEAU_ERR("XXX: hwrast->swrast\n");
- } else
- if (last_fallback & NV40_FALLBACK_RAST &&
- !(nv40->fallback & NV40_FALLBACK_RAST)) {
- NOUVEAU_ERR("XXX: swrast->hwrast\n");
- }
}
-static void
+void
nv40_state_emit(struct nv40_context *nv40)
{
struct nv40_state *state = &nv40->state;
struct nv40_screen *screen = nv40->screen;
unsigned i, samplers;
+ uint64 states;
if (nv40->pctx_id != screen->cur_pctx) {
for (i = 0; i < NV40_STATE_MAX; i++) {
@@ -74,14 +70,24 @@ nv40_state_emit(struct nv40_context *nv40)
screen->cur_pctx = nv40->pctx_id;
}
- while (state->dirty) {
- unsigned idx = ffsll(state->dirty) - 1;
+ for (i = 0, states = state->dirty; states; i++) {
+ if (!(states & (1ULL << i)))
+ continue;
+ so_ref (state->hw[i], &nv40->screen->state[i]);
+ so_emit(nv40->nvws, nv40->screen->state[i]);
+ states &= ~(1ULL << i);
+ }
- so_ref (state->hw[idx], &nv40->screen->state[idx]);
- so_emit(nv40->nvws, nv40->screen->state[idx]);
- state->dirty &= ~(1ULL << idx);
+ if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
+ (1ULL << NV40_STATE_FRAGTEX0))) {
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (2);
+ BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
+ OUT_RING (1);
}
+ state->dirty = 0;
+
so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]);
for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
if (!(samplers & (1 << i)))
@@ -91,18 +97,62 @@ nv40_state_emit(struct nv40_context *nv40)
samplers &= ~(1ULL << i);
}
so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]);
- so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]);
+ if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW)
+ so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]);
}
-void
-nv40_emit_hw_state(struct nv40_context *nv40)
+boolean
+nv40_state_validate(struct nv40_context *nv40)
{
- nv40_state_validate(nv40);
- nv40_state_emit(nv40);
+ boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE;
+
+ if (nv40->render_mode != HW) {
+ /* Don't even bother trying to go back to hw if none
+ * of the states that caused swtnl previously have changed.
+ */
+ if ((nv40->fallback_swtnl & nv40->dirty)
+ != nv40->fallback_swtnl)
+ return FALSE;
+
+ /* Attempt to go to hwtnl again */
+ nv40->pipe.flush(&nv40->pipe, 0);
+ nv40->dirty |= (NV40_NEW_VIEWPORT |
+ NV40_NEW_VERTPROG |
+ NV40_NEW_ARRAYS |
+ NV40_NEW_UCP);
+ nv40->render_mode = HW;
+ }
+
+ nv40_state_do_validate(nv40, render_states);
+ if (nv40->fallback_swtnl || nv40->fallback_swrast)
+ return FALSE;
+
+ if (was_sw)
+ NOUVEAU_ERR("swtnl->hw\n");
+
+ return TRUE;
+}
+
+boolean
+nv40_state_validate_swtnl(struct nv40_context *nv40)
+{
+ /* Setup for swtnl */
+ if (nv40->render_mode == HW) {
+ NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl);
+ nv40->pipe.flush(&nv40->pipe, 0);
+ nv40->dirty |= (NV40_NEW_VIEWPORT |
+ NV40_NEW_VERTPROG |
+ NV40_NEW_ARRAYS |
+ NV40_NEW_UCP);
+ nv40->render_mode = SWTNL;
+ }
+
+ nv40_state_do_validate(nv40, swtnl_states);
+ if (nv40->fallback_swrast) {
+ NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast);
+ return FALSE;
+ }
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (2);
- BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
- OUT_RING (1);
+ return TRUE;
}
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
index 3a32533907..9e5c7a72a7 100644
--- a/src/gallium/drivers/nv40/nv40_state_viewport.c
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -3,18 +3,43 @@
static boolean
nv40_state_viewport_validate(struct nv40_context *nv40)
{
- struct nouveau_stateobj *so = so_new(9, 0);
+ struct nouveau_stateobj *so = so_new(11, 0);
struct pipe_viewport_state *vpt = &nv40->viewport;
- so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
- so_data (so, fui(vpt->translate[0]));
- so_data (so, fui(vpt->translate[1]));
- so_data (so, fui(vpt->translate[2]));
- so_data (so, fui(vpt->translate[3]));
- so_data (so, fui(vpt->scale[0]));
- so_data (so, fui(vpt->scale[1]));
- so_data (so, fui(vpt->scale[2]));
- so_data (so, fui(vpt->scale[3]));
+ if (nv40->render_mode == HW) {
+ so_method(so, nv40->screen->curie,
+ NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(vpt->translate[0]));
+ so_data (so, fui(vpt->translate[1]));
+ so_data (so, fui(vpt->translate[2]));
+ so_data (so, fui(vpt->translate[3]));
+ so_data (so, fui(vpt->scale[0]));
+ so_data (so, fui(vpt->scale[1]));
+ so_data (so, fui(vpt->scale[2]));
+ so_data (so, fui(vpt->scale[3]));
+ so_method(so, nv40->screen->curie, 0x1d78, 1);
+ so_data (so, 1);
+ } else {
+ so_method(so, nv40->screen->curie,
+ NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(0.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(1.0));
+ so_data (so, fui(0.0));
+ /* Not entirely certain what this is yet. The DDX uses this
+ * value also as it fixes rendering when you pass
+ * pre-transformed vertices to the GPU. My best guess is that
+ * this bypasses some culling/clipping stage. Might be worth
+ * noting that points/lines are unaffected by whatever this
+ * value fixes, only filled polygons are affected.
+ */
+ so_method(so, nv40->screen->curie, 0x1d78, 1);
+ so_data (so, 0x110);
+ }
so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
return TRUE;
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index f16afc23b8..fad423fdf8 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -8,6 +8,8 @@
#include "nouveau/nouveau_channel.h"
#include "nouveau/nouveau_pushbuf.h"
+#define FORCE_SWTNL 0
+
static INLINE int
nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp)
{
@@ -165,7 +167,11 @@ nv40_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
unsigned nr;
nv40_vbo_set_idxbuf(nv40, NULL, 0);
- nv40_emit_hw_state(nv40);
+ if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+ return nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ }
+ nv40_state_emit(nv40);
BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
OUT_RING (nvgl_primitive(mode));
@@ -274,7 +280,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe,
struct pipe_winsys *ws = pipe->winsys;
void *map;
- nv40_emit_hw_state(nv40);
+ nv40_state_emit(nv40);
map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ);
if (!ib) {
@@ -315,7 +321,7 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
struct nv40_context *nv40 = nv40_context(pipe);
unsigned nr;
- nv40_emit_hw_state(nv40);
+ nv40_state_emit(nv40);
BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
OUT_RING (nvgl_primitive(mode));
@@ -352,8 +358,16 @@ nv40_draw_elements(struct pipe_context *pipe,
unsigned mode, unsigned start, unsigned count)
{
struct nv40_context *nv40 = nv40_context(pipe);
+ boolean idxbuf;
+
+ idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
+ if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
+ return nv40_draw_elements_swtnl(pipe, NULL, 0,
+ mode, start, count);
+ }
+ nv40_state_emit(nv40);
- if (nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize)) {
+ if (idxbuf) {
nv40_draw_elements_vbo(pipe, mode, start, count);
} else {
nv40_draw_elements_inline(pipe, indexBuffer, indexSize,
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 3d730c1a32..9f1ee575ce 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -634,21 +634,29 @@ out_err:
static boolean
nv40_vertprog_validate(struct nv40_context *nv40)
{
- struct nv40_vertex_program *vp = nv40->vertprog;
- struct pipe_buffer *constbuf =
- nv40->constbuf[PIPE_SHADER_VERTEX];
struct nouveau_winsys *nvws = nv40->nvws;
struct pipe_winsys *ws = nv40->pipe.winsys;
+ struct nv40_vertex_program *vp;
+ struct pipe_buffer *constbuf;
boolean upload_code = FALSE, upload_data = FALSE;
int i;
+ if (nv40->render_mode == HW) {
+ vp = nv40->vertprog;
+ constbuf = nv40->constbuf[PIPE_SHADER_VERTEX];
+ } else {
+ vp = nv40->swtnl.vertprog;
+ constbuf = NULL;
+ }
+
/* Translate TGSI shader into hw bytecode */
if (vp->translated)
goto check_gpu_resources;
+ nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG;
nv40_vertprog_translate(nv40, vp);
if (!vp->translated) {
- nv40->fallback |= NV40_FALLBACK_TNL;
+ nv40->fallback_swtnl |= NV40_NEW_VERTPROG;
return FALSE;
}