diff options
author | Keith Whitwell <keithw@vmware.com> | 2009-11-17 14:46:23 -0800 |
---|---|---|
committer | Keith Whitwell <keithw@vmware.com> | 2009-11-17 14:50:49 -0800 |
commit | 1877e6cd2d76143ef8a9c516122afe614ae3b4a4 (patch) | |
tree | 857cd3547555a14fe990dacc206234c892b9df1c | |
parent | d299ee771b577a8ce839861d1af336fc316e0a1d (diff) |
i965g: handle special vs outputs specially
Where vs output semantic tags indicate an output is signficant for
fixed function processing (such as clipping, unfilled modes, etc),
retain information about that output so that we can get to it easily
later on.
Fix up the unfilled processing, but hard-wire edgeflag to one for now.
With this change, trivial/tri-unfilled works.
-rw-r--r-- | src/gallium/drivers/i965/brw_clip.c | 45 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_clip.h | 15 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_clip_line.c | 5 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_clip_tri.c | 12 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_clip_unfilled.c | 9 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_clip_util.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_context.h | 19 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_pipe_shader.c | 38 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_vs.c | 13 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_vs_emit.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_wm_pass2.c | 2 |
11 files changed, 147 insertions, 55 deletions
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 35e1d2fdbd..4ec7b823e8 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -58,7 +58,6 @@ compile_clip_prog( struct brw_context *brw, const GLuint *program; GLuint program_size; GLuint delta; - GLuint i; memset(&c, 0, sizeof(c)); @@ -82,16 +81,26 @@ compile_clip_prog( struct brw_context *brw, else delta = REG_SIZE; - /* XXX: c.offset is now pretty redundant: - */ - for (i = 0; i < c.key.nr_attrs; i++) { - c.offset[i] = delta; - delta += ATTR_SIZE; - } - /* XXX: c.nr_attrs is very redundant: */ c.nr_attrs = c.key.nr_attrs; + + c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; + + if (c.key.output_color0) + c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; + + if (c.key.output_color1) + c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; + + if (c.key.output_bfc0) + c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; + + if (c.key.output_bfc1) + c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; + + if (c.key.output_edgeflag) + c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ @@ -158,21 +167,33 @@ compile_clip_prog( struct brw_context *brw, static enum pipe_error upload_clip_prog(struct brw_context *brw) { - enum pipe_error ret; + const struct brw_vertex_shader *vs = brw->curr.vertex_shader; struct brw_clip_prog_key key; + enum pipe_error ret; /* Populate the key, starting from the almost-complete version from * the rast state. */ /* PIPE_NEW_RAST */ - memcpy(&key, &brw->curr.rast->clip_key, sizeof key); - + key = brw->curr.rast->clip_key; + /* BRW_NEW_REDUCED_PRIMITIVE */ key.primitive = brw->reduced_primitive; + /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove + * dependency on CACHE_NEW_VS_PROG + */ + /* CACHE_NEW_VS_PROG */ + key.nr_attrs = brw->vs.prog_data->nr_outputs; + key.output_edgeflag = brw->vs.prog_data->output_edgeflag; + /* PIPE_NEW_VS */ - key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1; + key.output_hpos = vs->output_hpos; + key.output_color0 = vs->output_color0; + key.output_color1 = vs->output_color1; + key.output_bfc0 = vs->output_bfc0; + key.output_bfc1 = vs->output_bfc1; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 9bec9643d7..8729efa47b 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -42,7 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint nr_attrs:5; + GLuint nr_attrs:6; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -54,7 +54,14 @@ struct brw_clip_prog_key { GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:7; + GLuint output_hpos:6; /* not always zero? */ + + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; + GLuint pad1:2; GLfloat offset_factor; GLfloat offset_units; @@ -123,7 +130,6 @@ struct brw_clip_compile { GLuint last_mrf; GLuint header_position_offset; - GLuint offset[PIPE_MAX_SHADER_OUTPUTS]; GLboolean need_ff_sync; GLuint nr_color_attrs; @@ -131,7 +137,8 @@ struct brw_clip_compile { GLuint offset_color1; GLuint offset_bfc0; GLuint offset_bfc1; - + + GLuint offset_hpos; GLuint offset_edgeflag; }; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index a4790bda95..54282d975e 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -132,7 +132,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *is_neg2 = NULL; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); - const int hpos = 0; /* XXX: position not always first element */ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); @@ -173,12 +172,12 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* dp = DP4(vtx->position, plane) */ - brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[hpos]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation); /* if (IS_NEGATIVE(dp1)) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[hpos]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation); is_negative = brw_IF(p, BRW_EXECUTE_1); { /* diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 5486f4fa89..fa00f6044f 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -249,13 +249,13 @@ void brw_clip_tri( struct brw_clip_compile *c ) /* IS_NEGATIVE(prev) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation); prev_test = brw_IF(p, BRW_EXECUTE_1); { /* IS_POSITIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { @@ -297,7 +297,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) /* IS_NEGATIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { /* Going out of bounds. Avoid division by zero as we @@ -462,9 +462,9 @@ static void brw_clip_test( struct brw_clip_compile *c ) brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); - brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos)); + brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos)); + brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos)); brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index 0fab3a5f1a..aec835b8ce 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -45,9 +45,9 @@ static void compute_tri_direction( struct brw_clip_compile *c ) struct brw_compile *p = &c->func; struct brw_reg e = c->reg.tmp0; struct brw_reg f = c->reg.tmp1; - struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos); struct brw_reg v0n = get_tmp(c); @@ -123,7 +123,8 @@ static void copy_bfc( struct brw_clip_compile *c ) /* Do we have any colors to copy? */ - if (c->nr_color_attrs == 0) + if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) && + (c->offset_color1 == 0 || c->offset_bfc1 == 0)) return; /* In some wierd degnerate cases we can end up testing the diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 018511e699..872042c9a9 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -106,7 +106,7 @@ static void brw_clip_project_vertex( struct brw_clip_compile *c, /* Fixup position. Extract from the original vertex and re-project * to screen space: */ - brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos)); brw_clip_project_position(c, tmp); brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 31f3cf3685..31e04b6e14 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -152,13 +152,23 @@ struct brw_rasterizer_state; struct brw_vertex_shader { const struct tgsi_token *tokens; + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + struct tgsi_shader_info info; - unsigned has_flow_control:1; + GLuint has_flow_control:1; + GLuint use_const_buffer:1; + + /* Offsets of special vertex shader outputs required for clipping. + */ + GLuint output_hpos:6; /* not always zero? */ + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; unsigned id; - struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ - GLboolean use_const_buffer; }; struct brw_fs_signature { @@ -317,7 +327,8 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ - GLboolean copy_edgeflag; + GLuint output_edgeflag; + GLboolean writes_psiz; /* Used for calculating urb partitions: diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 7febf9e0c2..02bc8fa130 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -124,21 +124,51 @@ static void *brw_create_vs_state( struct pipe_context *pipe, const struct pipe_shader_state *shader ) { struct brw_context *brw = brw_context(pipe); + struct brw_vertex_shader *vs; + unsigned i; - struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader); + vs = CALLOC_STRUCT(brw_vertex_shader); if (vs == NULL) return NULL; /* Duplicate tokens, scan shader */ - vs->id = brw->program_id++; - vs->has_flow_control = has_flow_control(&vs->info); - vs->tokens = tgsi_dup_tokens(shader->tokens); if (vs->tokens == NULL) goto fail; tgsi_scan_shader(vs->tokens, &vs->info); + + vs->id = brw->program_id++; + vs->has_flow_control = has_flow_control(&vs->info); + + for (i = 0; i < vs->info.num_outputs; i++) { + int index = vs->info.output_semantic_index[i]; + switch (vs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + vs->output_hpos = i; + break; + case TGSI_SEMANTIC_COLOR: + if (index == 0) + vs->output_color0 = i; + else + vs->output_color1 = i; + break; + case TGSI_SEMANTIC_BCOLOR: + if (index == 0) + vs->output_bfc0 = i; + else + vs->output_bfc1 = i; + break; +#if 0 + case TGSI_SEMANTIC_EDGEFLAG: + vs->output_edgeflag = i; + break; +#endif + } + } + + /* Done: */ diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 05a62ed974..2668392919 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -57,7 +57,18 @@ static enum pipe_error do_vs_prog( struct brw_context *brw, c.prog_data.nr_outputs = vp->info.num_outputs; c.prog_data.nr_inputs = vp->info.num_inputs; - c.prog_data.copy_edgeflag = c.key.copy_edgeflag; + + /* XXX: we want edgeflag handling to be integrated to the vertex + * shader, but are currently faking the edgeflag output: + */ + if (c.key.copy_edgeflag) { + c.prog_data.output_edgeflag = c.prog_data.nr_outputs; + c.prog_data.nr_outputs++; + } + else { + c.prog_data.output_edgeflag = ~0; + } + if (1) tgsi_dump(c.vp->tokens, 0); diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 933c9c4d63..bcaeaca62d 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -70,11 +70,17 @@ static boolean is_position_output( struct brw_vs_compile *c, unsigned vs_output ) { struct brw_vertex_shader *vs = c->vp; - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - return (semantic == TGSI_SEMANTIC_POSITION && - index == 0); + if (vs_output == c->prog_data.output_edgeflag) { + return FALSE; + } + else { + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); + } } @@ -83,15 +89,22 @@ static boolean find_output_slot( struct brw_vs_compile *c, unsigned *fs_input_slot ) { struct brw_vertex_shader *vs = c->vp; - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - unsigned i; - for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { - if (c->key.fs_signature.input[i].semantic == semantic && + if (vs_output == c->prog_data.output_edgeflag) { + *fs_input_slot = c->key.fs_signature.nr_inputs; + return TRUE; + } + else { + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; + + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && c->key.fs_signature.input[i].semantic_index == index) { - *fs_input_slot = i; - return TRUE; + *fs_input_slot = i; + return TRUE; + } } } @@ -219,7 +232,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* XXX: need to access vertex output semantics here: */ - for (i = 0; i < c->prog_data.nr_outputs; i++) { + for (i = 0; i < c->nr_outputs; i++) { unsigned slot; /* XXX: Put output position in slot zero always. Clipper, etc, @@ -1116,10 +1129,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { - assert(0); brw_MOV(p, - get_reg(c, TGSI_FILE_OUTPUT, 0), - get_reg(c, TGSI_FILE_INPUT, 0)); + get_reg(c, TGSI_FILE_OUTPUT, c->prog_data.output_edgeflag), + brw_imm_f(1)); } /* Build ndc coords */ diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index 2a879863ab..56f39d036b 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -93,7 +93,7 @@ static void init_registers( struct brw_wm_compile *c ) assert(c->key.vp_nr_outputs >= 1); c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2; + c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2; c->prog_data.curb_read_length = c->nr_creg * 2; /* Note this allocation: |