From 496f9ddf351bd91ea17c257f94e3504e87992202 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Oct 2006 11:28:45 +0000 Subject: eliminate rhw divide under some circumstances --- src/mesa/drivers/dri/i965/brw_vs.c | 7 +++++- src/mesa/drivers/dri/i965/brw_vs.h | 3 ++- src/mesa/drivers/dri/i965/brw_vs_emit.c | 40 ++++++++++++++++++++------------- 3 files changed, 32 insertions(+), 18 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 2a94ac6496..e5a28b96e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -105,6 +105,11 @@ static void brw_upload_vs_prog( struct brw_context *brw ) key.copy_edgeflag = (brw->attribs.Polygon->FrontMode != GL_FILL || brw->attribs.Polygon->BackMode != GL_FILL); + /* BRW_NEW_METAOPS + */ + if (brw->metaops.active) + key.know_w_is_one = 1; + /* Make an early check for the key. */ if (brw_search_cache(&brw->cache[BRW_VS_PROG], @@ -122,7 +127,7 @@ static void brw_upload_vs_prog( struct brw_context *brw ) const struct brw_tracked_state brw_vs_prog = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_POLYGON, - .brw = BRW_NEW_VERTEX_PROGRAM, + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_METAOPS, .cache = 0 }, .update = brw_upload_vs_prog diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index d355681b5e..fdb5785d67 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -43,7 +43,8 @@ struct brw_vs_prog_key { GLuint program_string_id; GLuint nr_userclip:4; GLuint copy_edgeflag:1; - GLuint pad:27; + GLuint know_w_is_one:1; + GLuint pad:26; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a22740084d..da9d3bacb0 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -797,13 +797,21 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Build ndc coords? 
TODO: Shortcircuit when w is known to be one. */ - ndc = get_tmp(c); - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + if (!c->key.know_w_is_one) { + ndc = get_tmp(c); + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + } + else { + ndc = pos; + } /* This includes the workaround for -ve rhw, so is no longer an * optional step: */ + if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || + c->key.nr_userclip || + !c->key.know_w_is_one) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -836,20 +844,17 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - brw_CMP(p, - vec8(brw_null_reg()), - BRW_CONDITIONAL_L, - brw_swizzle1(ndc, 3), - brw_imm_f(0)); + if (!c->key.know_w_is_one) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); - brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); - brw_MOV(p, ndc, brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - - - - + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); @@ -857,6 +862,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) release_tmp(c, header1); } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } /* Emit the (interleaved) headers for the two vertices - an 8-reg -- cgit v1.2.3