diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_wm_emit.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 673 |
1 files changed, 393 insertions, 280 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index bf80a2942a..f316e0cda4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -44,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) return reg; } + /* Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, @@ -60,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.8 -- ? */ - -static void emit_pixel_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask) +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask) { + struct brw_compile *p = &c->func; struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + struct brw_reg dst0_uw, dst1_uw; + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + if (c->dispatch_width == 16) { + dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } else { + dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } + /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, - vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + dst0_uw, stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, - vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + dst1_uw, stride(suboffset(r1_uw,5), 2, 4, 0), brw_imm_v(0x11001100)); } - - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + brw_pop_insn_state(p); } - -static void emit_delta_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -118,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_compile *p = &c->func; @@ -146,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } -static void emit_pixel_w( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas) +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { + struct brw_compile *p = &c->func; + /* Don't need this if all you are doing is interpolating color, for * instance. */ @@ -165,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p, brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); /* Calc w */ - brw_math_16( p, dst[3], - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); + if (c->dispatch_width == 16) { + brw_math_16(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } else { + brw_math(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } } } - -static void emit_linterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas ) +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -199,12 +218,12 @@ static void emit_linterp( struct brw_compile *p, } -static void emit_pinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas, - const struct brw_reg *w) +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -229,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p, } -static void emit_cinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -251,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p, } /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -static void emit_frontfacing( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask ) +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask) { struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); GLuint i; @@ -352,13 +371,13 @@ void emit_ddxy(struct brw_compile *p, brw_set_saturate(p, 0); } -static void emit_alu1( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { GLuint i; @@ -376,15 +395,15 @@ static void emit_alu1( struct brw_compile *p, } -static void emit_alu2( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -402,12 +421,12 @@ static void emit_alu2( struct brw_compile *p, } -static void emit_mad( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -422,26 +441,12 @@ static void emit_mad( struct brw_compile *p, } } -static void emit_trunc( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_RNDZ(p, dst[i], arg0[i]); - } - } -} - -static void emit_lrp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -461,21 +466,24 @@ static void emit_lrp( struct brw_compile *p, } } -static void emit_sop( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - GLuint cond, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0)); + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst[i], brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_set_predicate_control_flag_value(p, 0xff); + brw_pop_insn_state(p); } } } @@ -559,11 +567,11 @@ static void emit_cmp( struct brw_compile *p, } } -static void emit_max( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -583,11 +591,11 @@ static void emit_max( struct brw_compile *p, } } -static void emit_min( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -608,11 +616,11 @@ static void emit_min( struct brw_compile *p, } -static void emit_dp3( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -630,11 +638,11 @@ static void emit_dp3( struct brw_compile *p, } -static void emit_dp4( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -653,11 +661,11 @@ static void emit_dp4( struct brw_compile *p, } -static void emit_dph( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -676,15 +684,15 @@ static void emit_dph( struct brw_compile *p, } -static void emit_xpd( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; - assert(!(mask & WRITEMASK_W) == WRITEMASK_X); + assert((mask & WRITEMASK_W) != WRITEMASK_W); for (i = 0 ; i < 3; i++) { if (mask & (1<<i)) { @@ -701,41 +709,68 @@ static void emit_xpd( struct brw_compile *p, } -static void emit_math1( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); + /* If compressed, this will write message reg 2,3 from arg0.x's 16 + * channels. + */ brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ - brw_math_16(p, - dst[dst_chan], + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, + dst[dst_chan], + function, + saturate, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, + saturate, + 3, brw_null_reg(), + BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + } + brw_pop_insn_state(p); } -static void emit_math2( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ @@ -746,173 +781,233 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2), arg0[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + } - - /* Send two messages to perform all 16 operations: - */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + saturate, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, - offset(dst[dst_chan],1), - function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 4, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - + /* Send two messages to perform all 16 operations: + */ + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + saturate, + 4, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } brw_pop_insn_state(p); } - -static void emit_tex( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow) { struct brw_compile *p = &c->func; - GLuint msgLength, responseLength; - GLuint i, nr; + struct intel_context *intel = &p->brw->intel; + struct brw_reg dst_retyped; + GLuint cur_mrf = 2, response_length; + GLuint i, nr_texcoords; GLuint emit; GLuint msg_type; + GLuint mrf_per_channel; + GLuint simd_mode; + + if (c->dispatch_width == 16) { + mrf_per_channel = 2; + response_length = 8; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } else { + mrf_per_channel = 1; + response_length = 4; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + } /* How many input regs are there? */ - switch (inst->tex_idx) { + switch (tex_idx) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; - nr = 1; + nr_texcoords = 1; break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: emit = WRITEMASK_XY; - nr = 2; + nr_texcoords = 2; break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; - nr = 3; + nr_texcoords = 3; break; default: /* unexpected target */ abort(); } - if (inst->tex_shadow) { - nr = 4; - emit |= WRITEMASK_W; - } + /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ + if (!intel->is_ironlake && c->dispatch_width == 8) + nr_texcoords = 3; - msgLength = 1; + /* For shadow comparisons, we have to supply u,v,r. */ + if (shadow) + nr_texcoords = 3; - for (i = 0; i < nr; i++) { - static const GLuint swz[4] = {0,1,2,2}; - if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]); + /* Emit the texcoords. */ + for (i = 0; i < nr_texcoords; i++) { + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(cur_mrf), arg[i]); else - brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0)); - msgLength += 2; + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; } - responseLength = 8; /* always */ + /* Fill in the shadow comparison reference value. */ + if (shadow) { + if (intel->is_ironlake) { + /* Fill in the cube map array index value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } else if (c->dispatch_width == 8) { + /* Fill in the LOD bias value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } - if (BRW_IS_IGDNG(p->brw)) { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + if (intel->is_ironlake) { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; } else { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + /* Note that G45 and older determines shadow compare and dispatch width + * from message length for most messages. + */ + if (c->dispatch_width == 16 && shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; } - brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + brw_SAMPLE(p, + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, - msg_type, - responseLength, - msgLength, - 0, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, + msg_type, + response_length, + cur_mrf - 1, + 0, 1, - BRW_SAMPLER_SIMD_MODE_SIMD16); + simd_mode); } -static void emit_txb( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; GLuint msgLength; GLuint msg_type; - /* Shadow ignored for txb. + GLuint mrf_per_channel; + GLuint response_length; + struct brw_reg dst_retyped; + + /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased + * samples, so we'll use the 16-wide instruction, leave the second halves + * undefined, and trust the execution mask to keep the undefined pixels + * from mattering. */ - switch (inst->tex_idx) { + if (c->dispatch_width == 16 || !intel->is_ironlake) { + if (intel->is_ironlake) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + mrf_per_channel = 2; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 8; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + mrf_per_channel = 1; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 4; + } + + /* Shadow ignored for txb. */ + switch (tex_idx) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), arg[2]); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]); break; default: /* unexpected target */ abort(); } - brw_MOV(p, brw_message_reg(8), arg[3]); - msgLength = 9; - - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]); + msgLength = 2 + 4 * mrf_per_channel - 1; brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, msg_type, - 8, /* responseLength */ + response_length, msgLength, 0, 1, @@ -920,11 +1015,13 @@ static void emit_txb( struct brw_wm_compile *c, } -static void emit_lit( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +static void emit_lit(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; + assert((mask & WRITEMASK_XW) == 0); if (mask & WRITEMASK_Y) { @@ -934,7 +1031,7 @@ static void emit_lit( struct brw_compile *p, } if (mask & WRITEMASK_Z) { - emit_math2(p, BRW_MATH_FUNCTION_POW, + emit_math2(c, BRW_MATH_FUNCTION_POW, &dst[2], WRITEMASK_X | (mask & SATURATE), &arg0[1], @@ -989,7 +1086,7 @@ static void emit_kil_nv( struct brw_wm_compile *c ) brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ brw_AND(p, r0uw, c->emit_mask_reg, r0uw); brw_pop_insn_state(p); } @@ -1001,7 +1098,13 @@ static void fire_fb_write( struct brw_wm_compile *c, GLuint eot ) { struct brw_compile *p = &c->func; - + struct brw_reg dst; + + if (c->dispatch_width == 16) + dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); + /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ @@ -1018,7 +1121,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, - retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), target, @@ -1050,14 +1153,15 @@ static void emit_aa( struct brw_wm_compile *c, * \param arg1 the pass-through depth value * \param arg2 the shader-computed depth value */ -static void emit_fb_write( struct brw_wm_compile *c, - struct brw_reg *arg0, - struct brw_reg *arg1, - struct brw_reg *arg2, - GLuint target, - GLuint eot) +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; GLuint nr = 2; GLuint channel; @@ -1069,30 +1173,37 @@ static void emit_fb_write( struct brw_wm_compile *c, /* I don't really understand how this achieves the color interleave * (ie RGBARGBA) in the result: [Do the saturation here] */ - { - brw_push_insn_state(p); - - for (channel = 0; channel < 4; channel++) { + brw_push_insn_state(p); + + for (channel = 0; channel < 4; channel++) { + if (c->dispatch_width == 16 && brw->has_compr4) { + /* By setting the high bit of the MRF register number, we indicate + * that we want COMPR4 mode - instead of doing the usual destination + * + 1 for the second half we get destination + 4. + */ + brw_MOV(p, + brw_message_reg(nr + channel + (1 << 7)), + arg0[channel]); + } else { /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); - - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, - brw_message_reg(nr + channel + 4), - sechalf(arg0[channel])); - } - /* skip over the regs populated above: - */ - nr += 8; - - brw_pop_insn_state(p); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(nr + channel + 4), + sechalf(arg0[channel])); + } + } } + /* skip over the regs populated above: + */ + nr += 8; + brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) { @@ -1142,7 +1253,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1156,7 +1267,6 @@ static void emit_fb_write( struct brw_wm_compile *c, } } - /** * Move a GPR to scratch memory. */ @@ -1294,7 +1404,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: @@ -1306,7 +1416,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_PIXELW: - emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: @@ -1364,7 +1474,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_TRUNC: - emit_trunc(p, dst, dst_flags, args[0]); + emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); break; case OPCODE_LRP: @@ -1391,27 +1501,27 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Higher math functions: */ case OPCODE_RCP: - emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: @@ -1419,13 +1529,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: - emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: @@ -1463,17 +1573,20 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_LIT: - emit_lit(p, dst, dst_flags, args[0]); + emit_lit(c, dst, dst_flags, args[0]); break; /* Texturing operations: */ case OPCODE_TEX: - emit_tex(c, inst, dst, dst_flags, args[0]); + emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit, + inst->tex_shadow); break; case OPCODE_TXB: - emit_txb(c, inst, dst, dst_flags, args[0]); + emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit); break; case OPCODE_KIL: |