diff options
Diffstat (limited to 'src/gallium/drivers/i965/brw_wm_fp.c')
-rw-r--r-- | src/gallium/drivers/i965/brw_wm_fp.c | 1224 |
1 files changed, 1224 insertions, 0 deletions
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c new file mode 100644 index 0000000000..9c5b527f89 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -0,0 +1,1224 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_shader_tokens.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_util.h" + +#include "brw_wm.h" +#include "brw_util.h" +#include "brw_debug.h" + + +/*********************************************************************** + * Source regs + */ + +static struct brw_fp_src src_reg(GLuint file, GLuint idx) +{ + struct brw_fp_src reg; + reg.file = file; + reg.index = idx; + reg.swizzle = BRW_SWIZZLE_XYZW; + reg.indirect = 0; + reg.negate = 0; + reg.abs = 0; + return reg; +} + +static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst) +{ + return src_reg(dst.file, dst.index); +} + +static struct brw_fp_src src_undef( void ) +{ + return src_reg(TGSI_FILE_NULL, 0); +} + +static GLboolean src_is_undef(struct brw_fp_src src) +{ + return src.file == TGSI_FILE_NULL; +} + +static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w ) +{ + unsigned swz = reg.swizzle; + + reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 | + BRW_GET_SWZ(swz, y) << 2 | + BRW_GET_SWZ(swz, z) << 4 | + BRW_GET_SWZ(swz, w) << 6 ); + + return reg; +} + +static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x ) +{ + return src_swizzle(reg, x, x, x, x); +} + +static struct brw_fp_src src_abs( struct brw_fp_src src ) +{ + src.negate = 0; + src.abs = 1; + return src; +} + +static struct brw_fp_src src_negate( struct brw_fp_src src ) +{ + src.negate = 1; + src.abs = 0; + return src; +} + + +static int match_or_expand_immediate( const float *v, + unsigned nr, + float *v2, + unsigned *nr2, + unsigned *swizzle ) +{ + unsigned i, j; + + *swizzle = 0; + + for (i = 0; i < nr; i++) { + boolean found = FALSE; + + for (j = 0; j < *nr2 && !found; j++) { + if (v[i] == v2[j]) { + *swizzle |= j << (i * 2); + found = TRUE; + } + } + + if (!found) { + if (*nr2 >= 4) + return FALSE; + + v2[*nr2] = v[i]; + *swizzle |= *nr2 << (i * 2); + (*nr2)++; + } + } + + return TRUE; +} + + + +/* Internally generated immediates: overkill... + */ +static struct brw_fp_src src_imm( struct brw_wm_compile *c, + const GLfloat *v, + unsigned nr) +{ + unsigned i, j; + unsigned swizzle; + + /* Could do a first pass where we examine all existing immediates + * without expanding. + */ + + for (i = 0; i < c->nr_immediates; i++) { + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + if (c->nr_immediates < Elements(c->immediate)) { + i = c->nr_immediates++; + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + c->error = 1; + return src_undef(); + +out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. + */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + + return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), + BRW_GET_SWZ(swizzle, X), + BRW_GET_SWZ(swizzle, Y), + BRW_GET_SWZ(swizzle, Z), + BRW_GET_SWZ(swizzle, W) ); +} + + + +static struct brw_fp_src src_imm1f( struct brw_wm_compile *c, + GLfloat f ) +{ + return src_imm(c, &f, 1); +} + +static struct brw_fp_src src_imm4f( struct brw_wm_compile *c, + GLfloat x, + GLfloat y, + GLfloat z, + GLfloat w) +{ + GLfloat f[4] = {x,y,z,w}; + return src_imm(c, f, 4); +} + + + +/*********************************************************************** + * Dest regs + */ + +static struct brw_fp_dst dst_reg(GLuint file, GLuint idx) +{ + struct brw_fp_dst reg; + reg.file = file; + reg.index = idx; + reg.writemask = BRW_WRITEMASK_XYZW; + reg.indirect = 0; + reg.saturate = 0; + return reg; +} + +static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask ) +{ + reg.writemask &= mask; + return reg; +} + +static struct brw_fp_dst dst_undef( void ) +{ + return dst_reg(TGSI_FILE_NULL, 0); +} + +static boolean dst_is_undef( struct brw_fp_dst dst ) +{ + return dst.file == TGSI_FILE_NULL; +} + +static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag ) +{ + reg.saturate = flag; + return reg; +} + +static struct brw_fp_dst get_temp( struct brw_wm_compile *c ) +{ + int bit = ffs( ~c->fp_temp ); + + if (!bit) { + debug_printf("%s: out of temporaries\n", __FILE__); + } + + c->fp_temp |= 1<<(bit-1); + return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1)); +} + + +static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp ) +{ + c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp)); +} + + +/*********************************************************************** + * Instructions + */ + +static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c) +{ + return &c->fp_instructions[c->nr_fp_insns++]; +} + +static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + GLuint tex_unit, + GLuint target, + GLuint sampler, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + struct brw_fp_instruction *inst = get_fp_inst(c); + + if (tex_unit || target) + assert(op == TGSI_OPCODE_TXP || + op == TGSI_OPCODE_TXB || + op == TGSI_OPCODE_TEX || + op == WM_FB_WRITE); + + inst->opcode = op; + inst->dst = dest; + inst->tex_unit = tex_unit; + inst->target = target; + inst->sampler = sampler; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + + return inst; +} + + +static INLINE void emit_op3(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2); +} + + +static INLINE void emit_op2(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef()); +} + +static INLINE void emit_op1(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef()); +} + +static INLINE void emit_op0(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest) +{ + emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef()); +} + + + +/* Many opcodes produce the same value across all the result channels. + * We'd rather not have to support that splatting in the opcode implementations, + * and brw_wm_pass*.c wants to optimize them out by shuffling references around + * anyway. We can easily get both by emitting the opcode to one channel, and + * then MOVing it to the others, which brw_wm_pass*.c already understands. + */ +static void emit_scalar_insn(struct brw_wm_compile *c, + unsigned opcode, + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + unsigned first_chan = ffs(dst.writemask) - 1; + unsigned first_mask = 1 << first_chan; + + if (dst.writemask == 0) + return; + + emit_op3( c, opcode, + dst_mask(dst, first_mask), + src0, src1, src2 ); + + if (dst.writemask != first_mask) { + emit_op1(c, TGSI_OPCODE_MOV, + dst_mask(dst, ~first_mask), + src_scalar(src_reg_from_dst(dst), first_chan)); + } +} + + +/*********************************************************************** + * Special instructions for interpolation and other tasks + */ + +static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->fp_pixel_xy)) { + struct brw_fp_dst pixel_xy = get_temp(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + + + /* Emit the out calculations, and hold onto the results. Use + * two instructions as a temporary is required. + */ + /* pixel_xy.xy = PIXELXY payload[0]; + */ + emit_op1(c, + WM_PIXELXY, + dst_mask(pixel_xy, BRW_WRITEMASK_XY), + payload_r0_depth); + + c->fp_pixel_xy = src_reg_from_dst(pixel_xy); + } + + return c->fp_pixel_xy; +} + +static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->fp_delta_xy)) { + struct brw_fp_dst delta_xy = get_temp(c); + struct brw_fp_src pixel_xy = get_pixel_xy(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + + /* deltas.xy = DELTAXY pixel_xy, payload[0] + */ + emit_op3(c, + WM_DELTAXY, + dst_mask(delta_xy, BRW_WRITEMASK_XY), + pixel_xy, + payload_r0_depth, + src_undef()); + + c->fp_delta_xy = src_reg_from_dst(delta_xy); + } + + return c->fp_delta_xy; +} + +static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->fp_pixel_w)) { + struct brw_fp_dst pixel_w = get_temp(c); + struct brw_fp_src deltas = get_delta_xy(c); + + /* XXX: assuming position is always first -- valid? + */ + struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0); + + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x + */ + emit_op3(c, + WM_PIXELW, + dst_mask(pixel_w, BRW_WRITEMASK_W), + interp_wpos, + deltas, + src_undef()); + + + c->fp_pixel_w = src_reg_from_dst(pixel_w); + } + + return c->fp_pixel_w; +} + + +/*********************************************************************** + * Emit INTERP instructions ahead of first use of each attrib. + */ + +static void emit_interp( struct brw_wm_compile *c, + GLuint idx, + GLuint semantic, + GLuint interp_mode ) +{ + struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx); + struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx); + struct brw_fp_src deltas = get_delta_xy(c); + + /* Need to use PINTERP on attributes which have been + * multiplied by 1/W in the SF program, and LINTERP on those + * which have not: + */ + switch (semantic) { + case TGSI_SEMANTIC_POSITION: + /* Have to treat wpos.xy specially: + */ + emit_op1(c, + WM_WPOSXY, + dst_mask(dst, BRW_WRITEMASK_XY), + get_pixel_xy(c)); + + /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op2(c, + WM_LINTERP, + dst_mask(dst, BRW_WRITEMASK_ZW), + interp, + deltas); + break; + + case TGSI_SEMANTIC_COLOR: + if (c->key.flat_shade) { + emit_op1(c, + WM_CINTERP, + dst, + interp); + } + else if (interp_mode == TGSI_INTERPOLATE_LINEAR) { + emit_op2(c, + WM_LINTERP, + dst, + interp, + deltas); + } + else { + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); + } + + break; + + case TGSI_SEMANTIC_FOG: + /* Interpolate the fog coordinate */ + emit_op3(c, + WM_PINTERP, + dst_mask(dst, BRW_WRITEMASK_X), + interp, + deltas, + get_pixel_w(c)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + src_imm1f(c, 0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0)); + break; + + case TGSI_SEMANTIC_FACE: + /* XXX review/test this case */ + emit_op0(c, + WM_FRONTFACING, + dst_mask(dst, BRW_WRITEMASK_X)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + src_imm1f(c, 0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0)); + break; + + case TGSI_SEMANTIC_PSIZE: + /* XXX review/test this case */ + emit_op3(c, + WM_PINTERP, + dst_mask(dst, BRW_WRITEMASK_XY), + interp, + deltas, + get_pixel_w(c)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_Z), + src_imm1f(c, 0.0f)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0f)); + break; + + default: + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + emit_op1(c, + WM_CINTERP, + dst, + interp); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_op2(c, + WM_LINTERP, + dst, + interp, + deltas); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); + break; + } + break; + } +} + + +/*********************************************************************** + * Expand various instructions here to simpler forms. + */ +static void precalc_dst( struct brw_wm_compile *c, + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1 ) +{ + if (dst.writemask & BRW_WRITEMASK_Y) { + /* dst.y = mul src0.y, src1.y + */ + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(dst, BRW_WRITEMASK_Y), + src0, + src1); + } + + if (dst.writemask & BRW_WRITEMASK_XZ) { + /* dst.z = mov src0.zzzz + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_Z), + src_scalar(src0, Z)); + + /* dst.x = imm1f(1.0) + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), + src_imm1f(c, 1.0)); + } + if (dst.writemask & BRW_WRITEMASK_W) { + /* dst.w = mov src1.w + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src1); + } +} + + +static void precalc_lit( struct brw_wm_compile *c, + struct brw_fp_dst dst, + struct brw_fp_src src0 ) +{ + if (dst.writemask & BRW_WRITEMASK_XW) { + /* dst.xw = imm(1.0f) + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0), + src_imm1f(c, 1.0f)); + } + + if (dst.writemask & BRW_WRITEMASK_YZ) { + emit_op1(c, + TGSI_OPCODE_LIT, + dst_mask(dst, BRW_WRITEMASK_YZ), + src0); + } +} + + +/** + * Some TEX instructions require extra code, cube map coordinate + * normalization, or coordinate scaling for RECT textures, etc. + * This function emits those extra instructions and the TEX + * instruction itself. + */ +static void precalc_tex( struct brw_wm_compile *c, + struct brw_fp_dst dst, + unsigned target, + unsigned unit, + struct brw_fp_src src0, + struct brw_fp_src sampler ) +{ + struct brw_fp_src coord = src_undef(); + struct brw_fp_dst tmp = dst_undef(); + + assert(unit < BRW_MAX_TEX_UNIT); + + /* Cubemap: find longest component of coord vector and normalize + * it. + */ + if (target == TGSI_TEXTURE_CUBE) { + struct brw_fp_src tmpsrc; + + tmp = get_temp(c); + tmpsrc = src_reg_from_dst(tmp); + + /* tmp = abs(src0) */ + emit_op1(c, + TGSI_OPCODE_MOV, + tmp, + src_abs(src0)); + + /* tmp.X = MAX(tmp.X, tmp.Y) */ + emit_op2(c, TGSI_OPCODE_MAX, + dst_mask(tmp, BRW_WRITEMASK_X), + src_scalar(tmpsrc, X), + src_scalar(tmpsrc, Y)); + + /* tmp.X = MAX(tmp.X, tmp.Z) */ + emit_op2(c, TGSI_OPCODE_MAX, + dst_mask(tmp, BRW_WRITEMASK_X), + tmpsrc, + src_scalar(tmpsrc, Z)); + + /* tmp.X = 1 / tmp.X */ + emit_op1(c, TGSI_OPCODE_RCP, + dst_mask(tmp, BRW_WRITEMASK_X), + tmpsrc); + + /* tmp = src0 * tmp.xxxx */ + emit_op2(c, TGSI_OPCODE_MUL, + tmp, + src0, + src_scalar(tmpsrc, X)); + + coord = tmpsrc; + } + else if (target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT) { + /* XXX: need a mechanism for internally generated constants. + */ + coord = src0; + } + else { + coord = src0; + } + + /* Need to emit YUV texture conversions by hand. Probably need to + * do this here - the alternative is in brw_wm_emit.c, but the + * conversion requires allocating a temporary variable which we + * don't have the facility to do that late in the compilation. + */ + if (c->key.yuvtex_mask & (1 << unit)) { + /* convert ycbcr to RGBA */ + GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<<unit); + struct brw_fp_dst tmp = get_temp(c); + struct brw_fp_src tmpsrc = src_reg_from_dst(tmp); + struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 ); + struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 ); + + /* tmp = TEX ... + */ + emit_tex_op(c, + TGSI_OPCODE_TEX, + dst_saturate(tmp, dst.saturate), + unit, + target, + sampler.index, + coord, + src_undef(), + src_undef()); + + /* tmp.xyz = ADD TMP, C0 + */ + emit_op2(c, TGSI_OPCODE_ADD, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + tmpsrc, + C0); + + /* YUV.y = MUL YUV.y, C0.w + */ + emit_op2(c, TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_Y), + tmpsrc, + src_scalar(C0, W)); + + /* + * if (UV swaped) + * RGB.xyz = MAD YUV.zzx, C1, YUV.y + * else + * RGB.xyz = MAD YUV.xxz, C1, YUV.y + */ + + emit_op3(c, TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_XYZ), + ( swap_uv ? + src_swizzle(tmpsrc, Z,Z,X,X) : + src_swizzle(tmpsrc, X,X,Z,Z)), + C1, + src_scalar(tmpsrc, Y)); + + /* RGB.y = MAD YUV.z, C1.w, RGB.y + */ + emit_op3(c, + TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_Y), + src_scalar(tmpsrc, Z), + src_scalar(C1, W), + src_scalar(src_reg_from_dst(dst), Y)); + + release_temp(c, tmp); + } + else { + /* ordinary RGBA tex instruction */ + emit_tex_op(c, + TGSI_OPCODE_TEX, + dst, + unit, + target, + sampler.index, + coord, + src_undef(), + src_undef()); + } + + /* XXX: add GL_EXT_texture_swizzle support to gallium -- by + * generating shader varients in mesa state tracker. + */ + + /* Release this temp if we ended up allocating it: + */ + if (!dst_is_undef(tmp)) + release_temp(c, tmp); +} + + +/** + * Check if the given TXP instruction really needs the divide-by-W step. + */ +static GLboolean projtex( struct brw_wm_compile *c, + unsigned target, + struct brw_fp_src src ) +{ + /* Only try to detect the simplest cases. Could detect (later) + * cases where we are trying to emit code like RCP {1.0}, MUL x, + * {1.0}, and so on. + * + * More complex cases than this typically only arise from + * user-provided fragment programs anyway: + */ + if (target == TGSI_TEXTURE_CUBE) + return GL_FALSE; /* ut2004 gun rendering !?! */ + + if (src.file == TGSI_FILE_INPUT && + BRW_GET_SWZ(src.swizzle, W) == W && + c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Emit code for TXP. + */ +static void precalc_txp( struct brw_wm_compile *c, + struct brw_fp_dst dst, + unsigned target, + unsigned unit, + struct brw_fp_src src0, + struct brw_fp_src sampler ) +{ + if (projtex(c, target, src0)) { + struct brw_fp_dst tmp = get_temp(c); + + /* tmp0.w = RCP inst.arg[0][3] + */ + emit_op1(c, + TGSI_OPCODE_RCP, + dst_mask(tmp, BRW_WRITEMASK_W), + src_scalar(src0, W)); + + /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww + */ + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + src0, + src_scalar(src_reg_from_dst(tmp), W)); + + /* dst = TEX tmp0 + */ + precalc_tex(c, + dst, + target, + unit, + src_reg_from_dst(tmp), + sampler ); + + release_temp(c, tmp); + } + else + { + /* dst = TEX src0 + */ + precalc_tex(c, dst, target, unit, src0, sampler); + } +} + + +/* XXX: note this returns a src_reg. + */ +static struct brw_fp_src +find_output_by_semantic( struct brw_wm_compile *c, + unsigned semantic, + unsigned index ) +{ + const struct tgsi_shader_info *info = &c->fp->info; + unsigned i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return src_reg( TGSI_FILE_OUTPUT, i ); + + /* If not found, return some arbitrary immediate value: + * + * XXX: this is a good idea but immediates are up generating extra + * curbe entries atm, as they would have in the original driver. + */ + return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */ +} + + +static void emit_fb_write( struct brw_wm_compile *c ) +{ + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0); + GLuint i; + + + outdepth = src_scalar(outdepth, Z); + + for (i = 0 ; i < c->key.nr_cbufs; i++) { + struct brw_fp_src outcolor; + + outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); + + /* Use emit_tex_op so that we can specify the inst->target + * field, which is abused to contain the FB write target and the + * EOT marker + */ + emit_tex_op(c, WM_FB_WRITE, + dst_undef(), + (i == c->key.nr_cbufs - 1), /* EOT */ + i, + 0, /* no sampler */ + outcolor, + payload_r0_depth, + outdepth); + } +} + + +static struct brw_fp_dst translate_dst( struct brw_wm_compile *c, + const struct tgsi_full_dst_register *dst, + unsigned saturate ) +{ + struct brw_fp_dst out; + + out.file = dst->Register.File; + out.index = dst->Register.Index; + out.writemask = dst->Register.WriteMask; + out.indirect = dst->Register.Indirect; + out.saturate = (saturate == TGSI_SAT_ZERO_ONE); + + if (out.indirect) { + assert(dst->Indirect.File == TGSI_FILE_ADDRESS); + assert(dst->Indirect.Index == 0); + } + + return out; +} + + +static struct brw_fp_src translate_src( struct brw_wm_compile *c, + const struct tgsi_full_src_register *src ) +{ + struct brw_fp_src out; + + out.file = src->Register.File; + out.index = src->Register.Index; + out.indirect = src->Register.Indirect; + + out.swizzle = ((src->Register.SwizzleX << 0) | + (src->Register.SwizzleY << 2) | + (src->Register.SwizzleZ << 4) | + (src->Register.SwizzleW << 6)); + + switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) { + case TGSI_UTIL_SIGN_CLEAR: + out.abs = 1; + out.negate = 0; + break; + + case TGSI_UTIL_SIGN_SET: + out.abs = 1; + out.negate = 1; + break; + + case TGSI_UTIL_SIGN_TOGGLE: + out.abs = 0; + out.negate = 1; + break; + + case TGSI_UTIL_SIGN_KEEP: + default: + out.abs = 0; + out.negate = 0; + break; + } + + if (out.indirect) { + assert(src->Indirect.File == TGSI_FILE_ADDRESS); + assert(src->Indirect.Index == 0); + } + + return out; +} + + + +static void emit_insn( struct brw_wm_compile *c, + const struct tgsi_full_instruction *inst ) +{ + unsigned opcode = inst->Instruction.Opcode; + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + int i; + + dst = translate_dst( c, &inst->Dst[0], + inst->Instruction.Saturate ); + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) + src[i] = translate_src( c, &inst->Src[i] ); + + switch (opcode) { + case TGSI_OPCODE_ABS: + emit_op1(c, TGSI_OPCODE_MOV, + dst, + src_abs(src[0])); + break; + + case TGSI_OPCODE_SUB: + emit_op2(c, TGSI_OPCODE_ADD, + dst, + src[0], + src_negate(src[1])); + break; + + case TGSI_OPCODE_SCS: + emit_op1(c, TGSI_OPCODE_SCS, + dst_mask(dst, BRW_WRITEMASK_XY), + src[0]); + break; + + case TGSI_OPCODE_DST: + precalc_dst(c, dst, src[0], src[1]); + break; + + case TGSI_OPCODE_LIT: + precalc_lit(c, dst, src[0]); + break; + + case TGSI_OPCODE_TEX: + precalc_tex(c, dst, + inst->Texture.Texture, + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ + break; + + case TGSI_OPCODE_TXP: + precalc_txp(c, dst, + inst->Texture.Texture, + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ + break; + + case TGSI_OPCODE_TXB: + /* XXX: TXB not done + */ + precalc_tex(c, dst, + inst->Texture.Texture, + src[1].index, /* use sampler unit for tex idx*/ + src[0], + src[1]); + break; + + case TGSI_OPCODE_XPD: + emit_op2(c, TGSI_OPCODE_XPD, + dst_mask(dst, BRW_WRITEMASK_XYZ), + src[0], + src[1]); + break; + + case TGSI_OPCODE_KIL: + emit_op1(c, TGSI_OPCODE_KIL, + dst_mask(dst_undef(), 0), + src[0]); + break; + + case TGSI_OPCODE_END: + emit_fb_write(c); + break; + default: + if (!c->key.has_flow_control && + brw_wm_is_scalar_result(opcode)) + emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]); + else + emit_op3(c, opcode, dst, src[0], src[1], src[2]); + break; + } +} + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ +int brw_wm_pass_fp( struct brw_wm_compile *c ) +{ + struct brw_fragment_shader *fs = c->fp; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *inst; + struct tgsi_full_declaration *decl; + const float *imm; + GLuint size; + GLuint i; + + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("pre-fp:\n"); + tgsi_dump(fs->tokens, 0); + } + + c->fp_pixel_xy = src_undef(); + c->fp_delta_xy = src_undef(); + c->fp_pixel_w = src_undef(); + c->nr_fp_insns = 0; + c->nr_immediates = 0; + + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ + tgsi_parse_init( &parse, fs->tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Turn intput declarations into special WM_* instructions. + * + * XXX: For non-branching shaders, consider deferring variable + * initialization as late as possible to minimize register + * usage. This is how the original BRW driver worked. + * + * In a branching shader, must preamble instructions at decl + * time, as instruction order in the shader does not + * correspond to the order instructions are executed in the + * wild. + * + * This is where special instructions such as WM_CINTERP, + * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from the payload registers and pixel + * position. + */ + decl = &parse.FullToken.FullDeclaration; + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned attrib; + + first = decl->Range.First; + last = decl->Range.Last; + mask = decl->Declaration.UsageMask; + + for (attrib = first; attrib <= last; attrib++) { + emit_interp(c, + attrib, + decl->Semantic.Name, + decl->Declaration.Interpolate ); + } + } + + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* Unlike VS programs we can probably manage fine encoding + * immediate values directly into the emitted EU + * instructions, as we probably only need to reference one + * float value per instruction. Just save the data for now + * and use directly later. + */ + i = c->nr_immediates++; + imm = &parse.FullToken.FullImmediate.u[i].Float; + size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + + if (c->nr_immediates >= BRW_WM_MAX_CONST) + return PIPE_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < size; i++) + c->immediate[c->nr_immediates].v[i] = imm[i]; + + for (; i < 4; i++) + c->immediate[c->nr_immediates].v[i] = 0.0; + + c->immediate[c->nr_immediates].nr = size; + c->nr_immediates++; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + inst = &parse.FullToken.FullInstruction; + emit_insn(c, inst); + break; + } + } + + if (BRW_DEBUG & DEBUG_WM) { + brw_wm_print_fp_program( c, "pass_fp" ); + debug_printf("\n"); + } + + return c->error; +} + |