diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
47 files changed, 1151 insertions, 1724 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 6e9a9a29a3..7a55333e89 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,7 +14,6 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ - intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ @@ -101,6 +100,5 @@ DRI_LIB_DEPS += -ldrm_intel include ../Makefile.template -symlinks: intel_decode.o: ../intel/intel_decode.c intel_tex_layout.o: ../intel/intel_tex_layout.c diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 1088a7a607..ab301b9a3a 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -34,6 +34,7 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" +#include "intel_fbo.h" #include "main/macros.h" #include "main/enums.h" @@ -44,17 +45,24 @@ static void prepare_cc_vp( struct brw_context *brw ) memset(&ccv, 0, sizeof(ccv)); - /* _NEW_VIEWPORT */ - ccv.min_depth = ctx->Viewport.Near; - ccv.max_depth = ctx->Viewport.Far; + /* _NEW_TRANSOFORM */ + if (ctx->Transform.DepthClamp) { + /* _NEW_VIEWPORT */ + ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + } else { + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + } dri_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), + NULL, 0); } const struct brw_tracked_state brw_cc_vp = { .dirty = { - .mesa = _NEW_VIEWPORT, + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -82,6 +90,28 @@ struct brw_cc_unit_key { GLenum depth_func; }; +/** + * Modify blend function to force destination alpha to 1.0 + * + * If \c function specifies a blend function that uses destination alpha, + * replace it with a function that hard-wires destination alpha to 1.0. This + * is used when rendering to xRGB targets. + */ +static GLenum +fix_xRGB_alpha(GLenum function) +{ + switch (function) { + case GL_DST_ALPHA: + return GL_ONE; + + case GL_ONE_MINUS_DST_ALPHA: + case GL_SRC_ALPHA_SATURATE: + return GL_ZERO; + } + + return function; +} + static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { @@ -125,6 +155,17 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) key->blend_dst_rgb = ctx->Color.BlendDstRGB; key->blend_src_a = ctx->Color.BlendSrcA; key->blend_dst_a = ctx->Color.BlendDstA; + + /* If the renderbuffer is XRGB, we have to frob the blend function to + * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA + * with GL_ONE and GL_ONE_MINUS_DST_ALPAH with GL_ZERO. + */ + if (ctx->Visual.alphaBits == 0) { + key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb); + key->blend_src_a = fix_xRGB_alpha(key->blend_src_a); + key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb); + key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a); + } } key->alpha_enabled = ctx->Color.AlphaEnabled; diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 20a927cf38..dbd10a5297 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -78,7 +78,7 @@ static void compile_clip_prog( struct brw_context *brw, delta = REG_SIZE; for (i = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<<i)) { + if (c.key.attrs & BITFIELD64_BIT(i)) { c.offset[i] = delta; delta += ATTR_SIZE; } @@ -156,6 +156,7 @@ static void upload_clip_prog(struct brw_context *brw) key.attrs = brw->vs.prog_data->outputs_written; /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); + key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); /* _NEW_TRANSFORM */ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index 957df441ab..1c6825510a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -42,22 +42,21 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; + GLuint pv_first:1; GLuint do_unfilled:1; GLuint fill_cw:2; /* includes cull information */ GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:17; - GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:27; - + GLuint pad0:11; + GLfloat offset_factor; GLfloat offset_units; }; diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 048ca620fa..fa9648f50f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -269,8 +269,12 @@ void brw_emit_line_clip( struct brw_clip_compile *c ) brw_clip_line_alloc_regs(c); brw_clip_init_ff_sync(c); - if (c->key.do_flat_shading) - brw_clip_copy_colors(c, 0, 1); + if (c->key.do_flat_shading) { + if (c->key.pv_first) + brw_clip_copy_colors(c, 1, 0); + else + brw_clip_copy_colors(c, 0, 1); + } clip_and_emit_line(c); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 0efd77225e..cf79224be4 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -188,14 +188,20 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) brw_imm_ud(_3DPRIM_POLYGON)); is_poly = brw_IF(p, BRW_EXECUTE_1); - { + { brw_clip_copy_colors(c, 1, 0); brw_clip_copy_colors(c, 2, 0); } is_poly = brw_ELSE(p, is_poly); { - brw_clip_copy_colors(c, 0, 2); - brw_clip_copy_colors(c, 1, 2); + if (c->key.pv_first) { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + else { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } } brw_ENDIF(p, is_poly); } diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3c5b848319..78bea82949 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -105,11 +105,15 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, ctx->Const.MaxTextureImageUnits); ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.MaxVertexTextureImageUnits + + ctx->Const.MaxTextureImageUnits; /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */ @@ -125,6 +129,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* We want the GLSL compiler to emit code that uses condition codes */ ctx->Shader.EmitCondCodes = GL_TRUE; + ctx->Shader.EmitNVTempInitialization = GL_TRUE; ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); ctx->Const.VertexProgram.MaxAluInstructions = 0; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a5209ac41b..e73e21433c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -115,7 +115,7 @@ * Handles blending and (presumably) depth and stencil testing. */ -#define BRW_FALLBACK_TEXTURE 0x1 + #define BRW_MAX_CURBE (32*16) struct brw_context; @@ -172,8 +172,8 @@ struct brw_fragment_program { GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ - dri_bo *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; + dri_bo *const_buffer; /** Program constant buffer/surface */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -231,7 +231,7 @@ struct brw_vs_prog_data { GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint outputs_written; + GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -252,20 +252,23 @@ struct brw_vs_ouput_sizes { /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 +/** Max number of render targets in a shader */ +#define BRW_MAX_DRAW_BUFFERS 4 + /** * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture * objects and shader constant buffers (+2). */ -#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) +#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) /** * Helpers to convert drawing buffers, textures and constant buffers * to surface binding table indexes, for WM. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) -#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) +#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) /** * Size of surface binding table for the VS. @@ -317,7 +320,6 @@ struct brw_cache_item { GLuint nr_reloc_bufs; dri_bo *bo; - GLuint data_size; struct brw_cache_item *next; }; @@ -330,7 +332,6 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; - GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; @@ -410,23 +411,6 @@ struct brw_vertex_info { GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ }; - - - -/* Cache for TNL programs. - */ -struct brw_tnl_cache_item { - GLuint hash; - void *key; - void *data; - struct brw_tnl_cache_item *next; -}; - -struct brw_tnl_cache { - struct brw_tnl_cache_item **items; - GLuint size, n_items; -}; - struct brw_query_object { struct gl_query_object Base; @@ -454,8 +438,6 @@ struct brw_context GLuint primitive; GLboolean emit_state_always; - GLboolean tmp_fallback; - GLboolean no_batch_wrap; struct { struct brw_state_flags dirty; @@ -705,10 +687,6 @@ void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c */ -void brwUpdateTextureState( struct intel_context *intel ); -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ); -void brw_FrameBufferTexDestroy( struct brw_context *brw ); void brw_validate_textures( struct brw_context *brw ); @@ -763,9 +741,5 @@ brw_fragment_program_const(const struct gl_fragment_program *p) return (const struct brw_fragment_program *) p; } - - -#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) - #endif diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 0b0e6931a0..aadcfbe2da 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -130,7 +130,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = BRW_NEW_VERTEX_PROGRAM, + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT, .cache = CACHE_NEW_WM_PROG }, .prepare = calculate_curbe_offsets @@ -248,6 +248,9 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + /* Updates the ParamaterValues[i] pointers for all parameters of the * basic type of PROGRAM_STATE_VAR. */ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 78d457ad2b..c19510bbd4 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -673,18 +673,10 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG 3 /* for IGDNG only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 44bb7bd588..7ad860898f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -145,7 +145,7 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.base_vert_location = prim->basevertex; /* Can't wrap here, since we rely on the validated state. */ - brw->no_batch_wrap = GL_TRUE; + intel->no_batch_wrap = GL_TRUE; /* If we're set to always flush, do it before and after the primitive emit. * We want to catch both missed flushes that hurt instruction/state cache @@ -153,21 +153,17 @@ static void brw_emit_prim(struct brw_context *brw, * the besides the draw code. */ if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); - ADVANCE_BATCH(); + intel_batchbuffer_emit_mi_flush(intel->batch); } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); - ADVANCE_BATCH(); + intel_batchbuffer_emit_mi_flush(intel->batch); } - brw->no_batch_wrap = GL_FALSE; + intel->no_batch_wrap = GL_FALSE; } static void brw_merge_inputs( struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 765ae5a2fe..7c796dae93 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -243,14 +243,6 @@ static void wrap_buffers( struct brw_context *brw, dri_bo_unreference(brw->vb.upload.bo); brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", size, 1); - - /* Set the internal VBO\ to no-backing-store. We only use them as a - * temporary within a brw_try_draw_prims while the lock is held. - */ - /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH - FAKE TO PUSH THIS STUFF */ -// if (!brw->intel.ttm) -// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); } static void get_space( struct brw_context *brw, @@ -375,7 +367,7 @@ static void brw_prepare_vertices(struct brw_context *brw) * isn't an issue at this point. */ if (brw->vb.nr_enabled >= BRW_VEP_MAX) { - intel->Fallback = 1; + intel->Fallback = GL_TRUE; /* boolean, not bitfield */ return; } @@ -427,7 +419,7 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Position array not properly enabled: */ if (input->glarray->StrideB == 0) { - intel->Fallback = 1; + intel->Fallback = GL_TRUE; /* boolean, not bitfield */ return; } @@ -536,16 +528,9 @@ static void brw_emit_vertices(struct brw_context *brw) I915_GEM_DOMAIN_VERTEX, 0, input->offset); if (BRW_IS_IGDNG(brw)) { - if (input->stride) { - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->stride * input->count); - } else { - assert(input->count == 1); - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->element_size); - } + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->bo->size - 1); } else OUT_BATCH(input->stride ? input->count : 0); OUT_BATCH(0); /* Instance data step rate */ @@ -726,7 +711,7 @@ static void brw_emit_index_buffer(struct brw_context *brw) brw->ib.offset); OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + brw->ib.size); + brw->ib.offset + brw->ib.size - 1); OUT_BATCH( 0 ); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 30603bdd0e..39eb88d7c2 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -170,11 +170,11 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; - if (type == BRW_GENERAL_REGISTER_FILE) + if (file == BRW_GENERAL_REGISTER_FILE) assert(nr < BRW_MAX_GRF); - else if (type == BRW_MESSAGE_REGISTER_FILE) - assert(nr < BRW_MAX_MRF); - else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) assert(nr <= BRW_ARF_IP); reg.type = type; @@ -538,7 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) static INLINE struct brw_reg brw_message_reg( GLuint nr ) { - assert(nr < BRW_MAX_MRF); + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 241cdc33f8..7ceabba288 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -55,7 +55,8 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { - if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && + dest.file != BRW_MESSAGE_REGISTER_FILE) assert(dest.nr < 128); insn->bits1.da1.dest_reg_file = dest.file; diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index d27c6c24ca..562a17844b 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -133,7 +133,11 @@ const struct brw_tracked_state brw_check_fallback = { -/* Not used: +/** + * Called by the INTEL_FALLBACK() macro. + * NOTE: this is a no-op for the i965 driver. The brw->intel.Fallback + * field is treated as a boolean, not a bitmask. It's only set in a + * couple of places. */ void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode ) { diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 48c2b9a41c..610b6c35e2 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -85,10 +85,10 @@ static void compile_gs_prog( struct brw_context *brw, */ switch (key->primitive) { case GL_QUADS: - brw_gs_quads( &c ); + brw_gs_quads( &c, key ); break; case GL_QUAD_STRIP: - brw_gs_quad_strip( &c ); + brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: brw_gs_lines( &c ); @@ -149,6 +149,7 @@ static const GLenum gs_prim[GL_POLYGON+1] = { static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { + GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -158,6 +159,9 @@ static void populate_key( struct brw_context *brw, key->primitive = gs_prim[brw->primitive]; key->hint_gs_always = 0; /* debug code? */ + + /* _NEW_LIGHT */ + key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); key->need_gs_prog = (key->hint_gs_always || brw->primitive == GL_QUADS || @@ -193,7 +197,7 @@ static void prepare_gs_prog(struct brw_context *brw) const struct brw_tracked_state brw_gs_prog = { .dirty = { - .mesa = 0, + .mesa = _NEW_LIGHT, .brw = BRW_NEW_PRIMITIVE, .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index bbb991ea2e..010c1c2352 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -40,11 +40,12 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:4; GLuint hint_gs_always:1; + GLuint pv_first:1; GLuint need_gs_prog:1; - GLuint pad:26; + GLuint pad:25; }; struct brw_gs_compile { @@ -67,8 +68,8 @@ struct brw_gs_compile { #define ATTR_SIZE (4*4) -void brw_gs_quads( struct brw_gs_compile *c ); -void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); +void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); void brw_gs_tris( struct brw_gs_compile *c ); void brw_gs_lines( struct brw_gs_compile *c ); void brw_gs_points( struct brw_gs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index a9b2aa2eac..0fc5b02c61 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -120,7 +120,7 @@ static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) } -void brw_gs_quads( struct brw_gs_compile *c ) +void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { brw_gs_alloc_regs(c, 4); @@ -128,23 +128,39 @@ void brw_gs_quads( struct brw_gs_compile *c ) * is the PV for quads, but vertex 0 for polygons: */ if (c->need_ff_sync) - brw_gs_ff_sync(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + brw_gs_ff_sync(c, 1); + if (key->pv_first) { + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } + else { + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } } -void brw_gs_quad_strip( struct brw_gs_compile *c ) +void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) { brw_gs_alloc_regs(c, 4); if (c->need_ff_sync) brw_gs_ff_sync(c, 1); - brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); - brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); - brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + if (key->pv_first) { + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } + else { + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); + } } void brw_gs_tris( struct brw_gs_compile *c ) diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index ea71857548..4b0d598336 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -66,7 +66,7 @@ static void upload_blend_constant_color(struct brw_context *brw) const struct brw_tracked_state brw_blend_constant_color = { .dirty = { .mesa = _NEW_COLOR, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_blend_constant_color @@ -93,7 +93,7 @@ static void upload_drawing_rect(struct brw_context *brw) const struct brw_tracked_state brw_drawing_rect = { .dirty = { .mesa = _NEW_BUFFERS, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_drawing_rect @@ -317,7 +317,7 @@ static void upload_polygon_stipple(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple = { .dirty = { .mesa = _NEW_POLYGONSTIPPLE, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple @@ -362,7 +362,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { .mesa = _NEW_WINDOW_POS, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple_offset @@ -425,7 +425,7 @@ static void upload_line_stipple(struct brw_context *brw) const struct brw_tracked_state brw_line_stipple = { .dirty = { .mesa = _NEW_LINE, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_line_stipple diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index e1c2c7777b..968890f7fb 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -61,7 +61,7 @@ static void compile_sf_prog( struct brw_context *brw, c.key = *key; c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + c.nr_setup_attrs = brw_count_bits(c.key.attrs); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; @@ -70,7 +70,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Construct map from attribute number to position in the vertex. */ for (i = idx = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<<i)) { + if (c.key.attrs & BITFIELD64_BIT(i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { @@ -147,7 +147,7 @@ static void upload_sf_prog(struct brw_context *brw) * edgeflag testing here, it is already done in the clip * program. */ - if (key.attrs & (1<<VERT_RESULT_EDGE)) + if (key.attrs & BITFIELD64_BIT(VERT_RESULT_EDGE)) key.primitive = SF_UNFILLED_TRIS; else key.primitive = SF_TRIANGLES; @@ -161,7 +161,7 @@ static void upload_sf_prog(struct brw_context *brw) } key.do_point_sprite = ctx->Point.PointSprite; - key.SpriteOrigin = ctx->Point.SpriteOrigin; + key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 6426b6df9f..0ba731fac9 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -45,19 +45,19 @@ #define SF_UNFILLED_TRIS 3 struct brw_sf_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; GLuint linear_color:1; /**< linear interp vs. perspective interp */ - GLuint pad:25; - GLenum SpriteOrigin; + GLuint sprite_origin_lower_left:1; + GLuint pad:24; }; struct brw_sf_point_tex { - GLboolean CoordReplace; + GLboolean CoordReplace; }; struct brw_sf_compile { diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index ca8f97f9f9..3eae41ee74 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -56,7 +56,7 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c, static GLboolean have_attr(struct brw_sf_compile *c, GLuint attr) { - return (c->key.attrs & (1<<attr)) ? 1 : 0; + return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0; } /*********************************************************************** @@ -122,8 +122,8 @@ static void do_twoside_color( struct brw_sf_compile *c ) * Flat shading */ -#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \ - (1<<VERT_RESULT_COL1)) +#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \ + BITFIELD64_BIT(VERT_RESULT_COL1)) static void copy_colors( struct brw_sf_compile *c, struct brw_reg dst, @@ -312,8 +312,8 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask; - GLuint linear_mask; + GLbitfield64 persp_mask; + GLbitfield64 linear_mask; if (c->key.do_flat_shading || c->key.linear_color) persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | @@ -331,10 +331,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -342,10 +342,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_linear |= 0xf0; } @@ -551,7 +551,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + if (c->key.sprite_origin_lower_left) { brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); @@ -570,7 +570,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) { brw_set_predicate_control_flag_value(p, pc); if (tex->CoordReplace) { - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + if (c->key.sprite_origin_lower_left) { brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index bc0f076073..bb69435ec0 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -93,7 +93,8 @@ static void upload_sf_vp(struct brw_context *brw) } dri_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv), + NULL, 0); } const struct brw_tracked_state brw_sf_vp = { @@ -113,7 +114,8 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; - GLenum front_face, cull_face, provoking_vertex; + GLenum front_face, cull_face; + unsigned pv_first:1; unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; @@ -154,7 +156,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->point_attenuated = ctx->Point._Attenuated; /* _NEW_LIGHT */ - key->provoking_vertex = ctx->Light.ProvokingVertex; + key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } @@ -287,7 +289,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + if (!key->pv_first) { sf.sf7.trifan_pv = 2; sf.sf7.linestrip_pv = 1; sf.sf7.tristrip_pv = 2; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 78572356a3..b129b1f1c3 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -86,9 +86,6 @@ const struct brw_tracked_state brw_psp_urb_cbs; const struct brw_tracked_state brw_pipe_control; -const struct brw_tracked_state brw_clear_surface_cache; -const struct brw_tracked_state brw_clear_batch_cache; - const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; @@ -115,6 +112,7 @@ void brw_validate_state(struct brw_context *brw); void brw_upload_state(struct brw_context *brw); void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); +void brw_clear_validated_bos(struct brw_context *brw); /*********************************************************************** * brw_state_cache.c @@ -122,16 +120,10 @@ void brw_destroy_state(struct brw_context *brw); dri_bo *brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, + GLuint size, dri_bo **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_cache_data_sz(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs); - dri_bo *brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, @@ -154,6 +146,7 @@ void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); +void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo); /*********************************************************************** * brw_state_batch.c @@ -165,7 +158,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, const void *data, GLuint sz ); void brw_destroy_batch_cache( struct brw_context *brw ); -void brw_clear_batch_cache_flush( struct brw_context *brw ); +void brw_clear_batch_cache( struct brw_context *brw ); /* brw_wm_surface_state.c */ dri_bo * diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index 811940edc0..7821898cf9 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -79,7 +79,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, return GL_TRUE; } -static void clear_batch_cache( struct brw_context *brw ) +void brw_clear_batch_cache( struct brw_context *brw ) { struct brw_cached_batch_item *item = brw->cached_batch_items; @@ -93,18 +93,7 @@ static void clear_batch_cache( struct brw_context *brw ) brw->cached_batch_items = NULL; } -void brw_clear_batch_cache_flush( struct brw_context *brw ) -{ - clear_batch_cache(brw); - - brw->state.dirty.mesa |= ~0; - brw->state.dirty.brw |= ~0; - brw->state.dirty.cache |= ~0; -} - - - void brw_destroy_batch_cache( struct brw_context *brw ) { - clear_batch_cache(brw); + brw_clear_batch_cache(brw); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index e40d7a0416..e4c9ba7d87 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -245,7 +245,6 @@ brw_upload_cache( struct brw_cache *cache, item->bo = bo; dri_bo_reference(bo); - item->data_size = data_size; if (cache->n_items > cache->size * 1.5) rehash(cache); @@ -275,15 +274,22 @@ brw_upload_cache( struct brw_cache *cache, /** - * This doesn't really work with aux data. Use search/upload instead + * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. + * + * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be + * better to use, as the potentially changing offsets in the data-used-as-key + * will result in excessive cache misses. + * + * If aux data is involved, use search/upload instead. + */ dri_bo * -brw_cache_data_sz(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs) +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { dri_bo *bo; struct brw_cache_item *item; @@ -306,25 +312,6 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } - -/** - * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. - * - * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be - * better to use, as the potentially changing offsets in the data-used-as-key - * will result in excessive cache misses. - */ -dri_bo * -brw_cache_data(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs) -{ - return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], - reloc_bufs, nr_reloc_bufs); -} - enum pool_type { DW_SURFACE_STATE, DW_GENERAL_STATE @@ -335,11 +322,9 @@ static void brw_init_cache_id(struct brw_cache *cache, const char *name, enum brw_cache_id id, - GLuint key_size, GLuint aux_size) { cache->name[id] = strdup(name); - cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } @@ -359,91 +344,76 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, - sizeof(struct brw_cc_viewport), 0); brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), 0); brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), 0); brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, - 0, /* variable key/data size */ 0); brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), 0); brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, - sizeof(struct brw_sf_viewport), 0); brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), 0); brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), 0); brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), 0); brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), 0); brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); } @@ -463,13 +433,11 @@ brw_init_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, - sizeof(struct brw_surface_state), 0); brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, - 0, 0); } @@ -517,6 +485,41 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) brw->state.dirty.cache |= ~0; } +/* Clear all entries from the cache that point to the given bo. + * + * This lets us release memory for reuse earlier for known-dead buffers, + * at the cost of walking the entire hash table. + */ +void +brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) +{ + struct brw_cache_item **prev; + GLuint i; + + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + for (i = 0; i < cache->size; i++) { + for (prev = &cache->items[i]; *prev;) { + struct brw_cache_item *c = *prev; + + if (drm_intel_bo_references(c->bo, bo)) { + int j; + + *prev = c->next; + + for (j = 0; j < c->nr_reloc_bufs; j++) + dri_bo_unreference(c->reloc_bufs[j]); + dri_bo_unreference(c->bo); + free((void *)c->key); + free(c); + cache->n_items--; + } else { + prev = &c->next; + } + } + } +} void brw_state_cache_check_size(struct brw_context *brw) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 414620d0b3..af8dfb4c15 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "intel_batchbuffer.h" +#include "intel_buffers.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -142,7 +143,7 @@ static void xor_states( struct brw_state_flags *result, result->cache = a->cache ^ b->cache; } -static void +void brw_clear_validated_bos(struct brw_context *brw) { int i; @@ -287,6 +288,7 @@ void brw_validate_state( struct brw_context *brw ) if (brw->emit_state_always) { state->mesa |= ~0; state->brw |= ~0; + state->cache |= ~0; } if (brw->fragment_program != ctx->FragmentProgram._Current) { @@ -305,9 +307,9 @@ void brw_validate_state( struct brw_context *brw ) return; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) - brw_clear_batch_cache_flush(brw); + brw_clear_batch_cache(brw); - brw->intel.Fallback = 0; + brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */ /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { @@ -323,6 +325,8 @@ void brw_validate_state( struct brw_context *brw ) } } + intel_check_front_buffer_rendering(intel); + /* Make sure that the textures which are referenced by the current * brw fragment program are actually present/valid. * If this fails, we can experience GPU lock-ups. diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 71bff166dd..e911b105b2 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -39,38 +39,6 @@ #include "intel_tex.h" #include "brw_context.h" - -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ) -{ - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; - struct gl_texture_object *obj; - struct gl_texture_image *img; - - intel->frame_buffer_texobj = obj = - ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D ); - - obj->MinFilter = GL_NEAREST; - obj->MagFilter = GL_NEAREST; - - img = ctx->Driver.NewTextureImage( ctx ); - - _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img, - region->pitch, region->height, 1, 0, - region->cpp == 4 ? GL_RGBA : GL_RGB ); - - _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img ); -} - -void brw_FrameBufferTexDestroy( struct brw_context *brw ) -{ - if (brw->intel.frame_buffer_texobj != NULL) - brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, - brw->intel.frame_buffer_texobj ); - brw->intel.frame_buffer_texobj = NULL; -} - /** * Finalizes all textures, completing any rendering that needs to be done * to prepare them. diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 5986cbffad..e59e52ed86 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -86,10 +86,10 @@ GLboolean brw_miptree_layout(struct intel_context *intel, mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); if (mt->compressed) { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4; mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; } else { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h); mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; } @@ -102,7 +102,8 @@ GLboolean brw_miptree_layout(struct intel_context *intel, height, 1); for (q = 0; q < nr_images; q++) - intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); + intel_miptree_set_image_offset(mt, level, q, + x, y + q * qpitch); if (mt->compressed) img_height = MAX2(1, height/4); diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index ce21aa4869..bba9249d1b 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -35,7 +35,7 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) +GLuint brw_count_bits(uint64_t val) { GLuint i; for (i = 0; val ; val >>= 1) diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 33e7cd87e4..04f3175d3e 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -35,7 +35,7 @@ #include "main/mtypes.h" -extern GLuint brw_count_bits( GLuint val ); +extern GLuint brw_count_bits(uint64_t val); extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index e3111c6680..fd055e225e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -56,7 +56,7 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; + c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } @@ -90,8 +90,6 @@ static void brw_upload_vs_prog(struct brw_context *brw) struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - assert (vp && !vp->program.IsNVProgram); - memset(&key, 0, sizeof(key)); /* Just upload the program verbatim for now. Always send it all diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 1638ef8111..00efd3443d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -147,7 +147,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { @@ -331,63 +331,65 @@ static void unalias3( struct brw_vs_compile *c, } } -static void emit_sop( struct brw_compile *p, +static void emit_sop( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1, GLuint cond) { + struct brw_compile *p = &c->func; + brw_MOV(p, dst, brw_imm_f(0.0f)); brw_CMP(p, brw_null_reg(), cond, arg0, arg1); brw_MOV(p, dst, brw_imm_f(1.0f)); brw_set_predicate_control_flag_value(p, 0xff); } -static void emit_seq( struct brw_compile *p, +static void emit_seq( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_EQ); } -static void emit_sne( struct brw_compile *p, +static void emit_sne( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); } -static void emit_slt( struct brw_compile *p, +static void emit_slt( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_L); } -static void emit_sle( struct brw_compile *p, +static void emit_sle( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_LE); } -static void emit_sgt( struct brw_compile *p, +static void emit_sgt( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_G); } -static void emit_sge( struct brw_compile *p, +static void emit_sge( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1 ) { - emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); + emit_sop(c, dst, arg0, arg1, BRW_CONDITIONAL_GE); } static void emit_max( struct brw_compile *p, @@ -912,6 +914,7 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: case PROGRAM_ENV_PARAM: + case PROGRAM_LOCAL_PARAM: if (c->vp->use_const_buffer) { return get_constant(c, inst, argIndex); } @@ -930,7 +933,6 @@ get_src_reg( struct brw_vs_compile *c, /* this is a normal case since we loop over all three src args */ return brw_null_reg(); - case PROGRAM_LOCAL_PARAM: case PROGRAM_WRITE_ONLY: default: assert(0); @@ -1122,7 +1124,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || c->key.nr_userclip || BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); @@ -1132,7 +1134,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); - if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); @@ -1222,7 +1224,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ GLuint i, mrf = 0; for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { /* move from GRF to MRF */ brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); mrf++; @@ -1269,6 +1271,38 @@ post_vs_emit( struct brw_vs_compile *c, } } +static GLboolean +accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *prev_insn = &p->store[p->nr_insn - 1]; + + if (p->nr_insn == 0) + return GL_FALSE; + + if (val.address_mode != BRW_ADDRESS_DIRECT) + return GL_FALSE; + + switch (prev_insn->header.opcode) { + case BRW_OPCODE_MOV: + case BRW_OPCODE_MAC: + case BRW_OPCODE_MUL: + if (prev_insn->header.access_mode == BRW_ALIGN_16 && + prev_insn->header.execution_size == val.width && + prev_insn->bits1.da1.dest_reg_file == val.file && + prev_insn->bits1.da1.dest_reg_type == val.type && + prev_insn->bits1.da1.dest_address_mode == val.address_mode && + prev_insn->bits1.da1.dest_reg_nr == val.nr && + prev_insn->bits1.da16.dest_subreg_nr == val.subnr / 16 && + prev_insn->bits1.da16.dest_writemask == 0xf) + return GL_TRUE; + else + return GL_FALSE; + default: + return GL_FALSE; + } +} + static uint32_t get_predicate(const struct prog_instruction *inst) { @@ -1447,7 +1481,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); break; case OPCODE_MAD: - brw_MOV(p, brw_acc_reg(), args[2]); + if (!accumulator_contains(c, args[2])) + brw_MOV(p, brw_acc_reg(), args[2]); brw_MAC(p, dst, args[0], args[1]); break; case OPCODE_MAX: @@ -1473,25 +1508,25 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_SEQ: - emit_seq(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_seq); break; case OPCODE_SIN: emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_SNE: - emit_sne(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sne); break; case OPCODE_SGE: - emit_sge(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sge); break; case OPCODE_SGT: - emit_sgt(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sgt); break; case OPCODE_SLT: - emit_slt(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_slt); break; case OPCODE_SLE: - emit_sle(p, dst, args[0], args[1]); + unalias2(c, dst, args[0], args[1], emit_sle); break; case OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index d790ab6555..7285466645 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -109,10 +109,39 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) - vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; - else - vs.thread4.nr_urb_entries = key->nr_urb_entries; + if (BRW_IS_IGDNG(brw)) { + switch (key->nr_urb_entries) { + case 8: + case 12: + case 16: + case 32: + case 64: + case 96: + case 128: + case 168: + case 192: + case 224: + case 256: + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + break; + default: + assert(0); + } + } else { + switch (key->nr_urb_entries) { + case 8: + case 12: + case 16: + case 32: + break; + case 64: + assert(BRW_IS_G4X(brw)); + break; + default: + assert(0); + } + vs.thread4.nr_urb_entries = key->nr_urb_entries; + } vs.thread4.urb_entry_allocation_size = key->urb_size - 1; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 89f47522a1..3bc9840a97 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -30,7 +30,6 @@ */ #include "main/mtypes.h" -#include "main/texformat.h" #include "main/texstore.h" #include "shader/prog_parameter.h" @@ -53,6 +52,7 @@ brw_vs_update_constant_buffer(struct brw_context *brw) const struct gl_program_parameter_list *params = vp->program.Base.Parameters; const int size = params->NumParameters * 4 * sizeof(GLfloat); drm_intel_bo *const_buffer; + int i; /* BRW_NEW_VERTEX_PROGRAM */ if (!vp->use_const_buffer) @@ -62,7 +62,19 @@ brw_vs_update_constant_buffer(struct brw_context *brw) size, 64); /* _NEW_PROGRAM_CONSTANTS */ - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + /* Updates the ParamaterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); + + intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE); + for (i = 0; i < params->NumParameters; i++) { + memcpy(const_buffer->virtual + i * 4 * sizeof(float), + params->ParameterValues[i], + 4 * sizeof(float)); + } + intel_bo_unmap_gtt_preferred(intel, const_buffer); return const_buffer; } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ac11790151..72749b3859 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -46,7 +46,7 @@ #include "brw_state.h" #include "brw_fallback.h" #include "brw_vs.h" - +#include "brw_wm.h" static void dri_bo_release(dri_bo **bo) @@ -66,10 +66,14 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); - - _mesa_free(brw->wm.compile_data); - - brw_FrameBufferTexDestroy( brw ); + brw_clear_validated_bos(brw); + if (brw->wm.compile_data) { + _mesa_free(brw->wm.compile_data->instruction); + _mesa_free(brw->wm.compile_data->vreg); + _mesa_free(brw->wm.compile_data->refs); + _mesa_free(brw->wm.compile_data->prog_instructions); + _mesa_free(brw->wm.compile_data); + } for (i = 0; i < brw->state.nr_color_regions; i++) intel_region_release(&brw->state.color_regions[i]); @@ -146,9 +150,6 @@ static void brw_new_batch( struct intel_context *intel ) { struct brw_context *brw = brw_context(&intel->ctx); - /* Check that we didn't just wrap our batchbuffer at a bad time. */ - assert(!brw->no_batch_wrap); - brw->curbe.need_new_bo = GL_TRUE; /* Mark all context state as needing to be re-emitted. @@ -177,20 +178,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence ) brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } -/* called from intelWaitForIdle() and intelFlush() - * - * For now, just flush everything. Could be smarter later. - */ -static GLuint brw_flush_cmd( void ) -{ - struct brw_mi_flush flush; - flush.opcode = CMD_MI_FLUSH; - flush.pad = 0; - flush.flags = BRW_FLUSH_STATE_CACHE; - return *(GLuint *)&flush; -} - - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -211,6 +198,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; - brw->intel.vtbl.flush_cmd = brw_flush_cmd; brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 2292de94c4..6895f64410 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -29,7 +29,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "main/texformat.h" #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" @@ -153,8 +152,21 @@ static void do_wm_prog( struct brw_context *brw, */ return; } + c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * + sizeof(*c->prog_instructions)); + c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); } else { + void *instruction = c->instruction; + void *prog_instructions = c->prog_instructions; + void *vreg = c->vreg; + void *refs = c->refs; memset(c, 0, sizeof(*brw->wm.compile_data)); + c->instruction = instruction; + c->prog_instructions = prog_instructions; + c->vreg = vreg; + c->refs = refs; } memcpy(&c->key, key, sizeof(*key)); @@ -218,7 +230,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -288,7 +300,7 @@ static void brw_wm_populate_key( struct brw_context *brw, const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; if (img->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1 << i; - if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) + if (img->TexFormat == MESA_FORMAT_YCBCR) key->yuvtex_swap_mask |= 1 << i; } @@ -309,6 +321,9 @@ static void brw_wm_populate_key( struct brw_context *brw, * from the incoming screen origin relative position we get as part of our * payload. * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * * We could avoid recompiling by including this as a constant referenced by * our program, but if we were to do that it would also be nice to handle * getting that constant updated at batchbuffer submit time (when we @@ -317,17 +332,21 @@ static void brw_wm_populate_key( struct brw_context *brw, * just avoid using this as key data if the program doesn't use * fragment.position. * - * This pretty much becomes moot with DRI2 and redirected buffers anyway, - * as our origins will always be zero then. + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. */ - if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; + if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { + if (brw->intel.driDrawable != NULL) { + key->origin_x = brw->intel.driDrawable->x; + key->origin_y = brw->intel.driDrawable->y; + key->drawable_height = brw->intel.driDrawable->h; + } } + key->nr_color_regions = brw->state.nr_color_regions; + /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* The unique fragment program ID */ key->program_string_id = fp->id; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 872b1f3ecf..b9b987ea70 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -67,18 +67,19 @@ struct brw_wm_prog_key { GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; + GLuint nr_color_regions:2; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; + GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; + GLushort origin_x, origin_y; + GLushort drawable_height; + GLbitfield64 vp_outputs_written; GLuint program_string_id:32; - GLuint origin_x, origin_y; - GLuint drawable_height; - GLuint vp_outputs_written; }; @@ -153,15 +154,16 @@ struct brw_wm_instruction { }; -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) #define BRW_WM_MAX_PARAM 256 #define BRW_WM_MAX_CONST 256 -#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS #define BRW_WM_MAX_SUBROUTINE 16 +/* used in masks next to WRITEMASK_*. */ +#define SATURATE (1<<5) /* New opcodes to track internal operations required for WM unit. @@ -200,7 +202,7 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. */ - struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + struct prog_instruction *prog_instructions; GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; @@ -211,7 +213,7 @@ struct brw_wm_compile { struct prog_src_register pixel_w; - struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + struct brw_wm_value *vreg; GLuint nr_vreg; struct brw_wm_value creg[BRW_WM_MAX_PARAM]; @@ -228,10 +230,10 @@ struct brw_wm_compile { struct brw_wm_ref undef_ref; struct brw_wm_value undef_value; - struct brw_wm_ref refs[BRW_WM_MAX_REF]; + struct brw_wm_ref *refs; GLuint nr_refs; - struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + struct brw_wm_instruction *instruction; GLuint nr_insns; struct brw_wm_constref constref[BRW_WM_MAX_CONST]; @@ -271,6 +273,12 @@ struct brw_wm_compile { }; +/** Bits for prog_instruction::Aux field */ +#define INST_AUX_EOT 0x1 +#define INST_AUX_TARGET(T) (T << 1) +#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1) + + GLuint brw_wm_nr_args( GLuint opcode ); GLuint brw_wm_is_scalar_result( GLuint opcode ); @@ -300,10 +308,141 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +/* brw_wm_emit.c */ +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, GLboolean is_ddx, const struct brw_reg *arg0); +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot); +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask); +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w); +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask); +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow); +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler); +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index bf80a2942a..5390fd2584 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -44,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) return reg; } + /* Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, @@ -60,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.8 -- ? */ - -static void emit_pixel_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask) +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask) { + struct brw_compile *p = &c->func; struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + struct brw_reg dst0_uw, dst1_uw; + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + if (c->dispatch_width == 16) { + dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } else { + dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } + /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, - vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + dst0_uw, stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, - vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + dst1_uw, stride(suboffset(r1_uw,5), 2, 4, 0), brw_imm_v(0x11001100)); } - - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + brw_pop_insn_state(p); } - -static void emit_delta_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -118,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_compile *p = &c->func; @@ -146,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } -static void emit_pixel_w( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas) +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { + struct brw_compile *p = &c->func; + /* Don't need this if all you are doing is interpolating color, for * instance. */ @@ -165,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p, brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); /* Calc w */ - brw_math_16( p, dst[3], - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); + if (c->dispatch_width == 16) { + brw_math_16(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } else { + brw_math(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } } } - -static void emit_linterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas ) +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -199,12 +218,12 @@ static void emit_linterp( struct brw_compile *p, } -static void emit_pinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas, - const struct brw_reg *w) +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -229,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p, } -static void emit_cinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -251,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p, } /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -static void emit_frontfacing( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask ) +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask) { struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); GLuint i; @@ -352,13 +371,13 @@ void emit_ddxy(struct brw_compile *p, brw_set_saturate(p, 0); } -static void emit_alu1( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { GLuint i; @@ -376,15 +395,15 @@ static void emit_alu1( struct brw_compile *p, } -static void emit_alu2( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -402,12 +421,12 @@ static void emit_alu2( struct brw_compile *p, } -static void emit_mad( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -422,26 +441,12 @@ static void emit_mad( struct brw_compile *p, } } -static void emit_trunc( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_RNDZ(p, dst[i], arg0[i]); - } - } -} - -static void emit_lrp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -461,21 +466,24 @@ static void emit_lrp( struct brw_compile *p, } } -static void emit_sop( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - GLuint cond, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0)); + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst[i], brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_set_predicate_control_flag_value(p, 0xff); + brw_pop_insn_state(p); } } } @@ -559,11 +567,11 @@ static void emit_cmp( struct brw_compile *p, } } -static void emit_max( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -583,11 +591,11 @@ static void emit_max( struct brw_compile *p, } } -static void emit_min( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -608,11 +616,11 @@ static void emit_min( struct brw_compile *p, } -static void emit_dp3( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -630,11 +638,11 @@ static void emit_dp3( struct brw_compile *p, } -static void emit_dp4( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -653,11 +661,11 @@ static void emit_dp4( struct brw_compile *p, } -static void emit_dph( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -676,11 +684,11 @@ static void emit_dph( struct brw_compile *p, } -static void emit_xpd( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -701,41 +709,68 @@ static void emit_xpd( struct brw_compile *p, } -static void emit_math1( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); + /* If compressed, this will write message reg 2,3 from arg0.x's 16 + * channels. + */ brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ - brw_math_16(p, - dst[dst_chan], + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, + dst[dst_chan], + function, + saturate, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, + saturate, + 3, brw_null_reg(), + BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + } + brw_pop_insn_state(p); } -static void emit_math2( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ @@ -746,173 +781,231 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2), arg0[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + } - - /* Send two messages to perform all 16 operations: - */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + saturate, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, - offset(dst[dst_chan],1), - function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 4, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - + /* Send two messages to perform all 16 operations: + */ + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + saturate, + 4, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } brw_pop_insn_state(p); } - -static void emit_tex( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow) { struct brw_compile *p = &c->func; - GLuint msgLength, responseLength; - GLuint i, nr; + struct brw_reg dst_retyped; + GLuint cur_mrf = 2, response_length; + GLuint i, nr_texcoords; GLuint emit; GLuint msg_type; + GLuint mrf_per_channel; + GLuint simd_mode; + + if (c->dispatch_width == 16) { + mrf_per_channel = 2; + response_length = 8; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } else { + mrf_per_channel = 1; + response_length = 4; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + } /* How many input regs are there? */ - switch (inst->tex_idx) { + switch (tex_idx) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; - nr = 1; + nr_texcoords = 1; break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: emit = WRITEMASK_XY; - nr = 2; + nr_texcoords = 2; break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; - nr = 3; + nr_texcoords = 3; break; default: /* unexpected target */ abort(); } - if (inst->tex_shadow) { - nr = 4; - emit |= WRITEMASK_W; - } + /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ + if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8) + nr_texcoords = 3; - msgLength = 1; + /* For shadow comparisons, we have to supply u,v,r. */ + if (shadow) + nr_texcoords = 3; - for (i = 0; i < nr; i++) { - static const GLuint swz[4] = {0,1,2,2}; - if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]); + /* Emit the texcoords. */ + for (i = 0; i < nr_texcoords; i++) { + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(cur_mrf), arg[i]); else - brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0)); - msgLength += 2; + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; } - responseLength = 8; /* always */ + /* Fill in the shadow comparison reference value. */ + if (shadow) { + if (BRW_IS_IGDNG(p->brw)) { + /* Fill in the cube map array index value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } else if (c->dispatch_width == 8) { + /* Fill in the LOD bias value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; + } if (BRW_IS_IGDNG(p->brw)) { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; } else { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + /* Note that G45 and older determines shadow compare and dispatch width + * from message length for most messages. + */ + if (c->dispatch_width == 16 && shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; } - brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + brw_SAMPLE(p, + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, - msg_type, - responseLength, - msgLength, - 0, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, + msg_type, + response_length, + cur_mrf - 1, + 0, 1, - BRW_SAMPLER_SIMD_MODE_SIMD16); + simd_mode); } -static void emit_txb( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler) { struct brw_compile *p = &c->func; GLuint msgLength; GLuint msg_type; - /* Shadow ignored for txb. + GLuint mrf_per_channel; + GLuint response_length; + struct brw_reg dst_retyped; + + /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased + * samples, so we'll use the 16-wide instruction, leave the second halves + * undefined, and trust the execution mask to keep the undefined pixels + * from mattering. */ - switch (inst->tex_idx) { + if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) { + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + mrf_per_channel = 2; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 8; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + mrf_per_channel = 1; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 4; + } + + /* Shadow ignored for txb. */ + switch (tex_idx) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), arg[2]); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]); break; default: /* unexpected target */ abort(); } - brw_MOV(p, brw_message_reg(8), arg[3]); - msgLength = 9; - - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]); + msgLength = 2 + 4 * mrf_per_channel - 1; brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, msg_type, - 8, /* responseLength */ + response_length, msgLength, 0, 1, @@ -920,11 +1013,13 @@ static void emit_txb( struct brw_wm_compile *c, } -static void emit_lit( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +static void emit_lit(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; + assert((mask & WRITEMASK_XW) == 0); if (mask & WRITEMASK_Y) { @@ -934,7 +1029,7 @@ static void emit_lit( struct brw_compile *p, } if (mask & WRITEMASK_Z) { - emit_math2(p, BRW_MATH_FUNCTION_POW, + emit_math2(c, BRW_MATH_FUNCTION_POW, &dst[2], WRITEMASK_X | (mask & SATURATE), &arg0[1], @@ -1001,7 +1096,13 @@ static void fire_fb_write( struct brw_wm_compile *c, GLuint eot ) { struct brw_compile *p = &c->func; - + struct brw_reg dst; + + if (c->dispatch_width == 16) + dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); + /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ @@ -1018,7 +1119,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, - retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), target, @@ -1050,14 +1151,15 @@ static void emit_aa( struct brw_wm_compile *c, * \param arg1 the pass-through depth value * \param arg2 the shader-computed depth value */ -static void emit_fb_write( struct brw_wm_compile *c, - struct brw_reg *arg0, - struct brw_reg *arg1, - struct brw_reg *arg2, - GLuint target, - GLuint eot) +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; GLuint nr = 2; GLuint channel; @@ -1069,30 +1171,37 @@ static void emit_fb_write( struct brw_wm_compile *c, /* I don't really understand how this achieves the color interleave * (ie RGBARGBA) in the result: [Do the saturation here] */ - { - brw_push_insn_state(p); - - for (channel = 0; channel < 4; channel++) { + brw_push_insn_state(p); + + for (channel = 0; channel < 4; channel++) { + if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) { + /* By setting the high bit of the MRF register number, we indicate + * that we want COMPR4 mode - instead of doing the usual destination + * + 1 for the second half we get destination + 4. + */ + brw_MOV(p, + brw_message_reg(nr + channel + (1 << 7)), + arg0[channel]); + } else { /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); - - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, - brw_message_reg(nr + channel + 4), - sechalf(arg0[channel])); - } - /* skip over the regs populated above: - */ - nr += 8; - - brw_pop_insn_state(p); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(nr + channel + 4), + sechalf(arg0[channel])); + } + } } + /* skip over the regs populated above: + */ + nr += 8; + brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) { @@ -1142,7 +1251,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1156,7 +1265,6 @@ static void emit_fb_write( struct brw_wm_compile *c, } } - /** * Move a GPR to scratch memory. */ @@ -1294,7 +1402,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: @@ -1306,7 +1414,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_PIXELW: - emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: @@ -1364,7 +1472,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_TRUNC: - emit_trunc(p, dst, dst_flags, args[0]); + emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); break; case OPCODE_LRP: @@ -1391,27 +1499,27 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Higher math functions: */ case OPCODE_RCP: - emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: @@ -1419,13 +1527,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: - emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: @@ -1463,17 +1571,20 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_LIT: - emit_lit(p, dst, dst_flags, args[0]); + emit_lit(c, dst, dst_flags, args[0]); break; /* Texturing operations: */ case OPCODE_TEX: - emit_tex(c, inst, dst, dst_flags, args[0]); + emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit, + inst->tex_shadow); break; case OPCODE_TXB: - emit_txb(c, inst, dst, dst_flags, args[0]); + emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit); break; case OPCODE_KIL: diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 4e3edfbbff..3737faf26f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -138,7 +138,6 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) reg.CondMask = COND_TR; reg.CondSwizzle = 0; reg.CondSrc = 0; - reg.pad = 0; return reg; } @@ -181,6 +180,9 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) { + assert(c->nr_fp_insns < BRW_WM_MAX_INSN); + memset(&c->prog_instructions[c->nr_fp_insns], 0, + sizeof(*c->prog_instructions)); return &c->prog_instructions[c->nr_fp_insns++]; } @@ -447,7 +449,6 @@ static void emit_interp( struct brw_wm_compile *c, break; case FRAG_ATTRIB_FACE: - /* XXX review/test this case */ emit_op(c, WM_FRONTFACING, dst_mask(dst, WRITEMASK_X), @@ -956,7 +957,7 @@ static void precalc_txp( struct brw_wm_compile *c, -static void emit_fb_write( struct brw_wm_compile *c ) +static void emit_render_target_writes( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); @@ -964,36 +965,34 @@ static void emit_fb_write( struct brw_wm_compile *c ) GLuint i; struct prog_instruction *inst, *last_inst; - struct brw_context *brw = c->func.brw; /* The inst->Aux field is used for FB write target and the EOT marker */ - if (brw->state.nr_color_regions > 1) { - for (i = 0 ; i < brw->state.nr_color_regions; i++) { + if (c->key.nr_color_regions > 1) { + for (i = 0 ; i < c->key.nr_color_regions; i++) { outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, - outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = INST_AUX_TARGET(i); if (c->fp_fragcolor_emitted) { outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); + inst->Aux = INST_AUX_TARGET(i); } } - last_inst->Aux |= 1; //eot + last_inst->Aux |= INST_AUX_EOT; } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ - if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) + if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = 1|(0<<1); + inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0); } } @@ -1155,7 +1154,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) out->DstReg.WriteMask = 0; break; case OPCODE_END: - emit_fb_write(c); + emit_render_target_writes(c); break; case OPCODE_PRINT: break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index c9fe1dd8ad..e8c2cb66ec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -371,7 +371,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } - if (c->key.vp_outputs_written & (1 << i)) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { reg_index += 2; } } @@ -550,42 +550,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, } } - -/** - * Same as \sa get_src_reg() but if the register is a literal, emit - * a brw_reg encoding the literal. - * Note that a brw instruction only allows one src operand to be a literal. - * For instructions with more than one operand, only the second can be a - * literal. This means that we treat some literals as constants/uniforms - * (which why PROGRAM_CONSTANT is checked in fetch_constants()). - * - */ -static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - if (src->File == PROGRAM_CONSTANT) { - /* a literal */ - const int component = GET_SWZ(src->Swizzle, channel); - const GLfloat *param = - c->fp->program.Base.Parameters->ParameterValues[src->Index]; - GLfloat value = param[component]; - if (src->Negate & (1 << channel)) - value = -value; - if (src->Abs) - value = FABSF(value); -#if 0 - printf(" form immed value %f for chan %d\n", value, channel); -#endif - return brw_imm_f(value); - } - else { - return get_src_reg(c, inst, srcRegIndex, channel); - } -} - - /** * Subroutines are minimal support for resusable instruction sequences. * They are implemented as simply as possible to minimise overhead: there @@ -650,542 +614,110 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -static void emit_trunc( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - src = get_src_reg(c, inst, 0, i); - brw_RNDZ(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - /* XXX some moves from immediate value don't work reliably!!! */ - /*src = get_src_reg_imm(c, inst, 0, i);*/ - src = get_src_reg(c, inst, 0, i); - brw_MOV(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_pixel_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); - - struct brw_reg dst0, dst1; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } -} - -static void emit_delta_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg dst0, dst1, src0, src1; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - src0 = get_src_reg(c, inst, 0, 0); - src1 = get_src_reg(c, inst, 0, 1); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst0, - retype(src0, BRW_REGISTER_TYPE_UW), - negate(r1)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst1, - retype(src1, BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - - } -} - -static void fire_fb_write( struct brw_wm_compile *c, - GLuint base_reg, - GLuint nr, - GLuint target, - GLuint eot) -{ - struct brw_compile *p = &c->func; - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - target, - nr, - 0, - eot); -} - -static void emit_fb_write(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - GLuint target, eot; - struct brw_reg src0; - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, inst, 0, channel); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { - src0 = get_src_reg(c, inst, 2, 2); - brw_MOV(p, brw_message_reg(nr), src0); - } - else { - src0 = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } - - nr += 2; - } - - if (c->key.dest_depth_reg) { - const GLuint comp = c->key.dest_depth_reg / 2; - const GLuint off = c->key.dest_depth_reg % 2; - - if (off != 0) { - /* XXX this code needs review/testing */ - struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); - struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); - - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); - /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1_1); - brw_pop_insn_state(p); - } - else - { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); - } - nr += 2; - } - - target = inst->Aux >> 1; - eot = inst->Aux & 1; - fire_fb_write(c, 0, nr, target, eot); -} - -static void emit_pixel_w( struct brw_wm_compile *c, - const struct prog_instruction *inst) +/* Workaround for using brw_wm_emit.c's emit functions, which expect + * destination regs to be uniquely written. Moves arguments out to + * temporaries as necessary for instructions which use their destination as + * a temporary. + */ +static void +unalias3(struct brw_wm_compile *c, + void (*func)(struct brw_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - if (mask & WRITEMASK_W) { - struct brw_reg dst, src0, delta0, delta1; - struct brw_reg interp3; - - dst = get_dst_reg(c, inst, 3); - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - - interp3 = brw_vec1_grf(src0.nr+1, 4); - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - brw_LINE(p, brw_null_reg(), interp3, delta0); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); - - /* Calc w */ - brw_math_16( p, dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} + struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4]; + int i, j; + int mark = mark_tmps(c); -static void emit_linterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_LINE(p, brw_null_reg(), interp[i], delta0); - brw_MAC(p, dst, suboffset(interp[i],1), delta1); - } + for (j = 0; j < 4; j++) { + tmp_arg0[j] = arg0[j]; + tmp_arg1[j] = arg1[j]; + tmp_arg2[j] = arg2[j]; } -} - -static void emit_cinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst, src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { + for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, suboffset(interp[i],3)); + for (j = 0; j < 4; j++) { + if (arg0[j].file == dst[i].file && + dst[i].nr == arg0[j].nr) { + tmp_arg0[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg0[j], arg0[j]); + } + if (arg1[j].file == dst[i].file && + dst[i].nr == arg1[j].nr) { + tmp_arg1[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg1[j], arg1[j]); + } + if (arg2[j].file == dst[i].file && + dst[i].nr == arg2[j].nr) { + tmp_arg2[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg2[j], arg2[j]); + } + } } } -} - -static void emit_pinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0, w; - GLuint nr, i; - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - w = get_src_reg(c, inst, 2, 3); - nr = src0.nr; + func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_LINE(p, brw_null_reg(), interp[i], delta0); - brw_MAC(p, dst, suboffset(interp[i],1), - delta1); - brw_MUL(p, dst, dst, w); - } - } + release_tmps(c, mark); } -/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -static void emit_frontfacing(struct brw_wm_compile *c, - const struct prog_instruction *inst) +/* Workaround for using brw_wm_emit.c's emit functions, which expect + * destination regs to be uniquely written. Moves arguments out to + * temporaries as necessary for instructions which use their destination as + * a temporary. + */ +static void +unalias2(struct brw_wm_compile *c, + void (*func)(struct brw_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { struct brw_compile *p = &c->func; - struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); - struct brw_reg dst; - GLuint mask = inst->DstReg.WriteMask; - int i; + struct brw_reg tmp_arg0[4], tmp_arg1[4]; + int i, j; + int mark = mark_tmps(c); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, brw_imm_f(0.0)); - } + for (j = 0; j < 4; j++) { + tmp_arg0[j] = arg0[j]; + tmp_arg1[j] = arg1[j]; } - /* bit 31 is "primitive is back face", so checking < (1 << 31) gives - * us front face - */ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, brw_imm_f(1.0)); + for (j = 0; j < 4; j++) { + if (arg0[j].file == dst[i].file && + dst[i].nr == arg0[j].nr) { + tmp_arg0[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg0[j], arg0[j]); + } + if (arg1[j].file == dst[i].file && + dst[i].nr == arg1[j].nr) { + tmp_arg1[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg1[j], arg1[j]); + } + } } } - brw_set_predicate_control_flag_value(p, 0xff); -} -static void emit_xpd(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - for (i = 0; i < 4; i++) { - GLuint i2 = (i+2)%3; - GLuint i1 = (i+1)%3; - if (mask & (1<<i)) { - struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i); - src0 = negate(get_src_reg(c, inst, 0, i2)); - src1 = get_src_reg_imm(c, inst, 1, i1); - brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, inst, 0, i1); - src1 = get_src_reg_imm(c, inst, 1, i2); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); - } - } - brw_set_saturate(p, 0); -} + func(p, dst, mask, tmp_arg0, tmp_arg1); -static void emit_dp3(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); -} - -static void emit_dp4(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_dph(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src1[3]); - brw_set_saturate(p, 0); -} - -/** - * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. - * Note that the result of the function is smeared across the dest - * register's X, Y, Z and W channels (subject to writemasking of course). - */ -static void emit_math1(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint func) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - /* Get first component of source register */ - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg(c, inst, 0, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_rcp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); -} - -static void emit_rsq(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); -} - -static void emit_sin(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); -} - -static void emit_cos(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); -} - -static void emit_ex2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); -} - -static void emit_lg2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); -} - -static void emit_add(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_ADD(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); + release_tmps(c, mark); } static void emit_arl(struct brw_wm_compile *c, @@ -1201,180 +733,6 @@ static void emit_arl(struct brw_wm_compile *c, brw_set_saturate(p, 0); } - -static void emit_mul(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_MUL(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - -static void emit_frc(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg_imm(c, inst, 0, i); - brw_FRC(p, dst, src0); - } - } - if (inst->SaturateMode != SATURATE_OFF) - brw_set_saturate(p, 0); -} - -static void emit_flr(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg_imm(c, inst, 0, i); - brw_RNDD(p, dst, src0); - } - } - brw_set_saturate(p, 0); -} - - -static void emit_min_max(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - const GLuint mask = inst->DstReg.WriteMask; - const int mark = mark_tmps(c); - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg real_dst = get_dst_reg(c, inst, i); - struct brw_reg src0 = get_src_reg(c, inst, 0, i); - struct brw_reg src1 = get_src_reg(c, inst, 1, i); - struct brw_reg dst; - /* if dst==src0 or dst==src1 we need to use a temp reg */ - GLboolean use_temp = brw_same_reg(dst, src0) || - brw_same_reg(dst, src1); - if (use_temp) - dst = alloc_tmp(c); - else - dst = real_dst; - - /* - printf(" Min/max: dst %d src0 %d src1 %d\n", - dst.nr, src0.nr, src1.nr); - */ - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - if (inst->Opcode == OPCODE_MIN) - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - else - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - if (use_temp) - brw_MOV(p, real_dst, dst); - } - } - brw_pop_insn_state(p); - release_tmps(c, mark); -} - -static void emit_pow(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg_imm(c, inst, 0, 0); - src1 = get_src_reg_imm(c, inst, 1, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); - - brw_math(p, - dst, - BRW_MATH_FUNCTION_POW, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_lrp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - int mark = mark_tmps(c); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - - src1 = get_src_reg_imm(c, inst, 1, i); - - if (src1.nr == dst.nr) { - tmp1 = alloc_tmp(c); - brw_MOV(p, tmp1, src1); - } else - tmp1 = src1; - - src2 = get_src_reg(c, inst, 2, i); - if (src2.nr == dst.nr) { - tmp2 = alloc_tmp(c); - brw_MOV(p, tmp2, src2); - } else - tmp2 = src2; - - brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); - brw_MUL(p, brw_null_reg(), dst, tmp2); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c, mark); - } -} - /** * For GLSL shaders, this KIL will be unconditional. * It may be contained inside an IF/ENDIF structure of course. @@ -1390,89 +748,6 @@ static void emit_kil(struct brw_wm_compile *c) brw_pop_insn_state(p); } -static void emit_mad(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - src2 = get_src_reg_imm(c, inst, 2, i); - brw_MUL(p, dst, src0, src1); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } -} - -static void emit_sop(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint cond) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, src0, src1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - brw_pop_insn_state(p); - } - } -} - -static void emit_slt(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_L); -} - -static void emit_sle(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_LE); -} - -static void emit_sgt(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_G); -} - -static void emit_sge(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_GE); -} - -static void emit_seq(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_EQ); -} - -static void emit_sne(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_NEQ); -} - static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), @@ -2525,196 +1800,6 @@ static void emit_noise4( struct brw_wm_compile *c, release_tmps( c, mark ); } - -static void emit_wpos_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0[2], dst[2]; - - dst[0] = get_dst_reg(c, inst, 0); - dst[1] = get_dst_reg(c, inst, 1); - - src0[0] = get_src_reg(c, inst, 0, 0); - src0[1] = get_src_reg(c, inst, 0, 1); - - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ - if (mask & WRITEMASK_X) { - /* X' = X - origin_x */ - brw_ADD(p, - dst[0], - retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); - } - - if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ - brw_ADD(p, - dst[1], - negate(retype(src0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); - } -} - -/* TODO - BIAS on SIMD8 not working yet... - */ -static void emit_txb(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint i; - GLuint msg_type; - - assert(unit < BRW_MAX_TEX_UNIT); - - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, inst, 0, i); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - default: - /* invalid target */ - abort(); - } - brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ - - if (BRW_IS_IGDNG(p->brw)) { - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; - } else { - /* Does it work well on SIMD8? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); -} - - -static void emit_tex(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint msg_len; - GLuint i, nr; - GLuint emit; - GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; - GLuint msg_type; - - assert(unit < BRW_MAX_TEX_UNIT); - - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, inst, 0, i); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - emit = WRITEMASK_XYZ; - nr = 3; - break; - default: - /* invalid target */ - abort(); - } - msg_len = 1; - - /* move/load S, T, R coords */ - for (i = 0; i < nr; i++) { - static const GLuint swz[4] = {0,1,2,2}; - if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); - else - brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); - msg_len += 1; - } - - if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ - brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ - } - - if (BRW_IS_IGDNG(p->brw)) { - if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; - } else { - /* Does it work for shadow on SIMD8 ? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - shadow ? 6 : 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -} - /** * Resolve subroutine calls after code emit is done. @@ -2771,6 +1856,21 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (c->fp->use_const_buffer) fetch_constants(c, inst); + if (inst->Opcode != OPCODE_ARL) { + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + } + for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) + get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW); + + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); else @@ -2782,126 +1882,131 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) switch (inst->Opcode) { case WM_PIXELXY: - emit_pixel_xy(c, inst); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(c, inst); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_PIXELW: - emit_pixel_w(c, inst); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: - emit_linterp(c, inst); + emit_linterp(p, dst, dst_flags, args[0], args[1]); break; case WM_PINTERP: - emit_pinterp(c, inst); + emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); break; case WM_CINTERP: - emit_cinterp(c, inst); + emit_cinterp(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: - emit_wpos_xy(c, inst); + emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_FB_WRITE: - emit_fb_write(c, inst); + emit_fb_write(c, args[0], args[1], args[2], + INST_AUX_GET_TARGET(inst->Aux), + inst->Aux & INST_AUX_EOT); break; case WM_FRONTFACING: - emit_frontfacing(c, inst); + emit_frontfacing(p, dst, dst_flags); break; case OPCODE_ADD: - emit_add(c, inst); + emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; case OPCODE_ARL: emit_arl(c, inst); break; case OPCODE_FRC: - emit_frc(c, inst); + emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; case OPCODE_FLR: - emit_flr(c, inst); + emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_LRP: - emit_lrp(c, inst); + unalias3(c, emit_lrp, + dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_TRUNC: - emit_trunc(c, inst); + emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); break; case OPCODE_MOV: case OPCODE_SWZ: - emit_mov(c, inst); + emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; case OPCODE_DP3: - emit_dp3(c, inst); + emit_dp3(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP4: - emit_dp4(c, inst); + emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: - emit_xpd(c, inst); + emit_xpd(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DPH: - emit_dph(c, inst); + emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_RCP: - emit_rcp(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_rsq(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_sin(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_cos(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_ex2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_lg2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_MIN: + unalias2(c, emit_min, dst, dst_flags, args[0], args[1]); + break; case OPCODE_MAX: - emit_min_max(c, inst); + unalias2(c, emit_max, dst, dst_flags, args[0], args[1]); break; case OPCODE_DDX: case OPCODE_DDY: - for (j = 0; j < 4; j++) { - if (inst->DstReg.WriteMask & (1 << j)) - dst[j] = get_dst_reg(c, inst, j); - else - dst[j] = brw_null_reg(); - } - get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW); emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), args[0]); break; case OPCODE_SLT: - emit_slt(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_L, args[0], args[1]); break; case OPCODE_SLE: - emit_sle(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_LE, args[0], args[1]); break; case OPCODE_SGT: - emit_sgt(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_G, args[0], args[1]); break; case OPCODE_SGE: - emit_sge(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_GE, args[0], args[1]); break; case OPCODE_SEQ: - emit_seq(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_EQ, args[0], args[1]); break; case OPCODE_SNE: - emit_sne(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_NEQ, args[0], args[1]); break; case OPCODE_MUL: - emit_mul(c, inst); + emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; case OPCODE_POW: - emit_pow(c, inst); + emit_math2(c, BRW_MATH_FUNCTION_POW, + dst, dst_flags, args[0], args[1]); break; case OPCODE_MAD: - emit_mad(c, inst); + emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_NOISE1: emit_noise1(c, inst); @@ -2916,10 +2021,19 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_noise4(c, inst); break; case OPCODE_TEX: - emit_tex(c, inst); + emit_tex(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + inst->TexSrcUnit, + (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); break; case OPCODE_TXB: - emit_txb(c, inst); + emit_txb(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); break; case OPCODE_KIL_NV: emit_kil(c); diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 6279258339..ff4c082d5e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -42,12 +42,14 @@ static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) { assert(c->nr_refs < BRW_WM_MAX_REF); + memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs)); return &c->refs[c->nr_refs++]; } static struct brw_wm_value *get_value( struct brw_wm_compile *c) { assert(c->nr_refs < BRW_WM_MAX_VREG); + memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg)); return &c->vreg[c->nr_vreg++]; } @@ -55,6 +57,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) { assert(c->nr_insns < BRW_WM_MAX_INSN); + memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction)); return &c->instruction[c->nr_insns++]; } @@ -322,8 +325,8 @@ translate_insn(struct brw_wm_compile *c, out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; out->tex_shadow = inst->TexShadow; - out->eot = inst->Aux & 1; - out->target = inst->Aux >> 1; + out->eot = inst->Aux & INST_AUX_EOT; + out->target = INST_AUX_GET_TARGET(inst->Aux); /* Args: */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 6faea018fb..31303febf0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -82,8 +82,8 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (c->key.vp_outputs_written & (1<<j)) { + for (j = 0; j < VERT_RESULT_MAX; j++) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { int fp_index; if (j >= VERT_RESULT_VAR0) diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index dff466587a..aa2e519588 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -66,19 +66,6 @@ static GLuint translate_wrap_mode( GLenum wrap ) } } - -static GLuint U_FIXED(GLfloat value, GLuint frac_bits) -{ - value *= (1<<frac_bits); - return value < 0 ? 0 : value; -} - -static GLint S_FIXED(GLfloat value, GLuint frac_bits) -{ - return value * (1<<frac_bits); -} - - static dri_bo *upload_default_color( struct brw_context *brw, const GLfloat *color ) { @@ -86,8 +73,8 @@ static dri_bo *upload_default_color( struct brw_context *brw, COPY_4V(sdc.color, color); - return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, - NULL, 0 ); + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc), NULL, 0); } @@ -228,8 +215,8 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, */ sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); + sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6); + sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6); sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 361f91292b..f89ed9bce7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -106,7 +106,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; + (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; /* BRW_NEW_DEPTH_BUFFER * Override for NULL depthbuffer case, required by the Pixel Shader Computed * Depth field. diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 51539ac1e7..3f9b1fbfdc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -31,7 +31,6 @@ #include "main/mtypes.h" -#include "main/texformat.h" #include "main/texstore.h" #include "shader/prog_parameter.h" @@ -70,7 +69,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, +static GLuint translate_tex_format( gl_format mesa_format, + GLenum internal_format, GLenum depth_mode ) { switch( mesa_format ) { @@ -86,21 +86,22 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, case MESA_FORMAT_AL88: return BRW_SURFACEFORMAT_L8A8_UNORM; + case MESA_FORMAT_AL1616: + return BRW_SURFACEFORMAT_L16A16_UNORM; + case MESA_FORMAT_RGB888: assert(0); /* not supported for sampling */ return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - else - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case MESA_FORMAT_XRGB8888: + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; case MESA_FORMAT_RGBA8888_REV: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - else - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()"); + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -287,7 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.bo = NULL; key.offset = intelObj->textureOffset; } else { - key.format = firstImage->TexFormat->MesaFormat; + key.format = firstImage->TexFormat; key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; @@ -354,7 +355,10 @@ brw_create_constant_surface( struct brw_context *brw, NULL, NULL); if (key->bo) { - /* Emit relocation to surface contents */ + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 + * bspec ("Data Cache") says that the data cache does not exist as + * a separate cache and is just the sampler cache. + */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_SAMPLER, 0, 0, @@ -527,8 +531,15 @@ brw_update_renderbuffer_surface(struct brw_context *brw, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - switch (irb->texformat->MesaFormat) { + switch (irb->Base.Format) { + /* XRGB and ARGB are treated the same here because the chips in this + * family cannot render to XRGB targets. This means that we have to + * mask writes to alpha (ala glColorMask) and reconfigure the alpha + * blending hardware to use GL_ONE (or GL_ZERO) for cases where + * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used. + */ case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; case MESA_FORMAT_RGB565: @@ -541,8 +552,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; break; default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format); } key.tiling = region->tiling; if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { @@ -564,8 +574,16 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.cpp = 4; key.draw_offset = 0; } + /* _NEW_COLOR */ memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); + + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + key.color_blend = (!ctx->Color._LogicOpEnabled && ctx->Color.BlendEnabled); @@ -660,7 +678,7 @@ brw_wm_get_binding_table(struct brw_context *brw) if (bind_bo == NULL) { GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); + uint32_t data[BRW_WM_MAX_SURF]; int i; for (i = 0; i < brw->wm.nr_surfaces; i++) @@ -685,8 +703,6 @@ brw_wm_get_binding_table(struct brw_context *brw) brw->wm.surf_bo[i]); } } - - free(data); } return bind_bo; @@ -695,11 +711,10 @@ brw_wm_get_binding_table(struct brw_context *brw) static void prepare_wm_surfaces(struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; GLuint i; int old_nr_surfaces; - /* _NEW_BUFFERS */ + /* _NEW_BUFFERS | _NEW_COLOR */ /* Update surfaces for drawing buffers */ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { @@ -712,7 +727,7 @@ static void prepare_wm_surfaces(struct brw_context *brw ) } old_nr_surfaces = brw->wm.nr_surfaces; - brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS; if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; @@ -724,17 +739,8 @@ static void prepare_wm_surfaces(struct brw_context *brw ) /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { - if (texUnit->_Current == intel->frame_buffer_texobj) { - /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[surf]); - brw->wm.nr_surfaces = surf + 1; - } else { - /* regular texture */ - brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; - } + brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = surf + 1; } else { dri_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c deleted file mode 120000 index 4c6b37ada0..0000000000 --- a/src/mesa/drivers/dri/i965/intel_generatemipmap.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_generatemipmap.c
\ No newline at end of file |