diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
43 files changed, 1205 insertions, 1771 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 6e9a9a29a3..7a55333e89 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,7 +14,6 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ - intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ @@ -101,6 +100,5 @@ DRI_LIB_DEPS += -ldrm_intel include ../Makefile.template -symlinks: intel_decode.o: ../intel/intel_decode.c intel_tex_layout.o: ../intel/intel_tex_layout.c diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index c724218cf5..ab301b9a3a 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -34,25 +34,35 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_util.h" +#include "intel_fbo.h" #include "main/macros.h" #include "main/enums.h" static void prepare_cc_vp( struct brw_context *brw ) { + GLcontext *ctx = &brw->intel.ctx; struct brw_cc_viewport ccv; memset(&ccv, 0, sizeof(ccv)); - ccv.min_depth = 0.0; - ccv.max_depth = 1.0; + /* _NEW_TRANSOFORM */ + if (ctx->Transform.DepthClamp) { + /* _NEW_VIEWPORT */ + ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + } else { + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + } dri_bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); + brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv), + NULL, 0); } const struct brw_tracked_state brw_cc_vp = { .dirty = { - .mesa = 0, + .mesa = _NEW_VIEWPORT | _NEW_TRANSFORM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -80,6 +90,28 @@ struct brw_cc_unit_key { GLenum depth_func; }; +/** + * Modify blend function to force destination alpha to 1.0 + * + * If \c function specifies a blend function that uses destination alpha, + * replace it with a function that hard-wires destination alpha to 1.0. This + * is used when rendering to xRGB targets. + */ +static GLenum +fix_xRGB_alpha(GLenum function) +{ + switch (function) { + case GL_DST_ALPHA: + return GL_ONE; + + case GL_ONE_MINUS_DST_ALPHA: + case GL_SRC_ALPHA_SATURATE: + return GL_ZERO; + } + + return function; +} + static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { @@ -123,6 +155,17 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) key->blend_dst_rgb = ctx->Color.BlendDstRGB; key->blend_src_a = ctx->Color.BlendSrcA; key->blend_dst_a = ctx->Color.BlendDstA; + + /* If the renderbuffer is XRGB, we have to frob the blend function to + * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA + * with GL_ONE and GL_ONE_MINUS_DST_ALPAH with GL_ZERO. + */ + if (ctx->Visual.alphaBits == 0) { + key->blend_src_rgb = fix_xRGB_alpha(key->blend_src_rgb); + key->blend_src_a = fix_xRGB_alpha(key->blend_src_a); + key->blend_dst_rgb = fix_xRGB_alpha(key->blend_dst_rgb); + key->blend_dst_a = fix_xRGB_alpha(key->blend_dst_a); + } } key->alpha_enabled = ctx->Color.AlphaEnabled; diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index f45dcf8282..dbd10a5297 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -78,7 +78,7 @@ static void compile_clip_prog( struct brw_context *brw, delta = REG_SIZE; for (i = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<<i)) { + if (c.key.attrs & BITFIELD64_BIT(i)) { c.offset[i] = delta; delta += ATTR_SIZE; } diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index dc550ac793..1c6825510a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -42,7 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 5762c9577c..234b3744bf 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -43,11 +43,14 @@ struct brw_clip_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + GLboolean depth_clamp; }; static void clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) { + GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_CLIP_PROG */ @@ -62,6 +65,9 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) /* BRW_NEW_URB_FENCE */ key->nr_urb_entries = brw->urb.nr_clip_entries; key->urb_size = brw->urb.vsize; + + /* _NEW_TRANSOFORM */ + key->depth_clamp = ctx->Transform.DepthClamp; } static dri_bo * @@ -117,7 +123,8 @@ clip_unit_create_from_key(struct brw_context *brw, clip.clip5.userclip_enable_flags = 0x7f; clip.clip5.userclip_must_clip = 1; clip.clip5.guard_band_enable = 0; - clip.clip5.viewport_z_clip_enable = 1; + if (!key->depth_clamp) + clip.clip5.viewport_z_clip_enable = 1; clip.clip5.viewport_xy_clip_enable = 1; clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; clip.clip5.api_mode = BRW_CLIP_API_OGL; @@ -168,7 +175,7 @@ static void upload_clip_unit( struct brw_context *brw ) const struct brw_tracked_state brw_clip_unit = { .dirty = { - .mesa = 0, + .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index c300c33adc..48685c087b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -105,6 +105,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS; ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index da0e091bfd..fded47aa2f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -231,7 +231,7 @@ struct brw_vs_prog_data { GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint outputs_written; + GLbitfield64 outputs_written; GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -252,20 +252,23 @@ struct brw_vs_ouput_sizes { /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 +/** Max number of render targets in a shader */ +#define BRW_MAX_DRAW_BUFFERS 4 + /** * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture * objects and shader constant buffers (+2). */ -#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) +#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) /** * Helpers to convert drawing buffers, textures and constant buffers * to surface binding table indexes, for WM. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) -#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) +#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) /** * Size of surface binding table for the VS. @@ -317,7 +320,6 @@ struct brw_cache_item { GLuint nr_reloc_bufs; dri_bo *bo; - GLuint data_size; struct brw_cache_item *next; }; @@ -330,7 +332,6 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; - GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; @@ -410,23 +411,6 @@ struct brw_vertex_info { GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ }; - - - -/* Cache for TNL programs. - */ -struct brw_tnl_cache_item { - GLuint hash; - void *key; - void *data; - struct brw_tnl_cache_item *next; -}; - -struct brw_tnl_cache { - struct brw_tnl_cache_item **items; - GLuint size, n_items; -}; - struct brw_query_object { struct gl_query_object Base; @@ -704,10 +688,6 @@ void brw_debug_batch(struct intel_context *intel); /*====================================================================== * brw_tex.c */ -void brwUpdateTextureState( struct intel_context *intel ); -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ); -void brw_FrameBufferTexDestroy( struct brw_context *brw ); void brw_validate_textures( struct brw_context *brw ); @@ -762,9 +742,5 @@ brw_fragment_program_const(const struct gl_fragment_program *p) return (const struct brw_fragment_program *) p; } - - -#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) - #endif diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 4be6c77aa1..aadcfbe2da 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -130,7 +130,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = BRW_NEW_VERTEX_PROGRAM, + .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT, .cache = CACHE_NEW_WM_PROG }, .prepare = calculate_curbe_offsets diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 78d457ad2b..c19510bbd4 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -673,18 +673,10 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 -#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 -#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG 3 /* for IGDNG only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index c53bd47bb5..8bcb6083f7 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -25,13 +25,15 @@ * **************************************************************************/ -#include <stdlib.h> #include "main/glheader.h" #include "main/context.h" #include "main/state.h" -#include "main/api_validate.h" #include "main/enums.h" +#include "tnl/tnl.h" +#include "vbo/vbo_context.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" #include "brw_draw.h" #include "brw_defines.h" @@ -42,11 +44,6 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#include "tnl/tnl.h" -#include "vbo/vbo_context.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" - #define FILE_DEBUG_FLAG DEBUG_BATCH static GLuint prim_to_hw_prim[GL_POLYGON+1] = { @@ -145,7 +142,7 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = 0; + prim_packet.base_vert_location = prim->basevertex; /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; @@ -156,18 +153,14 @@ static void brw_emit_prim(struct brw_context *brw, * the besides the draw code. */ if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); - ADVANCE_BATCH(); + intel_batchbuffer_emit_mi_flush(intel->batch); } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); - ADVANCE_BATCH(); + intel_batchbuffer_emit_mi_flush(intel->batch); } brw->no_batch_wrap = GL_FALSE; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 2b1347b698..271a88dae0 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -25,9 +25,9 @@ * **************************************************************************/ -#include <stdlib.h> #include "main/glheader.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/state.h" #include "main/api_validate.h" @@ -384,7 +384,7 @@ static void brw_prepare_vertices(struct brw_context *brw) input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - if (input->glarray->BufferObj->Name != 0) { + if (_mesa_is_bufferobj(input->glarray->BufferObj)) { struct intel_buffer_object *intel_buffer = intel_buffer_object(input->glarray->BufferObj); @@ -616,7 +616,7 @@ static void brw_prepare_indices(struct brw_context *brw) /* Turn into a proper VBO: */ - if (!bufferobj->Name) { + if (!_mesa_is_bufferobj(bufferobj)) { brw->ib.start_vertex_offset = 0; /* Get new bufferobj, offset: diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 30603bdd0e..39eb88d7c2 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -170,11 +170,11 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; - if (type == BRW_GENERAL_REGISTER_FILE) + if (file == BRW_GENERAL_REGISTER_FILE) assert(nr < BRW_MAX_GRF); - else if (type == BRW_MESSAGE_REGISTER_FILE) - assert(nr < BRW_MAX_MRF); - else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + else if (file == BRW_MESSAGE_REGISTER_FILE) + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); + else if (file == BRW_ARCHITECTURE_REGISTER_FILE) assert(nr <= BRW_ARF_IP); reg.type = type; @@ -538,7 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) static INLINE struct brw_reg brw_message_reg( GLuint nr ) { - assert(nr < BRW_MAX_MRF); + assert((nr & ~(1 << 7)) < BRW_MAX_MRF); return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 241cdc33f8..7ceabba288 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -55,7 +55,8 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { - if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && + dest.file != BRW_MESSAGE_REGISTER_FILE) assert(dest.nr < 128); insn->bits1.da1.dest_reg_file = dest.file; diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index e0cf07256b..010c1c2352 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -40,7 +40,7 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:4; GLuint hint_gs_always:1; GLuint pv_first:1; diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index a761c03153..ed9d2ffe60 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -93,7 +93,10 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) gs.thread4.nr_urb_entries = key->nr_urb_entries; gs.thread4.urb_entry_allocation_size = key->urb_size - 1; - gs.thread4.max_threads = 0; /* Hardware requirement */ + if (key->nr_urb_entries >= 8) + gs.thread4.max_threads = 1; + else + gs.thread4.max_threads = 0; if (BRW_IS_IGDNG(brw)) gs.thread4.rendering_enable = 1; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index ea71857548..4b0d598336 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -66,7 +66,7 @@ static void upload_blend_constant_color(struct brw_context *brw) const struct brw_tracked_state brw_blend_constant_color = { .dirty = { .mesa = _NEW_COLOR, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_blend_constant_color @@ -93,7 +93,7 @@ static void upload_drawing_rect(struct brw_context *brw) const struct brw_tracked_state brw_drawing_rect = { .dirty = { .mesa = _NEW_BUFFERS, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_drawing_rect @@ -317,7 +317,7 @@ static void upload_polygon_stipple(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple = { .dirty = { .mesa = _NEW_POLYGONSTIPPLE, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple @@ -362,7 +362,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { .mesa = _NEW_WINDOW_POS, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple_offset @@ -425,7 +425,7 @@ static void upload_line_stipple(struct brw_context *brw) const struct brw_tracked_state brw_line_stipple = { .dirty = { .mesa = _NEW_LINE, - .brw = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_line_stipple diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index e1c2c7777b..968890f7fb 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -61,7 +61,7 @@ static void compile_sf_prog( struct brw_context *brw, c.key = *key; c.nr_attrs = brw_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + c.nr_setup_attrs = brw_count_bits(c.key.attrs); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; @@ -70,7 +70,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Construct map from attribute number to position in the vertex. */ for (i = idx = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<<i)) { + if (c.key.attrs & BITFIELD64_BIT(i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { @@ -147,7 +147,7 @@ static void upload_sf_prog(struct brw_context *brw) * edgeflag testing here, it is already done in the clip * program. */ - if (key.attrs & (1<<VERT_RESULT_EDGE)) + if (key.attrs & BITFIELD64_BIT(VERT_RESULT_EDGE)) key.primitive = SF_UNFILLED_TRIS; else key.primitive = SF_TRIANGLES; @@ -161,7 +161,7 @@ static void upload_sf_prog(struct brw_context *brw) } key.do_point_sprite = ctx->Point.PointSprite; - key.SpriteOrigin = ctx->Point.SpriteOrigin; + key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 6426b6df9f..0ba731fac9 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -45,19 +45,19 @@ #define SF_UNFILLED_TRIS 3 struct brw_sf_prog_key { - GLuint attrs:32; + GLbitfield64 attrs; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; GLuint linear_color:1; /**< linear interp vs. perspective interp */ - GLuint pad:25; - GLenum SpriteOrigin; + GLuint sprite_origin_lower_left:1; + GLuint pad:24; }; struct brw_sf_point_tex { - GLboolean CoordReplace; + GLboolean CoordReplace; }; struct brw_sf_compile { diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index ca8f97f9f9..3eae41ee74 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -56,7 +56,7 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c, static GLboolean have_attr(struct brw_sf_compile *c, GLuint attr) { - return (c->key.attrs & (1<<attr)) ? 1 : 0; + return (c->key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0; } /*********************************************************************** @@ -122,8 +122,8 @@ static void do_twoside_color( struct brw_sf_compile *c ) * Flat shading */ -#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \ - (1<<VERT_RESULT_COL1)) +#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \ + BITFIELD64_BIT(VERT_RESULT_COL1)) static void copy_colors( struct brw_sf_compile *c, struct brw_reg dst, @@ -312,8 +312,8 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask; - GLuint linear_mask; + GLbitfield64 persp_mask; + GLbitfield64 linear_mask; if (c->key.do_flat_shading || c->key.linear_color) persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | @@ -331,10 +331,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -342,10 +342,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_linear |= 0xf0; } @@ -551,7 +551,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + if (c->key.sprite_origin_lower_left) { brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); @@ -570,7 +570,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) { brw_set_predicate_control_flag_value(p, pc); if (tex->CoordReplace) { - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + if (c->key.sprite_origin_lower_left) { brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 79f37097d3..bb69435ec0 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -93,7 +93,8 @@ static void upload_sf_vp(struct brw_context *brw) } dri_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv), + NULL, 0); } const struct brw_tracked_state brw_sf_vp = { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index d639656b9d..b129b1f1c3 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -112,6 +112,7 @@ void brw_validate_state(struct brw_context *brw); void brw_upload_state(struct brw_context *brw); void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); +void brw_clear_validated_bos(struct brw_context *brw); /*********************************************************************** * brw_state_cache.c @@ -119,16 +120,10 @@ void brw_destroy_state(struct brw_context *brw); dri_bo *brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, + GLuint size, dri_bo **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_cache_data_sz(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs); - dri_bo *brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index f8e46aacf7..e4c9ba7d87 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -245,7 +245,6 @@ brw_upload_cache( struct brw_cache *cache, item->bo = bo; dri_bo_reference(bo); - item->data_size = data_size; if (cache->n_items > cache->size * 1.5) rehash(cache); @@ -275,15 +274,22 @@ brw_upload_cache( struct brw_cache *cache, /** - * This doesn't really work with aux data. Use search/upload instead + * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. + * + * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be + * better to use, as the potentially changing offsets in the data-used-as-key + * will result in excessive cache misses. + * + * If aux data is involved, use search/upload instead. + */ dri_bo * -brw_cache_data_sz(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs) +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { dri_bo *bo; struct brw_cache_item *item; @@ -306,25 +312,6 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } - -/** - * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. - * - * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be - * better to use, as the potentially changing offsets in the data-used-as-key - * will result in excessive cache misses. - */ -dri_bo * -brw_cache_data(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - dri_bo **reloc_bufs, - GLuint nr_reloc_bufs) -{ - return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], - reloc_bufs, nr_reloc_bufs); -} - enum pool_type { DW_SURFACE_STATE, DW_GENERAL_STATE @@ -335,11 +322,9 @@ static void brw_init_cache_id(struct brw_cache *cache, const char *name, enum brw_cache_id id, - GLuint key_size, GLuint aux_size) { cache->name[id] = strdup(name); - cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } @@ -359,91 +344,76 @@ brw_init_non_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, - sizeof(struct brw_cc_viewport), 0); brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), 0); brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), 0); brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, - 0, /* variable key/data size */ 0); brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), 0); brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, - sizeof(struct brw_sf_viewport), 0); brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), 0); brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), 0); brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), 0); brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), 0); brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); } @@ -463,13 +433,11 @@ brw_init_surface_cache(struct brw_context *brw) brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, - sizeof(struct brw_surface_state), 0); brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, - 0, 0); } @@ -534,14 +502,9 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) for (i = 0; i < cache->size; i++) { for (prev = &cache->items[i]; *prev;) { struct brw_cache_item *c = *prev; - int j; - - for (j = 0; j < c->nr_reloc_bufs; j++) { - if (c->reloc_bufs[j] == bo) - break; - } - if (j != c->nr_reloc_bufs) { + if (drm_intel_bo_references(c->bo, bo)) { + int j; *prev = c->next; @@ -551,17 +514,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) free((void *)c->key); free(c); cache->n_items--; - - /* Delete up the tree. Notably we're trying to get from - * a request to delete the surface, to deleting the surface state - * object, to deleting the binding table. We're slack and restart - * the deletion process when we do this because the other delete - * may kill our *prev. - */ - brw_state_cache_bo_delete(cache, c->bo); - prev = &cache->items[i]; } else { - prev = &(*prev)->next; + prev = &c->next; } } } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index ee447afa62..af8dfb4c15 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "intel_batchbuffer.h" +#include "intel_buffers.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -142,7 +143,7 @@ static void xor_states( struct brw_state_flags *result, result->cache = a->cache ^ b->cache; } -static void +void brw_clear_validated_bos(struct brw_context *brw) { int i; @@ -324,6 +325,8 @@ void brw_validate_state( struct brw_context *brw ) } } + intel_check_front_buffer_rendering(intel); + /* Make sure that the textures which are referenced by the current * brw fragment program are actually present/valid. * If this fails, we can experience GPU lock-ups. diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c index 71bff166dd..e911b105b2 100644 --- a/src/mesa/drivers/dri/i965/brw_tex.c +++ b/src/mesa/drivers/dri/i965/brw_tex.c @@ -39,38 +39,6 @@ #include "intel_tex.h" #include "brw_context.h" - -void brw_FrameBufferTexInit( struct brw_context *brw, - struct intel_region *region ) -{ - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; - struct gl_texture_object *obj; - struct gl_texture_image *img; - - intel->frame_buffer_texobj = obj = - ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D ); - - obj->MinFilter = GL_NEAREST; - obj->MagFilter = GL_NEAREST; - - img = ctx->Driver.NewTextureImage( ctx ); - - _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img, - region->pitch, region->height, 1, 0, - region->cpp == 4 ? GL_RGBA : GL_RGB ); - - _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img ); -} - -void brw_FrameBufferTexDestroy( struct brw_context *brw ) -{ - if (brw->intel.frame_buffer_texobj != NULL) - brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx, - brw->intel.frame_buffer_texobj ); - brw->intel.frame_buffer_texobj = NULL; -} - /** * Finalizes all textures, completing any rendering that needs to be done * to prepare them. diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index ce21aa4869..bba9249d1b 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -35,7 +35,7 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) +GLuint brw_count_bits(uint64_t val) { GLuint i; for (i = 0; val ; val >>= 1) diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 33e7cd87e4..04f3175d3e 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -35,7 +35,7 @@ #include "main/mtypes.h" -extern GLuint brw_count_bits( GLuint val ); +extern GLuint brw_count_bits(uint64_t val); extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index f0c79efbd9..fd055e225e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -56,7 +56,7 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; + c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE); c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a4f34660de..00efd3443d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -147,7 +147,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf = 4; for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { @@ -1124,7 +1124,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || c->key.nr_userclip || BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); @@ -1134,7 +1134,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); - if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); @@ -1210,7 +1210,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ eot, /* eot */ - 1, /* writes complete */ + eot, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); @@ -1224,7 +1224,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ GLuint i, mrf = 0; for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { /* move from GRF to MRF */ brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); mrf++; @@ -1271,10 +1271,60 @@ post_vs_emit( struct brw_vs_compile *c, } } +static GLboolean +accumulator_contains(struct brw_vs_compile *c, struct brw_reg val) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *prev_insn = &p->store[p->nr_insn - 1]; + + if (p->nr_insn == 0) + return GL_FALSE; + + if (val.address_mode != BRW_ADDRESS_DIRECT) + return GL_FALSE; + + switch (prev_insn->header.opcode) { + case BRW_OPCODE_MOV: + case BRW_OPCODE_MAC: + case BRW_OPCODE_MUL: + if (prev_insn->header.access_mode == BRW_ALIGN_16 && + prev_insn->header.execution_size == val.width && + prev_insn->bits1.da1.dest_reg_file == val.file && + prev_insn->bits1.da1.dest_reg_type == val.type && + prev_insn->bits1.da1.dest_address_mode == val.address_mode && + prev_insn->bits1.da1.dest_reg_nr == val.nr && + prev_insn->bits1.da16.dest_subreg_nr == val.subnr / 16 && + prev_insn->bits1.da16.dest_writemask == 0xf) + return GL_TRUE; + else + return GL_FALSE; + default: + return GL_FALSE; + } +} + static uint32_t -get_predicate(uint32_t swizzle) +get_predicate(const struct prog_instruction *inst) { - switch (swizzle) { + if (inst->DstReg.CondMask == COND_TR) + return BRW_PREDICATE_NONE; + + /* All of GLSL only produces predicates for COND_NE and one channel per + * vector. Fail badly if someone starts doing something else, as it might + * mean infinite looping or something. + * + * We'd like to support all the condition codes, but our hardware doesn't + * quite match the Mesa IR, which is modeled after the NV extensions. For + * those, the instruction may update the condition codes or not, then any + * later instruction may use one of those condition codes. For gen4, the + * instruction may update the flags register based on one of the condition + * codes output by the instruction, and then further instructions may + * predicate on that. We can probably support this, but it won't + * necessarily be easy. + */ + assert(inst->DstReg.CondMask == COND_NE); + + switch (inst->DstReg.CondSwizzle) { case SWIZZLE_XXXX: return BRW_PREDICATE_ALIGN16_REPLICATE_X; case SWIZZLE_YYYY: @@ -1284,7 +1334,8 @@ get_predicate(uint32_t swizzle) case SWIZZLE_WWWW: return BRW_PREDICATE_ALIGN16_REPLICATE_W; default: - _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle); + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", + inst->DstReg.CondMask); return BRW_PREDICATE_NORMAL; } } @@ -1296,6 +1347,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; @@ -1429,7 +1481,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); break; case OPCODE_MAD: - brw_MOV(p, brw_acc_reg(), args[2]); + if (!accumulator_contains(c, args[2])) + brw_MOV(p, brw_acc_reg(), args[2]); brw_MAC(p, dst, args[0], args[1]); break; case OPCODE_MAX: @@ -1494,8 +1547,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) case OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); - if_inst[if_depth]->header.predicate_control = - get_predicate(inst->DstReg.CondSwizzle); + /* Note that brw_IF smashes the predicate_control field. */ + if_inst[if_depth]->header.predicate_control = get_predicate(inst); if_depth++; break; case OPCODE_ELSE: @@ -1505,45 +1558,48 @@ void brw_vs_emit(struct brw_vs_compile *c ) assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); break; -#if 0 case OPCODE_BGNLOOP: loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: + brw_set_predicate_control(p, get_predicate(inst)); brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CONT: + brw_set_predicate_control(p, get_predicate(inst)); brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: { struct brw_instruction *inst0, *inst1; + GLuint br = 1; + loop_depth--; + + if (BRW_IS_IGDNG(brw)) + br = 2; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } } } break; -#else - (void) loop_inst; - (void) loop_depth; -#endif case OPCODE_BRA: - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_set_predicate_control_flag_value(p, 0xff); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index d790ab6555..7285466645 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -109,10 +109,39 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - if (BRW_IS_IGDNG(brw)) - vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; - else - vs.thread4.nr_urb_entries = key->nr_urb_entries; + if (BRW_IS_IGDNG(brw)) { + switch (key->nr_urb_entries) { + case 8: + case 12: + case 16: + case 32: + case 64: + case 96: + case 128: + case 168: + case 192: + case 224: + case 256: + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + break; + default: + assert(0); + } + } else { + switch (key->nr_urb_entries) { + case 8: + case 12: + case 16: + case 32: + break; + case 64: + assert(BRW_IS_G4X(brw)); + break; + default: + assert(0); + } + vs.thread4.nr_urb_entries = key->nr_urb_entries; + } vs.thread4.urb_entry_allocation_size = key->urb_size - 1; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 746d097d23..3bc9840a97 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -30,7 +30,6 @@ */ #include "main/mtypes.h" -#include "main/texformat.h" #include "main/texstore.h" #include "shader/prog_parameter.h" @@ -69,10 +68,13 @@ brw_vs_update_constant_buffer(struct brw_context *brw) */ _mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters); + intel_bo_map_gtt_preferred(intel, const_buffer, GL_TRUE); for (i = 0; i < params->NumParameters; i++) { - dri_bo_subdata(const_buffer, i * 4 * sizeof(float), 4 * sizeof(float), - params->ParameterValues[i]); + memcpy(const_buffer->virtual + i * 4 * sizeof(float), + params->ParameterValues[i], + 4 * sizeof(float)); } + intel_bo_unmap_gtt_preferred(intel, const_buffer); return const_buffer; } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ac11790151..34aaea3736 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -46,7 +46,7 @@ #include "brw_state.h" #include "brw_fallback.h" #include "brw_vs.h" - +#include "brw_wm.h" static void dri_bo_release(dri_bo **bo) @@ -66,10 +66,14 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); - - _mesa_free(brw->wm.compile_data); - - brw_FrameBufferTexDestroy( brw ); + brw_clear_validated_bos(brw); + if (brw->wm.compile_data) { + _mesa_free(brw->wm.compile_data->instruction); + _mesa_free(brw->wm.compile_data->vreg); + _mesa_free(brw->wm.compile_data->refs); + _mesa_free(brw->wm.compile_data->prog_instructions); + _mesa_free(brw->wm.compile_data); + } for (i = 0; i < brw->state.nr_color_regions; i++) intel_region_release(&brw->state.color_regions[i]); @@ -177,20 +181,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence ) brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } -/* called from intelWaitForIdle() and intelFlush() - * - * For now, just flush everything. Could be smarter later. - */ -static GLuint brw_flush_cmd( void ) -{ - struct brw_mi_flush flush; - flush.opcode = CMD_MI_FLUSH; - flush.pad = 0; - flush.flags = BRW_FLUSH_STATE_CACHE; - return *(GLuint *)&flush; -} - - static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) { /* nothing */ @@ -211,6 +201,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; - brw->intel.vtbl.flush_cmd = brw_flush_cmd; brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 2292de94c4..6895f64410 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -29,7 +29,6 @@ * Keith Whitwell <keith@tungstengraphics.com> */ -#include "main/texformat.h" #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" @@ -153,8 +152,21 @@ static void do_wm_prog( struct brw_context *brw, */ return; } + c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * + sizeof(*c->prog_instructions)); + c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); } else { + void *instruction = c->instruction; + void *prog_instructions = c->prog_instructions; + void *vreg = c->vreg; + void *refs = c->refs; memset(c, 0, sizeof(*brw->wm.compile_data)); + c->instruction = instruction; + c->prog_instructions = prog_instructions; + c->vreg = vreg; + c->refs = refs; } memcpy(&c->key, key, sizeof(*key)); @@ -218,7 +230,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -288,7 +300,7 @@ static void brw_wm_populate_key( struct brw_context *brw, const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; if (img->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1 << i; - if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) + if (img->TexFormat == MESA_FORMAT_YCBCR) key->yuvtex_swap_mask |= 1 << i; } @@ -309,6 +321,9 @@ static void brw_wm_populate_key( struct brw_context *brw, * from the incoming screen origin relative position we get as part of our * payload. * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * * We could avoid recompiling by including this as a constant referenced by * our program, but if we were to do that it would also be nice to handle * getting that constant updated at batchbuffer submit time (when we @@ -317,17 +332,21 @@ static void brw_wm_populate_key( struct brw_context *brw, * just avoid using this as key data if the program doesn't use * fragment.position. * - * This pretty much becomes moot with DRI2 and redirected buffers anyway, - * as our origins will always be zero then. + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. */ - if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; + if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { + if (brw->intel.driDrawable != NULL) { + key->origin_x = brw->intel.driDrawable->x; + key->origin_y = brw->intel.driDrawable->y; + key->drawable_height = brw->intel.driDrawable->h; + } } + key->nr_color_regions = brw->state.nr_color_regions; + /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* The unique fragment program ID */ key->program_string_id = fp->id; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index ae98b5492d..9dcb6e14bb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -38,6 +38,8 @@ #include "brw_context.h" #include "brw_eu.h" +#define SATURATE (1<<5) + /* A big lookup table is used to figure out which and how many * additional regs will inserted before the main payload in the WM * program execution. These mainly relate to depth and stencil @@ -65,18 +67,19 @@ struct brw_wm_prog_key { GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; + GLuint nr_color_regions:2; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; + GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint origin_x, origin_y; - GLuint drawable_height; - GLuint vp_outputs_written; + GLushort origin_x, origin_y; + GLushort drawable_height; + GLbitfield64 vp_outputs_written; }; @@ -151,15 +154,16 @@ struct brw_wm_instruction { }; -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) #define BRW_WM_MAX_PARAM 256 #define BRW_WM_MAX_CONST 256 -#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS #define BRW_WM_MAX_SUBROUTINE 16 +/* used in masks next to WRITEMASK_*. */ +#define SATURATE (1<<5) /* New opcodes to track internal operations required for WM unit. @@ -198,19 +202,18 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. */ - struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + struct prog_instruction *prog_instructions; GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; GLuint fp_fragcolor_emitted; - GLuint fp_deriv_emitted; struct prog_src_register pixel_xy; struct prog_src_register delta_xy; struct prog_src_register pixel_w; - struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + struct brw_wm_value *vreg; GLuint nr_vreg; struct brw_wm_value creg[BRW_WM_MAX_PARAM]; @@ -227,10 +230,10 @@ struct brw_wm_compile { struct brw_wm_ref undef_ref; struct brw_wm_value undef_value; - struct brw_wm_ref refs[BRW_WM_MAX_REF]; + struct brw_wm_ref *refs; GLuint nr_refs; - struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + struct brw_wm_instruction *instruction; GLuint nr_insns; struct brw_wm_constref constref[BRW_WM_MAX_CONST]; @@ -270,6 +273,12 @@ struct brw_wm_compile { }; +/** Bits for prog_instruction::Aux field */ +#define INST_AUX_EOT 0x1 +#define INST_AUX_TARGET(T) (T << 1) +#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1) + + GLuint brw_wm_nr_args( GLuint opcode ); GLuint brw_wm_is_scalar_result( GLuint opcode ); @@ -299,5 +308,141 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +/* brw_wm_emit.c */ +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0); +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot); +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask); +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w); +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask); +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow); +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler); +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 9b1f54414b..5390fd2584 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,8 +34,6 @@ #include "brw_context.h" #include "brw_wm.h" -#define SATURATE (1<<5) - /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -46,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) return reg; } + /* Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, @@ -62,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.8 -- ? */ - -static void emit_pixel_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask) +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask) { + struct brw_compile *p = &c->func; struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + struct brw_reg dst0_uw, dst1_uw; + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + if (c->dispatch_width == 16) { + dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } else { + dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } + /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, - vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + dst0_uw, stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, - vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + dst1_uw, stride(suboffset(r1_uw,5), 2, 4, 0), brw_imm_v(0x11001100)); } - - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + brw_pop_insn_state(p); } - -static void emit_delta_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -120,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_compile *p = &c->func; @@ -148,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } -static void emit_pixel_w( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas) +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { + struct brw_compile *p = &c->func; + /* Don't need this if all you are doing is interpolating color, for * instance. */ @@ -167,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p, brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); /* Calc w */ - brw_math_16( p, dst[3], - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); + if (c->dispatch_width == 16) { + brw_math_16(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } else { + brw_math(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } } } - -static void emit_linterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas ) +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -201,12 +218,12 @@ static void emit_linterp( struct brw_compile *p, } -static void emit_pinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas, - const struct brw_reg *w) +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -231,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p, } -static void emit_cinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -253,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p, } /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -static void emit_frontfacing( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask ) +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask) { struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); GLuint i; @@ -281,13 +298,86 @@ static void emit_frontfacing( struct brw_compile *p, brw_set_predicate_control_flag_value(p, 0xff); } -static void emit_alu1( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input + * looking like: + * + * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * + * and we're trying to produce: + * + * DDX DDY + * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) + * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) + * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) + * (ss0.br - ss0.bl) (ss0.tr - ss0.br) + * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) + * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) + * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) + * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * + * and add another set of two more subspans if in 16-pixel dispatch mode. + * + * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result + * for each pair, and vertstride = 2 jumps us 2 elements after processing a + * pair. But for DDY, it's harder, as we want to produce the pairs swizzled + * between each other. We could probably do it like ddx and swizzle the right + * order later, but bail for now and just produce + * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + */ +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0) +{ + int i; + struct brw_reg src0, src1; + + if (mask & SATURATE) + brw_set_saturate(p, 1); + for (i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + if (is_ddx) { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 1, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } else { + src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + } + brw_ADD(p, dst[i], src0, negate(src1)); + } + } + if (mask & SATURATE) + brw_set_saturate(p, 0); +} + +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { GLuint i; @@ -305,15 +395,15 @@ static void emit_alu1( struct brw_compile *p, } -static void emit_alu2( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -331,12 +421,12 @@ static void emit_alu2( struct brw_compile *p, } -static void emit_mad( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -351,26 +441,12 @@ static void emit_mad( struct brw_compile *p, } } -static void emit_trunc( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) -{ - GLuint i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - brw_RNDZ(p, dst[i], arg0[i]); - } - } -} - -static void emit_lrp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; @@ -390,21 +466,24 @@ static void emit_lrp( struct brw_compile *p, } } -static void emit_sop( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - GLuint cond, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_MOV(p, dst[i], brw_imm_f(0)); + brw_push_insn_state(p); brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst[i], brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_MOV(p, dst[i], brw_imm_f(1.0)); - brw_set_predicate_control_flag_value(p, 0xff); + brw_pop_insn_state(p); } } } @@ -488,11 +567,11 @@ static void emit_cmp( struct brw_compile *p, } } -static void emit_max( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -512,11 +591,11 @@ static void emit_max( struct brw_compile *p, } } -static void emit_min( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -537,11 +616,11 @@ static void emit_min( struct brw_compile *p, } -static void emit_dp3( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -559,11 +638,11 @@ static void emit_dp3( struct brw_compile *p, } -static void emit_dp4( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -582,11 +661,11 @@ static void emit_dp4( struct brw_compile *p, } -static void emit_dph( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -605,11 +684,11 @@ static void emit_dph( struct brw_compile *p, } -static void emit_xpd( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -630,41 +709,68 @@ static void emit_xpd( struct brw_compile *p, } -static void emit_math1( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); + /* If compressed, this will write message reg 2,3 from arg0.x's 16 + * channels. + */ brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ - brw_math_16(p, - dst[dst_chan], + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, + dst[dst_chan], + function, + saturate, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, + saturate, + 3, brw_null_reg(), + BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + } + brw_pop_insn_state(p); } -static void emit_math2( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ @@ -675,183 +781,231 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2), arg0[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + } - - /* Send two messages to perform all 16 operations: - */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + saturate, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, - offset(dst[dst_chan],1), - function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 4, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - + /* Send two messages to perform all 16 operations: + */ + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + saturate, + 4, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } brw_pop_insn_state(p); } - -static void emit_tex( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow) { struct brw_compile *p = &c->func; - GLuint msgLength, responseLength; - GLuint i, nr; + struct brw_reg dst_retyped; + GLuint cur_mrf = 2, response_length; + GLuint i, nr_texcoords; GLuint emit; GLuint msg_type; + GLuint mrf_per_channel; + GLuint simd_mode; + + if (c->dispatch_width == 16) { + mrf_per_channel = 2; + response_length = 8; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } else { + mrf_per_channel = 1; + response_length = 4; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + } /* How many input regs are there? */ - switch (inst->tex_idx) { + switch (tex_idx) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; - nr = 1; + nr_texcoords = 1; break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: emit = WRITEMASK_XY; - nr = 2; + nr_texcoords = 2; break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: emit = WRITEMASK_XYZ; - nr = 3; + nr_texcoords = 3; break; default: /* unexpected target */ abort(); } - /* For shadow comparisons, we have to supply u,v,r. */ - if (inst->tex_shadow) - nr = 3; + /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ + if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8) + nr_texcoords = 3; - msgLength = 1; + /* For shadow comparisons, we have to supply u,v,r. */ + if (shadow) + nr_texcoords = 3; - for (i = 0; i < nr; i++) { + /* Emit the texcoords. */ + for (i = 0; i < nr_texcoords; i++) { if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(msgLength+1), arg[i]); + brw_MOV(p, brw_message_reg(cur_mrf), arg[i]); else - brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0)); - msgLength += 2; - } - - /* Fill in the cube map array index value. */ - if (BRW_IS_IGDNG(p->brw) && inst->tex_shadow) { - brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0)); - msgLength += 2; + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; } /* Fill in the shadow comparison reference value. */ - if (inst->tex_shadow) { - brw_MOV(p, brw_message_reg(msgLength+1), arg[2]); - msgLength += 2; + if (shadow) { + if (BRW_IS_IGDNG(p->brw)) { + /* Fill in the cube map array index value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } else if (c->dispatch_width == 8) { + /* Fill in the LOD bias value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; + } + brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); + cur_mrf += mrf_per_channel; } - responseLength = 8; /* always */ - if (BRW_IS_IGDNG(p->brw)) { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; } else { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + /* Note that G45 and older determines shadow compare and dispatch width + * from message length for most messages. + */ + if (c->dispatch_width == 16 && shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; } - brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + brw_SAMPLE(p, + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, - msg_type, - responseLength, - msgLength, - 0, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, + msg_type, + response_length, + cur_mrf - 1, + 0, 1, - BRW_SAMPLER_SIMD_MODE_SIMD16); + simd_mode); } -static void emit_txb( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler) { struct brw_compile *p = &c->func; GLuint msgLength; GLuint msg_type; - /* Shadow ignored for txb. + GLuint mrf_per_channel; + GLuint response_length; + struct brw_reg dst_retyped; + + /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased + * samples, so we'll use the 16-wide instruction, leave the second halves + * undefined, and trust the execution mask to keep the undefined pixels + * from mattering. */ - switch (inst->tex_idx) { + if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) { + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + mrf_per_channel = 2; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 8; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + mrf_per_channel = 1; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 4; + } + + /* Shadow ignored for txb. */ + switch (tex_idx) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), arg[2]); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]); break; default: /* unexpected target */ abort(); } - brw_MOV(p, brw_message_reg(8), arg[3]); - msgLength = 9; - - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]); + msgLength = 2 + 4 * mrf_per_channel - 1; brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, msg_type, - 8, /* responseLength */ + response_length, msgLength, 0, 1, @@ -859,11 +1013,13 @@ static void emit_txb( struct brw_wm_compile *c, } -static void emit_lit( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +static void emit_lit(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; + assert((mask & WRITEMASK_XW) == 0); if (mask & WRITEMASK_Y) { @@ -873,7 +1029,7 @@ static void emit_lit( struct brw_compile *p, } if (mask & WRITEMASK_Z) { - emit_math2(p, BRW_MATH_FUNCTION_POW, + emit_math2(c, BRW_MATH_FUNCTION_POW, &dst[2], WRITEMASK_X | (mask & SATURATE), &arg0[1], @@ -918,6 +1074,20 @@ static void emit_kil( struct brw_wm_compile *c, } } +/* KIL_NV kills the pixels that are currently executing, not based on a test + * of the arguments. + */ +static void emit_kil_nv( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, r0uw, c->emit_mask_reg, r0uw); + brw_pop_insn_state(p); +} static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, @@ -926,7 +1096,13 @@ static void fire_fb_write( struct brw_wm_compile *c, GLuint eot ) { struct brw_compile *p = &c->func; - + struct brw_reg dst; + + if (c->dispatch_width == 16) + dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); + /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ @@ -943,7 +1119,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, - retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), target, @@ -975,14 +1151,15 @@ static void emit_aa( struct brw_wm_compile *c, * \param arg1 the pass-through depth value * \param arg2 the shader-computed depth value */ -static void emit_fb_write( struct brw_wm_compile *c, - struct brw_reg *arg0, - struct brw_reg *arg1, - struct brw_reg *arg2, - GLuint target, - GLuint eot) +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; GLuint nr = 2; GLuint channel; @@ -994,30 +1171,37 @@ static void emit_fb_write( struct brw_wm_compile *c, /* I don't really understand how this achieves the color interleave * (ie RGBARGBA) in the result: [Do the saturation here] */ - { - brw_push_insn_state(p); - - for (channel = 0; channel < 4; channel++) { + brw_push_insn_state(p); + + for (channel = 0; channel < 4; channel++) { + if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) { + /* By setting the high bit of the MRF register number, we indicate + * that we want COMPR4 mode - instead of doing the usual destination + * + 1 for the second half we get destination + 4. + */ + brw_MOV(p, + brw_message_reg(nr + channel + (1 << 7)), + arg0[channel]); + } else { /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]); - - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, - brw_message_reg(nr + channel + 4), - sechalf(arg0[channel])); - } - /* skip over the regs populated above: - */ - nr += 8; - - brw_pop_insn_state(p); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(nr + channel + 4), + sechalf(arg0[channel])); + } + } } + /* skip over the regs populated above: + */ + nr += 8; + brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) { @@ -1067,7 +1251,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1081,7 +1265,6 @@ static void emit_fb_write( struct brw_wm_compile *c, } } - /** * Move a GPR to scratch memory. */ @@ -1219,7 +1402,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: @@ -1231,7 +1414,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_PIXELW: - emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: @@ -1268,6 +1451,14 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; + case OPCODE_DDX: + emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); + break; + + case OPCODE_DDY: + emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); + break; + case OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; @@ -1281,7 +1472,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_TRUNC: - emit_trunc(p, dst, dst_flags, args[0]); + emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); break; case OPCODE_LRP: @@ -1308,27 +1499,27 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Higher math functions: */ case OPCODE_RCP: - emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: @@ -1336,13 +1527,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: - emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: @@ -1380,23 +1571,30 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_LIT: - emit_lit(p, dst, dst_flags, args[0]); + emit_lit(c, dst, dst_flags, args[0]); break; /* Texturing operations: */ case OPCODE_TEX: - emit_tex(c, inst, dst, dst_flags, args[0]); + emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit, + inst->tex_shadow); break; case OPCODE_TXB: - emit_txb(c, inst, dst, dst_flags, args[0]); + emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit); break; case OPCODE_KIL: emit_kil(c, args[0]); break; + case OPCODE_KIL_NV: + emit_kil_nv(c); + break; + default: _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 123fe841c3..7d03179588 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -181,6 +181,9 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) { + assert(c->nr_fp_insns < BRW_WM_MAX_INSN); + memset(&c->prog_instructions[c->nr_fp_insns], 0, + sizeof(*c->prog_instructions)); return &c->prog_instructions[c->nr_fp_insns++]; } @@ -447,7 +450,6 @@ static void emit_interp( struct brw_wm_compile *c, break; case FRAG_ATTRIB_FACE: - /* XXX review/test this case */ emit_op(c, WM_FRONTFACING, dst_mask(dst, WRITEMASK_X), @@ -494,38 +496,6 @@ static void emit_interp( struct brw_wm_compile *c, c->fp_interp_emitted |= 1<<idx; } -static void emit_ddx( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDX, - inst->DstReg, - 0, - interp, - get_pixel_w(c), - src_undef()); -} - -static void emit_ddy( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - GLuint idx = inst->SrcReg[0].Index; - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - - c->fp_deriv_emitted |= 1<<idx; - emit_op(c, - OPCODE_DDY, - inst->DstReg, - 0, - interp, - get_pixel_w(c), - src_undef()); -} - /*********************************************************************** * Hacks to extend the program parameter and constant lists. */ @@ -988,7 +958,7 @@ static void precalc_txp( struct brw_wm_compile *c, -static void emit_fb_write( struct brw_wm_compile *c ) +static void emit_render_target_writes( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); @@ -996,36 +966,34 @@ static void emit_fb_write( struct brw_wm_compile *c ) GLuint i; struct prog_instruction *inst, *last_inst; - struct brw_context *brw = c->func.brw; /* The inst->Aux field is used for FB write target and the EOT marker */ - if (brw->state.nr_color_regions > 1) { - for (i = 0 ; i < brw->state.nr_color_regions; i++) { + if (c->key.nr_color_regions > 1) { + for (i = 0 ; i < c->key.nr_color_regions; i++) { outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, - outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = INST_AUX_TARGET(i); if (c->fp_fragcolor_emitted) { outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); + inst->Aux = INST_AUX_TARGET(i); } } - last_inst->Aux |= 1; //eot + last_inst->Aux |= INST_AUX_EOT; } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ - if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) + if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = 1|(0<<1); + inst->Aux = INST_AUX_EOT | INST_AUX_TARGET(0); } } @@ -1186,14 +1154,8 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) */ out->DstReg.WriteMask = 0; break; - case OPCODE_DDX: - emit_ddx(c, inst); - break; - case OPCODE_DDY: - emit_ddy(c, inst); - break; case OPCODE_END: - emit_fb_write(c); + emit_render_target_writes(c); break; case OPCODE_PRINT: break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 7c210abbce..e8c2cb66ec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -22,6 +22,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; + for (i = 0; i < fp->Base.NumInstructions; i++) { const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { @@ -31,8 +32,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) case OPCODE_CAL: case OPCODE_BRK: case OPCODE_RET: - case OPCODE_DDX: - case OPCODE_DDY: case OPCODE_NOISE1: case OPCODE_NOISE2: case OPCODE_NOISE3: @@ -293,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c) int i, j; struct brw_reg reg; int urb_read_length = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; GLuint reg_index = 0; memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); @@ -372,7 +371,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } - if (c->key.vp_outputs_written & (1 << i)) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { reg_index += 2; } } @@ -551,42 +550,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, } } - -/** - * Same as \sa get_src_reg() but if the register is a literal, emit - * a brw_reg encoding the literal. - * Note that a brw instruction only allows one src operand to be a literal. - * For instructions with more than one operand, only the second can be a - * literal. This means that we treat some literals as constants/uniforms - * (which why PROGRAM_CONSTANT is checked in fetch_constants()). - * - */ -static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - if (src->File == PROGRAM_CONSTANT) { - /* a literal */ - const int component = GET_SWZ(src->Swizzle, channel); - const GLfloat *param = - c->fp->program.Base.Parameters->ParameterValues[src->Index]; - GLfloat value = param[component]; - if (src->Negate & (1 << channel)) - value = -value; - if (src->Abs) - value = FABSF(value); -#if 0 - printf(" form immed value %f for chan %d\n", value, channel); -#endif - return brw_imm_f(value); - } - else { - return get_src_reg(c, inst, srcRegIndex, channel); - } -} - - /** * Subroutines are minimal support for resusable instruction sequences. * They are implemented as simply as possible to minimise overhead: there @@ -651,542 +614,110 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -static void emit_trunc( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - src = get_src_reg(c, inst, 0, i); - brw_RNDZ(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg src, dst; - dst = get_dst_reg(c, inst, i); - /* XXX some moves from immediate value don't work reliably!!! */ - /*src = get_src_reg_imm(c, inst, 0, i);*/ - src = get_src_reg(c, inst, 0, i); - brw_MOV(p, dst, src); - } - } - brw_set_saturate(p, 0); -} - -static void emit_pixel_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); - - struct brw_reg dst0, dst1; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } -} - -static void emit_delta_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg dst0, dst1, src0, src1; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - src0 = get_src_reg(c, inst, 0, 0); - src1 = get_src_reg(c, inst, 0, 1); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst0, - retype(src0, BRW_REGISTER_TYPE_UW), - negate(r1)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst1, - retype(src1, BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - - } -} - -static void fire_fb_write( struct brw_wm_compile *c, - GLuint base_reg, - GLuint nr, - GLuint target, - GLuint eot) -{ - struct brw_compile *p = &c->func; - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - target, - nr, - 0, - eot); -} - -static void emit_fb_write(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - GLuint target, eot; - struct brw_reg src0; - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, inst, 0, channel); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { - src0 = get_src_reg(c, inst, 2, 2); - brw_MOV(p, brw_message_reg(nr), src0); - } - else { - src0 = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } - - nr += 2; - } - - if (c->key.dest_depth_reg) { - const GLuint comp = c->key.dest_depth_reg / 2; - const GLuint off = c->key.dest_depth_reg % 2; - - if (off != 0) { - /* XXX this code needs review/testing */ - struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); - struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); - - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); - /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1_1); - brw_pop_insn_state(p); - } - else - { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); - } - nr += 2; - } - - target = inst->Aux >> 1; - eot = inst->Aux & 1; - fire_fb_write(c, 0, nr, target, eot); -} - -static void emit_pixel_w( struct brw_wm_compile *c, - const struct prog_instruction *inst) +/* Workaround for using brw_wm_emit.c's emit functions, which expect + * destination regs to be uniquely written. Moves arguments out to + * temporaries as necessary for instructions which use their destination as + * a temporary. + */ +static void +unalias3(struct brw_wm_compile *c, + void (*func)(struct brw_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - if (mask & WRITEMASK_W) { - struct brw_reg dst, src0, delta0, delta1; - struct brw_reg interp3; - - dst = get_dst_reg(c, inst, 3); - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - - interp3 = brw_vec1_grf(src0.nr+1, 4); - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - brw_LINE(p, brw_null_reg(), interp3, delta0); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); - - /* Calc w */ - brw_math_16( p, dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} + struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4]; + int i, j; + int mark = mark_tmps(c); -static void emit_linterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_LINE(p, brw_null_reg(), interp[i], delta0); - brw_MAC(p, dst, suboffset(interp[i],1), delta1); - } + for (j = 0; j < 4; j++) { + tmp_arg0[j] = arg0[j]; + tmp_arg1[j] = arg1[j]; + tmp_arg2[j] = arg2[j]; } -} - -static void emit_cinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { + for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, suboffset(interp[i],3)); + for (j = 0; j < 4; j++) { + if (arg0[j].file == dst[i].file && + dst[i].nr == arg0[j].nr) { + tmp_arg0[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg0[j], arg0[j]); + } + if (arg1[j].file == dst[i].file && + dst[i].nr == arg1[j].nr) { + tmp_arg1[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg1[j], arg1[j]); + } + if (arg2[j].file == dst[i].file && + dst[i].nr == arg2[j].nr) { + tmp_arg2[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg2[j], arg2[j]); + } + } } } -} -static void emit_pinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0, w; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - w = get_src_reg(c, inst, 2, 3); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); + func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_LINE(p, brw_null_reg(), interp[i], delta0); - brw_MAC(p, dst, suboffset(interp[i],1), - delta1); - brw_MUL(p, dst, dst, w); - } - } + release_tmps(c, mark); } -/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -static void emit_frontfacing(struct brw_wm_compile *c, - const struct prog_instruction *inst) +/* Workaround for using brw_wm_emit.c's emit functions, which expect + * destination regs to be uniquely written. Moves arguments out to + * temporaries as necessary for instructions which use their destination as + * a temporary. + */ +static void +unalias2(struct brw_wm_compile *c, + void (*func)(struct brw_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { struct brw_compile *p = &c->func; - struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); - struct brw_reg dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, brw_imm_f(0.0)); - } - } + struct brw_reg tmp_arg0[4], tmp_arg1[4]; + int i, j; + int mark = mark_tmps(c); - /* bit 31 is "primitive is back face", so checking < (1 << 31) gives - * us front face - */ - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31)); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, brw_imm_f(1.0)); - } + for (j = 0; j < 4; j++) { + tmp_arg0[j] = arg0[j]; + tmp_arg1[j] = arg1[j]; } - brw_set_predicate_control_flag_value(p, 0xff); -} -static void emit_xpd(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; for (i = 0; i < 4; i++) { - GLuint i2 = (i+2)%3; - GLuint i1 = (i+1)%3; if (mask & (1<<i)) { - struct brw_reg src0, src1, dst; - dst = get_dst_reg(c, inst, i); - src0 = negate(get_src_reg(c, inst, 0, i2)); - src1 = get_src_reg_imm(c, inst, 1, i1); - brw_MUL(p, brw_null_reg(), src0, src1); - src0 = get_src_reg(c, inst, 0, i1); - src1 = get_src_reg_imm(c, inst, 1, i2); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); + for (j = 0; j < 4; j++) { + if (arg0[j].file == dst[i].file && + dst[i].nr == arg0[j].nr) { + tmp_arg0[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg0[j], arg0[j]); + } + if (arg1[j].file == dst[i].file && + dst[i].nr == arg1[j].nr) { + tmp_arg1[j] = alloc_tmp(c); + brw_MOV(p, tmp_arg1[j], arg1[j]); + } + } } } - brw_set_saturate(p, 0); -} - -static void emit_dp3(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); -} + func(p, dst, mask, tmp_arg0, tmp_arg1); -static void emit_dp4(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_dph(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src1[3]); - brw_set_saturate(p, 0); -} - -/** - * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. - * Note that the result of the function is smeared across the dest - * register's X, Y, Z and W channels (subject to writemasking of course). - */ -static void emit_math1(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint func) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - /* Get first component of source register */ - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg(c, inst, 0, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_rcp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); -} - -static void emit_rsq(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); -} - -static void emit_sin(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); -} - -static void emit_cos(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); -} - -static void emit_ex2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); -} - -static void emit_lg2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); -} - -static void emit_add(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_ADD(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); + release_tmps(c, mark); } static void emit_arl(struct brw_wm_compile *c, @@ -1202,180 +733,6 @@ static void emit_arl(struct brw_wm_compile *c, brw_set_saturate(p, 0); } - -static void emit_mul(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_MUL(p, dst, src0, src1); - } - } - brw_set_saturate(p, 0); -} - -static void emit_frc(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg_imm(c, inst, 0, i); - brw_FRC(p, dst, src0); - } - } - if (inst->SaturateMode != SATURATE_OFF) - brw_set_saturate(p, 0); -} - -static void emit_flr(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg_imm(c, inst, 0, i); - brw_RNDD(p, dst, src0); - } - } - brw_set_saturate(p, 0); -} - - -static void emit_min_max(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - const GLuint mask = inst->DstReg.WriteMask; - const int mark = mark_tmps(c); - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - struct brw_reg real_dst = get_dst_reg(c, inst, i); - struct brw_reg src0 = get_src_reg(c, inst, 0, i); - struct brw_reg src1 = get_src_reg(c, inst, 1, i); - struct brw_reg dst; - /* if dst==src0 or dst==src1 we need to use a temp reg */ - GLboolean use_temp = brw_same_reg(dst, src0) || - brw_same_reg(dst, src1); - if (use_temp) - dst = alloc_tmp(c); - else - dst = real_dst; - - /* - printf(" Min/max: dst %d src0 %d src1 %d\n", - dst.nr, src0.nr, src1.nr); - */ - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - if (inst->Opcode == OPCODE_MIN) - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - else - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - if (use_temp) - brw_MOV(p, real_dst, dst); - } - } - brw_pop_insn_state(p); - release_tmps(c, mark); -} - -static void emit_pow(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg_imm(c, inst, 0, 0); - src1 = get_src_reg_imm(c, inst, 1, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); - - brw_math(p, - dst, - BRW_MATH_FUNCTION_POW, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_lrp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - int mark = mark_tmps(c); - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - - src1 = get_src_reg_imm(c, inst, 1, i); - - if (src1.nr == dst.nr) { - tmp1 = alloc_tmp(c); - brw_MOV(p, tmp1, src1); - } else - tmp1 = src1; - - src2 = get_src_reg(c, inst, 2, i); - if (src2.nr == dst.nr) { - tmp2 = alloc_tmp(c); - brw_MOV(p, tmp2, src2); - } else - tmp2 = src2; - - brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); - brw_MUL(p, brw_null_reg(), dst, tmp2); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c, mark); - } -} - /** * For GLSL shaders, this KIL will be unconditional. * It may be contained inside an IF/ENDIF structure of course. @@ -1391,144 +748,6 @@ static void emit_kil(struct brw_wm_compile *c) brw_pop_insn_state(p); } -static void emit_mad(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - src2 = get_src_reg_imm(c, inst, 2, i); - brw_MUL(p, dst, src0, src1); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } -} - -static void emit_sop(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint cond) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - src0 = get_src_reg(c, inst, 0, i); - src1 = get_src_reg_imm(c, inst, 1, i); - brw_push_insn_state(p); - brw_CMP(p, brw_null_reg(), cond, src0, src1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_MOV(p, dst, brw_imm_f(0.0)); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, brw_imm_f(1.0)); - brw_pop_insn_state(p); - } - } -} - -static void emit_slt(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_L); -} - -static void emit_sle(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_LE); -} - -static void emit_sgt(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_G); -} - -static void emit_sge(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_GE); -} - -static void emit_seq(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_EQ); -} - -static void emit_sne(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_sop(c, inst, BRW_CONDITIONAL_NEQ); -} - -static void emit_ddx(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - src0 = get_src_reg(c, inst, 0, 0); - w = get_src_reg(c, inst, 1, 3); - nr = src0.nr; - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, interp[i]); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - -static void emit_ddy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst; - struct brw_reg src0, w; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - nr = src0.nr; - w = get_src_reg(c, inst, 1, 3); - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for(i = 0; i < 4; i++ ) { - if (mask & (1<<i)) { - dst = get_dst_reg(c, inst, i); - brw_MOV(p, dst, suboffset(interp[i], 1)); - brw_MUL(p, dst, dst, w); - } - } - brw_set_saturate(p, 0); -} - static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), @@ -2581,196 +1800,6 @@ static void emit_noise4( struct brw_wm_compile *c, release_tmps( c, mark ); } - -static void emit_wpos_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0[2], dst[2]; - - dst[0] = get_dst_reg(c, inst, 0); - dst[1] = get_dst_reg(c, inst, 1); - - src0[0] = get_src_reg(c, inst, 0, 0); - src0[1] = get_src_reg(c, inst, 0, 1); - - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ - if (mask & WRITEMASK_X) { - /* X' = X - origin_x */ - brw_ADD(p, - dst[0], - retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); - } - - if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ - brw_ADD(p, - dst[1], - negate(retype(src0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); - } -} - -/* TODO - BIAS on SIMD8 not working yet... - */ -static void emit_txb(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint i; - GLuint msg_type; - - assert(unit < BRW_MAX_TEX_UNIT); - - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, inst, 0, i); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - default: - /* invalid target */ - abort(); - } - brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ - - if (BRW_IS_IGDNG(p->brw)) { - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; - } else { - /* Does it work well on SIMD8? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); -} - - -static void emit_tex(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint msg_len; - GLuint i, nr; - GLuint emit; - GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; - GLuint msg_type; - - assert(unit < BRW_MAX_TEX_UNIT); - - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, inst, 0, i); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - emit = WRITEMASK_XYZ; - nr = 3; - break; - default: - /* invalid target */ - abort(); - } - msg_len = 1; - - /* move/load S, T, R coords */ - for (i = 0; i < nr; i++) { - static const GLuint swz[4] = {0,1,2,2}; - if (emit & (1<<i)) - brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); - else - brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); - msg_len += 1; - } - - if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ - brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ - } - - if (BRW_IS_IGDNG(p->brw)) { - if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; - } else { - /* Does it work for shadow on SIMD8 ? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - shadow ? 6 : 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -} - /** * Resolve subroutine calls after code emit is done. @@ -2780,6 +1809,21 @@ static void post_wm_emit( struct brw_wm_compile *c ) brw_resolve_cals(&c->func); } +static void +get_argument_regs(struct brw_wm_compile *c, + const struct prog_instruction *inst, + int index, + struct brw_reg *regs, + int mask) +{ + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1 << i)) + regs[i] = get_src_reg(c, inst, index, i); + } +} + static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { #define MAX_IF_DEPTH 32 @@ -2797,6 +1841,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + int dst_flags; + struct brw_reg args[3][4], dst[4]; + int j; c->cur_inst = i; @@ -2809,127 +1856,157 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (c->fp->use_const_buffer) fetch_constants(c, inst); + if (inst->Opcode != OPCODE_ARL) { + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + } + for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) + get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW); + + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); else brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + switch (inst->Opcode) { case WM_PIXELXY: - emit_pixel_xy(c, inst); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(c, inst); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_PIXELW: - emit_pixel_w(c, inst); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: - emit_linterp(c, inst); + emit_linterp(p, dst, dst_flags, args[0], args[1]); break; case WM_PINTERP: - emit_pinterp(c, inst); + emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); break; case WM_CINTERP: - emit_cinterp(c, inst); + emit_cinterp(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: - emit_wpos_xy(c, inst); + emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_FB_WRITE: - emit_fb_write(c, inst); + emit_fb_write(c, args[0], args[1], args[2], + INST_AUX_GET_TARGET(inst->Aux), + inst->Aux & INST_AUX_EOT); break; case WM_FRONTFACING: - emit_frontfacing(c, inst); + emit_frontfacing(p, dst, dst_flags); break; case OPCODE_ADD: - emit_add(c, inst); + emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; case OPCODE_ARL: emit_arl(c, inst); break; case OPCODE_FRC: - emit_frc(c, inst); + emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; case OPCODE_FLR: - emit_flr(c, inst); + emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_LRP: - emit_lrp(c, inst); + unalias3(c, emit_lrp, + dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_TRUNC: - emit_trunc(c, inst); + emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); break; case OPCODE_MOV: case OPCODE_SWZ: - emit_mov(c, inst); + emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; case OPCODE_DP3: - emit_dp3(c, inst); + emit_dp3(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP4: - emit_dp4(c, inst); + emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: - emit_xpd(c, inst); + emit_xpd(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DPH: - emit_dph(c, inst); + emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_RCP: - emit_rcp(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_rsq(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_sin(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_cos(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_ex2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_lg2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_MIN: + unalias2(c, emit_min, dst, dst_flags, args[0], args[1]); + break; case OPCODE_MAX: - emit_min_max(c, inst); + unalias2(c, emit_max, dst, dst_flags, args[0], args[1]); break; case OPCODE_DDX: - emit_ddx(c, inst); - break; case OPCODE_DDY: - emit_ddy(c, inst); + emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), + args[0]); break; case OPCODE_SLT: - emit_slt(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_L, args[0], args[1]); break; case OPCODE_SLE: - emit_sle(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_LE, args[0], args[1]); break; case OPCODE_SGT: - emit_sgt(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_G, args[0], args[1]); break; case OPCODE_SGE: - emit_sge(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_GE, args[0], args[1]); break; case OPCODE_SEQ: - emit_seq(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_EQ, args[0], args[1]); break; case OPCODE_SNE: - emit_sne(c, inst); + emit_sop(p, dst, dst_flags, + BRW_CONDITIONAL_NEQ, args[0], args[1]); break; case OPCODE_MUL: - emit_mul(c, inst); + emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; case OPCODE_POW: - emit_pow(c, inst); + emit_math2(c, BRW_MATH_FUNCTION_POW, + dst, dst_flags, args[0], args[1]); break; case OPCODE_MAD: - emit_mad(c, inst); + emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_NOISE1: emit_noise1(c, inst); @@ -2944,10 +2021,19 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_noise4(c, inst); break; case OPCODE_TEX: - emit_tex(c, inst); + emit_tex(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + inst->TexSrcUnit, + (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); break; case OPCODE_TXB: - emit_txb(c, inst); + emit_txb(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); break; case OPCODE_KIL_NV: emit_kil(c); diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 6279258339..ff4c082d5e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -42,12 +42,14 @@ static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) { assert(c->nr_refs < BRW_WM_MAX_REF); + memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs)); return &c->refs[c->nr_refs++]; } static struct brw_wm_value *get_value( struct brw_wm_compile *c) { assert(c->nr_refs < BRW_WM_MAX_VREG); + memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg)); return &c->vreg[c->nr_vreg++]; } @@ -55,6 +57,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) { assert(c->nr_insns < BRW_WM_MAX_INSN); + memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction)); return &c->instruction[c->nr_insns++]; } @@ -322,8 +325,8 @@ translate_insn(struct brw_wm_compile *c, out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; out->tex_shadow = inst->TexShadow; - out->eot = inst->Aux & 1; - out->target = inst->Aux >> 1; + out->eot = inst->Aux & INST_AUX_EOT; + out->target = INST_AUX_GET_TARGET(inst->Aux); /* Args: */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 3436a24717..b449394029 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -178,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read1 = writemask; break; + case OPCODE_DDX: + case OPCODE_DDY: + read0 = writemask; + break; + case OPCODE_MAD: case OPCODE_CMP: case OPCODE_LRP: @@ -270,6 +275,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_DST: case WM_FRONTFACING: + case OPCODE_KIL_NV: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 6faea018fb..31303febf0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -82,8 +82,8 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (c->key.vp_outputs_written & (1<<j)) { + for (j = 0; j < VERT_RESULT_MAX; j++) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { int fp_index; if (j >= VERT_RESULT_VAR0) diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index dff466587a..aa2e519588 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -66,19 +66,6 @@ static GLuint translate_wrap_mode( GLenum wrap ) } } - -static GLuint U_FIXED(GLfloat value, GLuint frac_bits) -{ - value *= (1<<frac_bits); - return value < 0 ? 0 : value; -} - -static GLint S_FIXED(GLfloat value, GLuint frac_bits) -{ - return value * (1<<frac_bits); -} - - static dri_bo *upload_default_color( struct brw_context *brw, const GLfloat *color ) { @@ -86,8 +73,8 @@ static dri_bo *upload_default_color( struct brw_context *brw, COPY_4V(sdc.color, color); - return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, - NULL, 0 ); + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc), NULL, 0); } @@ -228,8 +215,8 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, */ sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); + sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6); + sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6); sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 39f8c6d522..f89ed9bce7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -106,7 +106,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; + (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; + /* BRW_NEW_DEPTH_BUFFER + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. + */ + if (brw->state.depth_region == NULL) + key->computes_depth = 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; @@ -300,6 +306,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | + BRW_NEW_DEPTH_BUFFER | BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 3dcc592bde..3f9b1fbfdc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -31,7 +31,6 @@ #include "main/mtypes.h" -#include "main/texformat.h" #include "main/texstore.h" #include "shader/prog_parameter.h" @@ -70,7 +69,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, +static GLuint translate_tex_format( gl_format mesa_format, + GLenum internal_format, GLenum depth_mode ) { switch( mesa_format ) { @@ -86,21 +86,22 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, case MESA_FORMAT_AL88: return BRW_SURFACEFORMAT_L8A8_UNORM; + case MESA_FORMAT_AL1616: + return BRW_SURFACEFORMAT_L16A16_UNORM; + case MESA_FORMAT_RGB888: assert(0); /* not supported for sampling */ return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - else - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case MESA_FORMAT_XRGB8888: + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; case MESA_FORMAT_RGBA8888_REV: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - else - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()"); + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -287,7 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.bo = NULL; key.offset = intelObj->textureOffset; } else { - key.format = firstImage->TexFormat->MesaFormat; + key.format = firstImage->TexFormat; key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; @@ -354,7 +355,10 @@ brw_create_constant_surface( struct brw_context *brw, NULL, NULL); if (key->bo) { - /* Emit relocation to surface contents */ + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 + * bspec ("Data Cache") says that the data cache does not exist as + * a separate cache and is just the sampler cache. + */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_SAMPLER, 0, 0, @@ -527,8 +531,15 @@ brw_update_renderbuffer_surface(struct brw_context *brw, region_bo = region->buffer; key.surface_type = BRW_SURFACE_2D; - switch (irb->texformat->MesaFormat) { + switch (irb->Base.Format) { + /* XRGB and ARGB are treated the same here because the chips in this + * family cannot render to XRGB targets. This means that we have to + * mask writes to alpha (ala glColorMask) and reconfigure the alpha + * blending hardware to use GL_ONE (or GL_ZERO) for cases where + * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used. + */ case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; case MESA_FORMAT_RGB565: @@ -541,26 +552,38 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; break; default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format); } key.tiling = region->tiling; - key.width = region->width; - key.height = region->height; + if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { + key.width = rb->Width; + key.height = rb->Height; + } else { + key.width = region->width; + key.height = region->height; + } key.pitch = region->pitch; key.cpp = region->cpp; key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ } else { key.surface_type = BRW_SURFACE_NULL; key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = 0; + key.tiling = I915_TILING_X; key.width = 1; key.height = 1; key.cpp = 4; key.draw_offset = 0; } + /* _NEW_COLOR */ memcpy(key.color_mask, ctx->Color.ColorMask, sizeof(key.color_mask)); + + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->Visual.alphaBits == 0) + key.color_mask[3] = GL_FALSE; + key.color_blend = (!ctx->Color._LogicOpEnabled && ctx->Color.BlendEnabled); @@ -655,7 +678,7 @@ brw_wm_get_binding_table(struct brw_context *brw) if (bind_bo == NULL) { GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); + uint32_t data[BRW_WM_MAX_SURF]; int i; for (i = 0; i < brw->wm.nr_surfaces; i++) @@ -680,8 +703,6 @@ brw_wm_get_binding_table(struct brw_context *brw) brw->wm.surf_bo[i]); } } - - free(data); } return bind_bo; @@ -690,11 +711,10 @@ brw_wm_get_binding_table(struct brw_context *brw) static void prepare_wm_surfaces(struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; GLuint i; int old_nr_surfaces; - /* _NEW_BUFFERS */ + /* _NEW_BUFFERS | _NEW_COLOR */ /* Update surfaces for drawing buffers */ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { @@ -707,7 +727,7 @@ static void prepare_wm_surfaces(struct brw_context *brw ) } old_nr_surfaces = brw->wm.nr_surfaces; - brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + brw->wm.nr_surfaces = BRW_MAX_DRAW_BUFFERS; if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; @@ -719,17 +739,8 @@ static void prepare_wm_surfaces(struct brw_context *brw ) /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ if (texUnit->_ReallyEnabled) { - if (texUnit->_Current == intel->frame_buffer_texobj) { - /* render to texture */ - dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw->wm.surf_bo[0]; - dri_bo_reference(brw->wm.surf_bo[surf]); - brw->wm.nr_surfaces = surf + 1; - } else { - /* regular texture */ - brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; - } + brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = surf + 1; } else { dri_bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c deleted file mode 120000 index 4c6b37ada0..0000000000 --- a/src/mesa/drivers/dri/i965/intel_generatemipmap.c +++ /dev/null @@ -1 +0,0 @@ -../intel/intel_generatemipmap.c
\ No newline at end of file |