diff options
Diffstat (limited to 'src/gallium/drivers/r300')
22 files changed, 971 insertions, 411 deletions
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index ffe066d536..c14414fff6 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -27,9 +27,9 @@ static void r300_blitter_save_states(struct r300_context* r300) { - util_blitter_save_blend(r300->blitter, r300->blend_state); - util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state); - util_blitter_save_rasterizer(r300->blitter, r300->rs_state); + util_blitter_save_blend(r300->blitter, r300->blend_state.state); + util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state); + util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state); util_blitter_save_fragment_shader(r300->blitter, r300->fs); util_blitter_save_vertex_shader(r300->blitter, r300->vs); } diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 51fdb82ff3..92de297ef1 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -33,6 +33,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) /* Reasonable defaults */ caps->num_vert_fpus = 4; caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + caps->is_r400 = FALSE; caps->is_r500 = FALSE; caps->high_second_pipe = FALSE; @@ -123,6 +124,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4A54: caps->family = CHIP_FAMILY_R420; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5548: @@ -136,6 +138,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D57: caps->family = CHIP_FAMILY_R423; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x554C: @@ -147,6 +150,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D4A: caps->family = CHIP_FAMILY_R430; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5D4C: @@ -157,6 +161,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5D52: caps->family = CHIP_FAMILY_R480; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x4B48: @@ -166,6 +171,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x4B4C: caps->family = CHIP_FAMILY_R481; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5E4C: @@ -182,6 +188,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x5E4D: caps->family = CHIP_FAMILY_RV410; caps->num_vert_fpus = 6; + caps->is_r400 = TRUE; break; case 0x5954: @@ -212,6 +219,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x791F: caps->family = CHIP_FAMILY_RS690; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x793F: @@ -219,6 +227,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x7942: caps->family = CHIP_FAMILY_RS600; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x796C: @@ -227,6 +236,7 @@ void r300_parse_chipset(struct r300_capabilities* caps) case 0x796F: caps->family = CHIP_FAMILY_RS740; caps->has_tcl = FALSE; + caps->is_r400 = TRUE; break; case 0x7100: diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index 0633a8b8a7..2808486492 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -40,11 +40,18 @@ struct r300_capabilities { unsigned num_z_pipes; /* Whether or not TCL is physically present */ boolean has_tcl; + /* Whether or not this is R400. The differences compared to their R3xx + * cousins are: + * - Extended fragment shader registers + * - Blend LTE/GTE thresholds */ + boolean is_r400; /* Whether or not this is an RV515 or newer; R500s have many differences * that require extra consideration, compared to their R3xx cousins: * - Extra bit of width and height on texture sizes * - Blend color is split across two registers - * - Universal Shader (US) block used for fragment shaders */ + * - Blend LTE/GTE thresholds + * - Universal Shader (US) block used for fragment shaders + * - FP16 blending and multisampling */ boolean is_r500; /* Whether or not the second pixel pipe is accessed with the high bit */ boolean high_second_pipe; diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index d5c2d63d39..5e4f6552c3 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -30,6 +30,7 @@ #include "r300_blit.h" #include "r300_context.h" +#include "r300_emit.h" #include "r300_flush.h" #include "r300_query.h" #include "r300_render.h" @@ -69,11 +70,13 @@ static void r300_destroy_context(struct pipe_context* context) FREE(query); } - FREE(r300->blend_color_state); + FREE(r300->blend_color_state.state); + FREE(r300->clip_state.state); FREE(r300->rs_block); - FREE(r300->scissor_state); + FREE(r300->scissor_state.state); FREE(r300->vertex_info); - FREE(r300->viewport_state); + FREE(r300->viewport_state.state); + FREE(r300->ztop_state.state); FREE(r300); } @@ -107,6 +110,35 @@ static void r300_flush_cb(void *data) cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL); } +#define R300_INIT_ATOM(atomname, atomsize) \ + r300->atomname##_state.name = #atomname; \ + r300->atomname##_state.state = NULL; \ + r300->atomname##_state.size = atomsize; \ + r300->atomname##_state.emit = r300_emit_##atomname##_state; \ + r300->atomname##_state.dirty = FALSE; \ + insert_at_tail(&r300->atom_list, &r300->atomname##_state); + +static void r300_setup_atoms(struct r300_context* r300) +{ + /* Create the actual atom list. + * + * Each atom is examined and emitted in the order it appears here, which + * can affect performance and conformance if not handled with care. + * + * Some atoms never change size, others change every emit. This is just + * an upper bound on each atom, to keep the emission machinery from + * underallocating space. */ + make_empty_list(&r300->atom_list); + R300_INIT_ATOM(ztop, 2); + R300_INIT_ATOM(blend, 8); + R300_INIT_ATOM(blend_color, 3); + R300_INIT_ATOM(clip, 29); + R300_INIT_ATOM(dsa, 8); + R300_INIT_ATOM(rs, 22); + R300_INIT_ATOM(scissor, 3); + R300_INIT_ATOM(viewport, 9); +} + struct pipe_context* r300_create_context(struct pipe_screen* screen, struct radeon_winsys* radeon_winsys) { @@ -155,11 +187,15 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash, r300_shader_key_compare); - r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300_setup_atoms(r300); + + r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state); + r300->clip_state.state = CALLOC_STRUCT(pipe_clip_state); r300->rs_block = CALLOC_STRUCT(r300_rs_block); - r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state); r300->vertex_info = CALLOC_STRUCT(r300_vertex_info); - r300->viewport_state = CALLOC_STRUCT(r300_viewport_state); + r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state); + r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state); /* Open up the OQ BO. */ r300->oqbo = screen->buffer_create(screen, 4096, diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 232530b7dc..682b9179c8 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -30,9 +30,28 @@ #include "pipe/p_context.h" #include "pipe/p_inlines.h" +struct r300_context; + struct r300_fragment_shader; struct r300_vertex_shader; +struct r300_atom { + /* List pointers. */ + struct r300_atom *prev, *next; + /* Name, for debugging. */ + const char* name; + /* Opaque state. */ + void* state; + /* Emit the state to the context. */ + void (*emit)(struct r300_context*, void*); + /* Upper bound on number of dwords to emit. */ + unsigned size; + /* Whether this atom should be emitted. */ + boolean dirty; + /* Another dirty flag that is never automatically cleared. */ + boolean always_dirty; +}; + struct r300_blend_state { uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ @@ -62,11 +81,6 @@ struct r300_rs_state { /* Draw-specific rasterizer state */ struct pipe_rasterizer_state rs; - /* Whether or not to enable the VTE. This is referenced at the very - * last moment during emission of VTE state, to decide whether or not - * the VTE should be used for transformation. */ - boolean enable_vte; - uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */ @@ -102,19 +116,6 @@ struct r300_sampler_state { unsigned min_lod, max_lod; }; -struct r300_scissor_regs { - uint32_t top_left; /* R300_SC_SCISSORS_TL: 0x43e0 */ - uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */ - - /* Whether everything is culled by scissoring. */ - boolean empty_area; -}; - -struct r300_scissor_state { - struct r300_scissor_regs framebuffer; - struct r300_scissor_regs scissor; -}; - struct r300_texture_state { uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */ @@ -135,24 +136,17 @@ struct r300_ztop_state { uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ }; -#define R300_NEW_BLEND 0x00000001 -#define R300_NEW_BLEND_COLOR 0x00000002 -#define R300_NEW_CLIP 0x00000004 -#define R300_NEW_DSA 0x00000008 #define R300_NEW_FRAMEBUFFERS 0x00000010 #define R300_NEW_FRAGMENT_SHADER 0x00000020 #define R300_NEW_FRAGMENT_SHADER_CONSTANTS 0x00000040 -#define R300_NEW_RASTERIZER 0x00000080 #define R300_NEW_RS_BLOCK 0x00000100 #define R300_NEW_SAMPLER 0x00000200 #define R300_ANY_NEW_SAMPLERS 0x0001fe00 -#define R300_NEW_SCISSOR 0x00020000 #define R300_NEW_TEXTURE 0x00040000 #define R300_ANY_NEW_TEXTURES 0x03fc0000 #define R300_NEW_VERTEX_FORMAT 0x04000000 #define R300_NEW_VERTEX_SHADER 0x08000000 #define R300_NEW_VERTEX_SHADER_CONSTANTS 0x10000000 -#define R300_NEW_VIEWPORT 0x20000000 #define R300_NEW_QUERY 0x40000000 #define R300_NEW_KITCHEN_SINK 0x7fffffff @@ -194,6 +188,12 @@ struct r300_query { struct r300_query* next; }; +enum r300_buffer_tiling { + R300_BUFFER_LINEAR = 0, + R300_BUFFER_TILED, + R300_BUFFER_SQUARETILED +}; + struct r300_texture { /* Parent class */ struct pipe_texture tex; @@ -230,6 +230,9 @@ struct r300_texture { /* Registers carrying texture format data. */ struct r300_texture_state state; + + /* Buffer tiling */ + enum r300_buffer_tiling microtile, macrotile; }; struct r300_vertex_info { @@ -273,38 +276,40 @@ struct r300_context { struct r300_vertex_info* vertex_info; /* Various CSO state objects. */ + /* Beginning of atom list. */ + struct r300_atom atom_list; /* Blend state. */ - struct r300_blend_state* blend_state; + struct r300_atom blend_state; /* Blend color state. */ - struct r300_blend_color_state* blend_color_state; + struct r300_atom blend_color_state; /* User clip planes. */ - struct pipe_clip_state clip_state; + struct r300_atom clip_state; /* Shader constants. */ struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES]; /* Depth, stencil, and alpha state. */ - struct r300_dsa_state* dsa_state; + struct r300_atom dsa_state; /* Fragment shader. */ struct r300_fragment_shader* fs; /* Framebuffer state. We currently don't need our own version of this. */ struct pipe_framebuffer_state framebuffer_state; /* Rasterizer state. */ - struct r300_rs_state* rs_state; + struct r300_atom rs_state; /* RS block state. */ struct r300_rs_block* rs_block; /* Sampler states. */ struct r300_sampler_state* sampler_states[8]; int sampler_count; /* Scissor state. */ - struct r300_scissor_state* scissor_state; + struct r300_atom scissor_state; /* Texture states. */ struct r300_texture* textures[8]; int texture_count; /* Vertex shader. */ struct r300_vertex_shader* vs; /* Viewport state. */ - struct r300_viewport_state* viewport_state; + struct r300_atom viewport_state; /* ZTOP state. */ - struct r300_ztop_state ztop_state; + struct r300_atom ztop_state; /* Vertex buffers for Gallium. */ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -317,6 +322,8 @@ struct r300_context { uint32_t dirty_state; /* Flag indicating whether or not the HW is dirty. */ uint32_t dirty_hw; + /* Whether the TCL engine should be in bypass mode. */ + boolean tcl_bypass; /** Combination of DBG_xxx flags */ unsigned debug; diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d142fee050..151f72b0fe 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -52,7 +52,7 @@ #define CS_LOCALS(context) \ struct r300_context* const cs_context_copy = (context); \ struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \ - int cs_count = 0; + int cs_count = 0; (void) cs_count; #define CHECK_CS(size) \ assert(cs_winsys->check_cs(cs_winsys, (size))) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 1dc9216a7b..9f93327e59 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_simple_list.h" #include "r300_context.h" #include "r300_cs.h" @@ -36,11 +37,13 @@ #include "r300_texture.h" #include "r300_vs.h" -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend) +void r300_emit_blend_state(struct r300_context* r300, void* state) { + struct r300_blend_state* blend = (struct r300_blend_state*)state; CS_LOCALS(r300); + BEGIN_CS(8); + OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3); if (r300->framebuffer_state.nr_cbufs) { OUT_CS(blend->blend_control); @@ -52,14 +55,13 @@ void r300_emit_blend_state(struct r300_context* r300, OUT_CS(0); /* XXX also disable fastfill here once it's supported */ } - OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop); OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither); END_CS; } -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc) +void r300_emit_blend_color_state(struct r300_context* r300, void* state) { + struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -76,9 +78,9 @@ void r300_emit_blend_color_state(struct r300_context* r300, } } -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip) +void r300_emit_clip_state(struct r300_context* r300, void* state) { + struct pipe_clip_state* clip = (struct pipe_clip_state*)state; int i; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); @@ -106,13 +108,13 @@ void r300_emit_clip_state(struct r300_context* r300, } -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa) +void r300_emit_dsa_state(struct r300_context* r300, void* state) { + struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct r300_screen* r300screen = r300_screen(r300->context.screen); CS_LOCALS(r300); - BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8); + BEGIN_CS(r300screen->caps->is_r500 ? 8 : 6); OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); /* not needed since we use the 8bit alpha ref */ @@ -121,10 +123,16 @@ void r300_emit_dsa_state(struct r300_context* r300, }*/ OUT_CS_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CS(dsa->z_buffer_control); - OUT_CS(dsa->z_stencil_control); + + if (r300->framebuffer_state.zsbuf) { + OUT_CS(dsa->z_buffer_control); + OUT_CS(dsa->z_stencil_control); + } else { + OUT_CS(0); + OUT_CS(0); + } + OUT_CS(dsa->stencil_ref_mask); - OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top); /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */ if (r300screen->caps->is_r500) { @@ -138,6 +146,8 @@ static const float * get_shader_constant( struct rc_constant * constant, struct r300_constant_buffer * externals) { + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; struct pipe_texture *tex; @@ -160,11 +170,31 @@ static const float * get_shader_constant( /* Texture compare-fail value. */ /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient, - * this is always (0,0,0,0). */ + * this is always (0,0,0,0), right? */ case RC_STATE_SHADOW_AMBIENT: vec[3] = 0; break; + case RC_STATE_R300_VIEWPORT_SCALE: + if (r300->tcl_bypass) { + vec[0] = 1; + vec[1] = 1; + vec[2] = 1; + } else { + vec[0] = viewport->xscale; + vec[1] = viewport->yscale; + vec[2] = viewport->zscale; + } + break; + + case RC_STATE_R300_VIEWPORT_OFFSET: + if (!r300->tcl_bypass) { + vec[0] = viewport->xoffset; + vec[1] = viewport->yoffset; + vec[2] = viewport->zoffset; + } + break; + default: debug_printf("r300: Implementation error: " "Unknown RC_CONSTANT type %d\n", constant->u.State[0]); @@ -283,6 +313,22 @@ void r300_emit_fs_constant_buffer(struct r300_context* r300, END_CS; } +static void r300_emit_fragment_depth_config(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + CS_LOCALS(r300); + + BEGIN_CS(4); + if (r300_fragment_shader_writes_depth(fs)) { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US); + } else { + OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN); + OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US); + } + END_CS; +} + void r500_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code) { @@ -374,8 +420,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | - r300_translate_colorformat(tex->tex.format), 0, - RADEON_GEM_DOMAIN_VRAM, 0); + r300_translate_colorformat(tex->tex.format) | + R300_COLOR_TILE(tex->macrotile) | + R300_COLOR_MICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), r300_translate_out_fmt(surf->format)); @@ -398,8 +446,10 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format)); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0, - RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] | + R300_DEPTHMACROTILE(tex->macrotile) | + R300_DEPTHMICROTILE(tex->microtile), + 0, RADEON_GEM_DOMAIN_VRAM, 0); } END_CS; @@ -531,8 +581,9 @@ void r300_emit_query_end(struct r300_context* r300) r300_emit_query_finish(r300, query); } -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs) +void r300_emit_rs_state(struct r300_context* r300, void* state) { + struct r300_rs_state* rs = (struct r300_rs_state*)state; CS_LOCALS(r300); BEGIN_CS(22); @@ -595,26 +646,47 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } -static void r300_emit_scissor_regs(struct r300_context* r300, - struct r300_scissor_regs* scissor) +void r300_emit_scissor_state(struct r300_context* r300, void* state) { + unsigned minx, miny, maxx, maxy; + uint32_t top_left, bottom_right; + struct r300_screen* r300screen = r300_screen(r300->context.screen); + struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state; CS_LOCALS(r300); - BEGIN_CS(3); - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS(scissor->top_left); - OUT_CS(scissor->bottom_right); - END_CS; -} + minx = miny = 0; + maxx = r300->framebuffer_state.width; + maxy = r300->framebuffer_state.height; -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor) -{ - if (r300->rs_state->rs.scissor) { - r300_emit_scissor_regs(r300, &scissor->scissor); + if (((struct r300_rs_state*)r300->rs_state.state)->rs.scissor) { + minx = MAX2(minx, scissor->minx); + miny = MAX2(miny, scissor->miny); + maxx = MIN2(maxx, scissor->maxx); + maxy = MIN2(maxy, scissor->maxy); + } + + if (r300screen->caps->is_r500) { + top_left = + (minx << R300_SCISSORS_X_SHIFT) | + (miny << R300_SCISSORS_Y_SHIFT); + bottom_right = + ((maxx - 1) << R300_SCISSORS_X_SHIFT) | + ((maxy - 1) << R300_SCISSORS_Y_SHIFT); } else { - r300_emit_scissor_regs(r300, &scissor->framebuffer); + /* Offset of 1440 in non-R500 chipsets. */ + top_left = + ((minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((miny + 1440) << R300_SCISSORS_Y_SHIFT); + bottom_right = + (((maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | + (((maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); } + + BEGIN_CS(3); + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + OUT_CS(top_left); + OUT_CS(bottom_right); + END_CS; } void r300_emit_texture(struct r300_context* r300, @@ -650,8 +722,10 @@ void r300_emit_texture(struct r300_context* r300, OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1); OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1); - OUT_CS_RELOC(tex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); + OUT_CS_RELOC(tex->buffer, + R300_TXO_MACRO_TILE(tex->macrotile) | + R300_TXO_MICRO_TILE(tex->microtile), + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); END_CS; } @@ -717,32 +791,6 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset) END_CS; } -#if 0 -void r300_emit_draw_packet(struct r300_context* r300) -{ - CS_LOCALS(r300); - - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " - "vertex size %d\n", r300->vbo, - r300->vertex_info->vinfo.size); - /* Set the pointer to our vertex buffer. The emitted values are this: - * PACKET3 [3D_LOAD_VBPNTR] - * COUNT [1] - * FORMAT [size | stride << 8] - * OFFSET [offset into BO] - * VBPNTR [relocated BO] - */ - BEGIN_CS(7); - OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); - OUT_CS(1); - OUT_CS(r300->vertex_info->vinfo.size | - (r300->vertex_info->vinfo.size << 8)); - OUT_CS(r300->vbo_offset); - OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); - END_CS; -} -#endif - void r300_emit_vertex_format_state(struct r300_context* r300) { int i; @@ -867,26 +915,27 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, END_CS; } -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport) +void r300_emit_viewport_state(struct r300_context* r300, void* state) { + struct r300_viewport_state* viewport = (struct r300_viewport_state*)state; CS_LOCALS(r300); - BEGIN_CS(9); - OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); - OUT_CS_32F(viewport->xscale); - OUT_CS_32F(viewport->xoffset); - OUT_CS_32F(viewport->yscale); - OUT_CS_32F(viewport->yoffset); - OUT_CS_32F(viewport->zscale); - OUT_CS_32F(viewport->zoffset); - - if (r300->rs_state->enable_vte) { - OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); - } else { + if (r300->tcl_bypass) { + BEGIN_CS(2); OUT_CS_REG(R300_VAP_VTE_CNTL, 0); + END_CS; + } else { + BEGIN_CS(9); + OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); + OUT_CS_32F(viewport->xscale); + OUT_CS_32F(viewport->xoffset); + OUT_CS_32F(viewport->yscale); + OUT_CS_32F(viewport->yoffset); + OUT_CS_32F(viewport->zscale); + OUT_CS_32F(viewport->zoffset); + OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control); + END_CS; } - END_CS; } void r300_emit_texture_count(struct r300_context* r300) @@ -910,6 +959,16 @@ void r300_emit_texture_count(struct r300_context* r300) } +void r300_emit_ztop_state(struct r300_context* r300, void* state) +{ + struct r300_ztop_state* ztop = (struct r300_ztop_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(2); + OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top); + END_CS; +} + void r300_flush_textures(struct r300_context* r300) { CS_LOCALS(r300); @@ -933,18 +992,24 @@ void r300_emit_dirty_state(struct r300_context* r300) { struct r300_screen* r300screen = r300_screen(r300->context.screen); struct r300_texture* tex; - int i, dirty_tex = 0; + struct r300_atom* atom; + unsigned i, dwords = 1024; + int dirty_tex = 0; boolean invalid = FALSE; - if (!(r300->dirty_state)) { - return; + /* Check the required number of dwords against the space remaining in the + * current CS object. If we need more, then flush. */ + + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + dwords += atom->size; + } } - /* Check size of CS. */ - /* Make sure we have at least 8*1024 spare dwords. */ + /* Make sure we have at least 2*1024 spare dwords. */ /* XXX It would be nice to know the number of dwords we really need to * XXX emit. */ - if (!r300->winsys->check_cs(r300->winsys, 8*1024)) { + if (!r300->winsys->check_cs(r300->winsys, dwords)) { r300->context.flush(&r300->context, 0, NULL); } @@ -984,10 +1049,12 @@ validate: } } /* ...occlusion query buffer... */ - if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, - 0, RADEON_GEM_DOMAIN_GTT)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; + if (r300->dirty_state & R300_NEW_QUERY) { + if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo, + 0, RADEON_GEM_DOMAIN_GTT)) { + r300->context.flush(&r300->context, 0, NULL); + goto validate; + } } /* ...and vertex buffer. */ if (r300->vbo) { @@ -1015,27 +1082,15 @@ validate: r300->dirty_state &= ~R300_NEW_QUERY; } - if (r300->dirty_state & R300_NEW_BLEND) { - r300_emit_blend_state(r300, r300->blend_state); - r300->dirty_state &= ~R300_NEW_BLEND; - } - - if (r300->dirty_state & R300_NEW_BLEND_COLOR) { - r300_emit_blend_color_state(r300, r300->blend_color_state); - r300->dirty_state &= ~R300_NEW_BLEND_COLOR; - } - - if (r300->dirty_state & R300_NEW_CLIP) { - r300_emit_clip_state(r300, &r300->clip_state); - r300->dirty_state &= ~R300_NEW_CLIP; - } - - if (r300->dirty_state & R300_NEW_DSA) { - r300_emit_dsa_state(r300, r300->dsa_state); - r300->dirty_state &= ~R300_NEW_DSA; + foreach(atom, &r300->atom_list) { + if (atom->dirty || atom->always_dirty) { + atom->emit(r300, atom->state); + atom->dirty = FALSE; + } } if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) { + r300_emit_fragment_depth_config(r300, r300->fs); if (r300screen->caps->is_r500) { r500_emit_fragment_program_code(r300, &r300->fs->shader->code); } else { @@ -1060,21 +1115,11 @@ validate: r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS; } - if (r300->dirty_state & R300_NEW_RASTERIZER) { - r300_emit_rs_state(r300, r300->rs_state); - r300->dirty_state &= ~R300_NEW_RASTERIZER; - } - if (r300->dirty_state & R300_NEW_RS_BLOCK) { r300_emit_rs_block_state(r300, r300->rs_block); r300->dirty_state &= ~R300_NEW_RS_BLOCK; } - if (r300->dirty_state & R300_NEW_SCISSOR) { - r300_emit_scissor_state(r300, r300->scissor_state); - r300->dirty_state &= ~R300_NEW_SCISSOR; - } - /* Samplers and textures are tracked separately but emitted together. */ if (r300->dirty_state & (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) { @@ -1096,11 +1141,6 @@ validate: r300->dirty_state &= ~(R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES); } - if (r300->dirty_state & R300_NEW_VIEWPORT) { - r300_emit_viewport_state(r300, r300->viewport_state); - r300->dirty_state &= ~R300_NEW_VIEWPORT; - } - if (dirty_tex) { r300_flush_textures(r300); } diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 3797d3d332..05a6bfeae8 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -31,17 +31,13 @@ struct r300_vertex_program_code; void r300_emit_aos(struct r300_context* r300, unsigned offset); -void r300_emit_blend_state(struct r300_context* r300, - struct r300_blend_state* blend); +void r300_emit_blend_state(struct r300_context* r300, void* state); -void r300_emit_blend_color_state(struct r300_context* r300, - struct r300_blend_color_state* bc); +void r300_emit_blend_color_state(struct r300_context* r300, void* state); -void r300_emit_clip_state(struct r300_context* r300, - struct pipe_clip_state* clip); +void r300_emit_clip_state(struct r300_context* r300, void* state); -void r300_emit_dsa_state(struct r300_context* r300, - struct r300_dsa_state* dsa); +void r300_emit_dsa_state(struct r300_context* r300, void* state); void r300_emit_fragment_program_code(struct r300_context* r300, struct rX00_fragment_program_code* generic_code); @@ -63,13 +59,12 @@ void r300_emit_query_begin(struct r300_context* r300, void r300_emit_query_end(struct r300_context* r300); -void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); +void r300_emit_rs_state(struct r300_context* r300, void* state); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); -void r300_emit_scissor_state(struct r300_context* r300, - struct r300_scissor_state* scissor); +void r300_emit_scissor_state(struct r300_context* r300, void* state); void r300_emit_texture(struct r300_context* r300, struct r300_sampler_state* sampler, @@ -89,11 +84,12 @@ void r300_emit_vs_constant_buffer(struct r300_context* r300, void r300_emit_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); -void r300_emit_viewport_state(struct r300_context* r300, - struct r300_viewport_state* viewport); +void r300_emit_viewport_state(struct r300_context* r300, void* state); void r300_emit_texture_count(struct r300_context* r300); +void r300_emit_ztop_state(struct r300_context* r300, void* state); + void r300_flush_textures(struct r300_context* r300); /* Emit all dirty state. */ diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 14a08241fc..c78a7673a3 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -37,6 +37,7 @@ static void r300_flush(struct pipe_context* pipe, { struct r300_context *r300 = r300_context(pipe); struct r300_query *query; + struct r300_atom *atom; CS_LOCALS(r300); /* We probably need to flush Draw, but we may have been called from @@ -54,7 +55,15 @@ static void r300_flush(struct pipe_context* pipe, r300_emit_invariant_state(r300); r300->dirty_state = R300_NEW_KITCHEN_SINK; r300->dirty_hw = 0; + + /* New kitchen sink, baby. */ + foreach(atom, &r300->atom_list) { + if (atom->state) { + atom->dirty = TRUE; + } + } } + /* reset flushed query */ foreach(query, &r300->query_list) { query->flushed = TRUE; diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 4e1b61ca40..60ea9c171d 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -63,6 +63,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info, fs_inputs->fog = i; break; + case TGSI_SEMANTIC_POSITION: + assert(index == 0); + fs_inputs->wpos = i; + break; + default: assert(0); } @@ -114,6 +119,9 @@ static void allocate_hardware_inputs( if (inputs->fog != ATTR_UNUSED) { allocate(mydata, inputs->fog, reg++); } + if (inputs->wpos != ATTR_UNUSED) { + allocate(mydata, inputs->wpos, reg++); + } } static void get_compare_state( @@ -144,6 +152,7 @@ static void r300_translate_fragment_shader( struct r300_fragment_shader* fs = r300->fs; struct r300_fragment_program_compiler compiler; struct tgsi_to_rc ttr; + int wpos = fs->inputs.wpos; /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); @@ -171,6 +180,18 @@ static void r300_translate_fragment_shader( fs->shadow_samplers = compiler.Base.Program.ShadowSamplers; + /** + * Transform the program to support WPOS. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. */ + if (wpos != ATTR_UNUSED) { + /* Moving the input to some other reg is not really necessary. */ + rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE); + } + /* Invoke the compiler */ r3xx_compile_fragment_program(&compiler); if (compiler.Base.Error) { diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 0aa1da07f8..361813891f 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -1619,18 +1619,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_TX_OFFSET_5 0x4554 #define R300_TX_OFFSET_6 0x4558 #define R300_TX_OFFSET_7 0x455C - /* BEGIN: Guess from R200 */ + # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) -# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MACRO_TILE_LINEAR (0 << 2) +# define R300_TXO_MACRO_TILE_TILED (1 << 2) +# define R300_TXO_MACRO_TILE(x) ((x) << 2) # define R300_TXO_MICRO_TILE_LINEAR (0 << 3) -# define R300_TXO_MICRO_TILE (1 << 3) -# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE_TILED (1 << 3) +# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3) +# define R300_TXO_MICRO_TILE(x) ((x) << 3) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 - /* END: Guess from R200 */ /* 32 bit chroma key */ #define R300_TX_CHROMA_KEY_0 0x4580 @@ -2186,6 +2188,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) # define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) +# define R500_SRC_ALPHA_0_NO_READ (1 << 30) +# define R500_SRC_ALPHA_1_NO_READ (1 << 31) /* the following are shared between CBLEND and ABLEND */ # define R300_FCN_MASK (3 << 12) @@ -2281,9 +2285,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_COLORPITCH_MASK 0x00003FFE # define R300_COLOR_TILE_DISABLE (0 << 16) # define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_TILE(x) ((x) << 16) # define R300_COLOR_MICROTILE_DISABLE (0 << 17) # define R300_COLOR_MICROTILE_ENABLE (1 << 17) # define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_MICROTILE(x) ((x) << 17) # define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) # define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) # define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) @@ -2542,9 +2548,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_DEPTHPITCH_MASK 0x00003FFC # define R300_DEPTHMACROTILE_DISABLE (0 << 16) # define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMACROTILE(x) ((x) << 16) # define R300_DEPTHMICROTILE_LINEAR (0 << 17) # define R300_DEPTHMICROTILE_TILED (1 << 17) # define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHMICROTILE(x) ((x) << 17) # define R300_DEPTHENDIAN_NO_SWAP (0 << 18) # define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) # define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index a89cb633e0..710d850163 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -26,6 +26,8 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "indices/u_indices.h" + #include "pipe/p_inlines.h" #include "util/u_memory.h" @@ -69,16 +71,11 @@ uint32_t r300_translate_primitive(unsigned prim) } } -static boolean r300_nothing_to_draw(struct r300_context *r300) -{ - return r300->rs_state->rs.scissor && - r300->scissor_state->scissor.empty_area; -} - static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, unsigned mode) { - uint32_t color_control = r300->rs_state->color_control; + struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; + uint32_t color_control = rs->color_control; /* By default (see r300_state.c:r300_create_rs_state) color_control is * initialized to provoking the first vertex. @@ -98,7 +95,7 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, * ~ C. */ - if (r300->rs_state->rs.flatshade_first) { + if (rs->rs.flatshade_first) { switch (mode) { case PIPE_PRIM_TRIANGLE_FAN: color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; @@ -119,6 +116,44 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, return color_control; } +static void r300_emit_draw_immediate(struct r300_context *r300, + unsigned mode, + unsigned start, + unsigned count) +{ + struct pipe_buffer* vbo = r300->vertex_buffer[0].buffer; + unsigned vertex_size = r300->vertex_buffer[0].stride / sizeof(float); + unsigned i; + uint32_t* map; + CS_LOCALS(r300); + + map = (uint32_t*)pipe_buffer_map_range(r300->context.screen, vbo, + start * vertex_size, count * vertex_size, + PIPE_BUFFER_USAGE_CPU_READ); + + BEGIN_CS(10 + count * vertex_size); + OUT_CS_REG(R300_GA_COLOR_CONTROL, + r300_provoking_vertex_fixes(r300, mode)); + OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | + r300_translate_primitive(mode)); + //debug_printf("r300: Immd %d verts, %d attrs\n", count, vertex_size); + for (i = 0; i < count * vertex_size; i++) { + if (i % vertex_size == 0) { + //debug_printf("r300: -- vert --\n"); + } + //debug_printf("r300: 0x%08x\n", *map); + OUT_CS(*map); + map++; + } + END_CS; + + pipe_buffer_unmap(r300->context.screen, vbo); +} + static void r300_emit_draw_arrays(struct r300_context *r300, unsigned mode, unsigned count) @@ -212,43 +247,84 @@ validate: return TRUE; } +static struct pipe_buffer* r300_translate_elts(struct r300_context* r300, + struct pipe_buffer* elts, + unsigned* size, + unsigned* mode, + unsigned* count) +{ + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* new_elts; + void *in_map, *out_map; + unsigned out_prim, out_index_size, out_nr; + u_translate_func out_translate; + + (void)u_index_translator(~0, *mode, *size, *count, PV_LAST, PV_LAST, + &out_prim, &out_index_size, &out_nr, &out_translate); + + new_elts = screen->buffer_create(screen, 32, + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ, + out_index_size * out_nr); + + in_map = pipe_buffer_map(screen, elts, PIPE_BUFFER_USAGE_CPU_READ); + out_map = pipe_buffer_map(screen, new_elts, PIPE_BUFFER_USAGE_CPU_WRITE); + + out_translate(in_map, *count, out_map); + + pipe_buffer_unmap(screen, elts); + pipe_buffer_unmap(screen, new_elts); + + *size = out_index_size; + *mode = out_prim; + *count = out_nr; + + return new_elts; +} + /* This is the fast-path drawing & emission for HW TCL. */ -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count) +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) { struct r300_context* r300 = r300_context(pipe); + struct pipe_buffer* orgIndexBuffer = indexBuffer; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: use aux/indices functions to split this into smaller + * primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; + } + + if (indexSize == 1) { + indexBuffer = r300_translate_elts(r300, indexBuffer, + &indexSize, &mode, &count); } if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0)) { - return FALSE; + goto cleanup; } if (!r300->winsys->validate(r300->winsys)) { - return FALSE; + goto cleanup; } r300_emit_dirty_state(r300); @@ -258,49 +334,52 @@ boolean r300_draw_range_elements(struct pipe_context* pipe, r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count); - return TRUE; +cleanup: + if (indexBuffer != orgIndexBuffer) { + pipe->screen->buffer_destroy(indexBuffer); + } } /* Simple helpers for context setup. Should probably be moved to util. */ -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count) +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) { - return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, - mode, start, count); + pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); } -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count) +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) { struct r300_context* r300 = r300_context(pipe); if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; + return; } if (count > 65535) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + /* XXX: driver needs to handle this -- use the functions in + * aux/indices to split this into several smaller primitives. + */ + return; } r300_update_derived_state(r300); if (!r300_setup_vertex_buffers(r300)) { - return FALSE; + return; } r300_emit_dirty_state(r300); - r300_emit_aos(r300, start); - - r300_emit_draw_arrays(r300, mode, count); - - return TRUE; + if (FALSE && count <= 4 && r300->vertex_buffer_count == 1) { + r300_emit_draw_immediate(r300, mode, start, count); + } else { + r300_emit_aos(r300, start); + r300_emit_draw_arrays(r300, mode, count); + } } /**************************************************************************** @@ -309,7 +388,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, ***************************************************************************/ /* SW TCL arrays, using Draw. */ -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, +void r300_swtcl_draw_arrays(struct pipe_context* pipe, unsigned mode, unsigned start, unsigned count) @@ -318,11 +397,7 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, int i; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -346,12 +421,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(r300->draw, i, NULL); } - - return TRUE; } /* SW TCL elements, using Draw. */ -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, struct pipe_buffer* indexBuffer, unsigned indexSize, unsigned minIndex, @@ -365,11 +438,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, void* indices; if (!u_trim_pipe_prim(mode, &count)) { - return FALSE; - } - - if (r300_nothing_to_draw(r300)) { - return TRUE; + return; } for (i = 0; i < r300->vertex_buffer_count; i++) { @@ -400,8 +469,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, pipe_buffer_unmap(pipe->screen, indexBuffer); draw_set_mapped_element_buffer_range(r300->draw, 0, start, start + count - 1, NULL); - - return TRUE; } /* Object for rendering using Draw. */ diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h index da83069083..27b5e6a963 100644 --- a/src/gallium/drivers/r300/r300_render.h +++ b/src/gallium/drivers/r300/r300_render.h @@ -25,35 +25,35 @@ uint32_t r300_translate_primitive(unsigned prim); -boolean r300_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_draw_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, unsigned mode, - unsigned start, unsigned count); - -boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, - unsigned start, unsigned count); - -boolean r300_swtcl_draw_arrays(struct pipe_context* pipe, - unsigned mode, - unsigned start, - unsigned count); - -boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe, - struct pipe_buffer* indexBuffer, - unsigned indexSize, - unsigned minIndex, - unsigned maxIndex, - unsigned mode, - unsigned start, - unsigned count); +void r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + +void r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count); + +void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count); + +void r300_swtcl_draw_arrays(struct pipe_context* pipe, + unsigned mode, + unsigned start, + unsigned count); + +void r300_swtcl_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); #endif /* R300_RENDER_H */ diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 2a8667d483..287664b1d2 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -83,6 +83,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + case PIPE_CAP_MAX_COMBINED_SAMPLERS: /* XXX I'm told this goes up to 16 */ return 8; case PIPE_CAP_NPOT_TEXTURES: @@ -143,9 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param) case PIPE_CAP_BLEND_EQUATION_SEPARATE: return 1; case PIPE_CAP_SM3: - return 1; - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return 8; + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } default: debug_printf("r300: Implementation error: Bad param %d\n", param); diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h index 85184e2cfd..6796841b29 100644 --- a/src/gallium/drivers/r300/r300_shader_semantics.h +++ b/src/gallium/drivers/r300/r300_shader_semantics.h @@ -40,6 +40,7 @@ struct r300_shader_semantics { int bcolor[ATTR_COLOR_COUNT]; int generic[ATTR_GENERIC_COUNT]; int fog; + int wpos; }; static INLINE void r300_shader_semantics_reset( @@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset( info->pos = ATTR_UNUSED; info->psize = ATTR_UNUSED; info->fog = ATTR_UNUSED; + info->wpos = ATTR_UNUSED; for (i = 0; i < ATTR_COLOR_COUNT; i++) { info->color[i] = ATTR_UNUSED; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 49072462ec..281ff68449 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * Copyright 2009 Marek Olšák <maraeo@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -41,6 +42,120 @@ /* r300_state: Functions used to intialize state context by translating * Gallium state objects into semi-native r300 state objects. */ +static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 0, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA == 1, and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer + * will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + +static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA, + unsigned dstRGB, unsigned dstA) +{ + /* If the blend equation is ADD or REVERSE_SUBTRACT, + * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set, + * the colorbuffer will not be changed. + * Notice that the dst factors are the src factors inverted. */ + return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcRGB == PIPE_BLENDFACTOR_ZERO) && + (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + srcA == PIPE_BLENDFACTOR_ZERO) && + (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR || + dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ONE) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ONE); +} + /* Create a new blend state based on the CSO blend state. * * This encompasses alpha blending, logic/raster ops, and blend dithering. */ @@ -66,7 +181,11 @@ static void* r300_create_blend_state(struct pipe_context* pipe, ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) | ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT); - /* optimization: some operations do not require the destination color */ + /* Optimization: some operations do not require the destination color. + * + * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled, + * otherwise blending gives incorrect results. It seems to be + * a hardware bug. */ if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX || dstRGB != PIPE_BLENDFACTOR_ZERO || @@ -78,11 +197,81 @@ static void* r300_create_blend_state(struct pipe_context* pipe, srcA == PIPE_BLENDFACTOR_DST_COLOR || srcA == PIPE_BLENDFACTOR_DST_ALPHA || srcA == PIPE_BLENDFACTOR_INV_DST_COLOR || - srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA) + srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA || + srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) { + /* Enable reading from the colorbuffer. */ blend->blend_control |= R300_READ_ENABLE; - /* XXX implement the optimization with DISCARD_SRC_PIXELS*/ - /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */ + if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) { + /* Optimization: Depending on incoming pixels, we can + * conditionally disable the reading in hardware... */ + if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN && + eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) { + /* Disable reading if SRC_ALPHA == 0. */ + if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_0_NO_READ; + } + + /* Disable reading if SRC_ALPHA == 1. */ + if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstRGB == PIPE_BLENDFACTOR_ZERO) && + (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR || + dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA || + dstA == PIPE_BLENDFACTOR_ZERO)) { + blend->blend_control |= R500_SRC_ALPHA_1_NO_READ; + } + } + } + } + + /* Optimization: discard pixels which don't change the colorbuffer. + * + * The code below is non-trivial and some math is involved. + * + * Discarding pixels must be disabled when FP16 AA is enabled. + * This is a hardware bug. Also, this implementation wouldn't work + * with FP blending enabled and equation clamping disabled. + * + * Equations other than ADD are rarely used and therefore won't be + * optimized. */ + if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) && + (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) { + /* ADD: X+Y + * REVERSE_SUBTRACT: Y-X + * + * The idea is: + * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1, + * then CB will not be changed. + * + * Given the srcFactor and dstFactor variables, we can derive + * what src and dst should be equal to and discard appropriate + * pixels. + */ + if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0; + } else if (blend_discard_if_src_alpha_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1; + } else if (blend_discard_if_src_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0; + } else if (blend_discard_if_src_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1; + } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0; + } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA, + dstRGB, dstA)) { + blend->blend_control |= + R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1; + } + } /* separate alpha */ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { @@ -128,8 +317,8 @@ static void r300_bind_blend_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300->blend_state = (struct r300_blend_state*)state; - r300->dirty_state |= R300_NEW_BLEND; + r300->blend_state.state = state; + r300->blend_state.dirty = TRUE; } /* Free blend state. */ @@ -151,20 +340,24 @@ static void r300_set_blend_color(struct pipe_context* pipe, const struct pipe_blend_color* color) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); + struct r300_blend_color_state* state = + (struct r300_blend_color_state*)r300->blend_color_state.state; union util_color uc; util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc); - r300->blend_color_state->blend_color = uc.ui; + state->blend_color = uc.ui; /* XXX if FP16 blending is enabled, we should use the FP16 format */ - r300->blend_color_state->blend_color_red_alpha = + state->blend_color_red_alpha = float_to_fixed10(color->color[0]) | (float_to_fixed10(color->color[3]) << 16); - r300->blend_color_state->blend_color_green_blue = + state->blend_color_green_blue = float_to_fixed10(color->color[2]) | (float_to_fixed10(color->color[1]) << 16); - r300->dirty_state |= R300_NEW_BLEND_COLOR; + r300->blend_color_state.size = r300screen->caps->is_r500 ? 3 : 2; + r300->blend_color_state.dirty = TRUE; } static void r300_set_clip_state(struct pipe_context* pipe, @@ -173,12 +366,15 @@ static void r300_set_clip_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); if (r300_screen(pipe->screen)->caps->has_tcl) { - r300->clip_state = *state; - r300->dirty_state |= R300_NEW_CLIP; + memcpy(r300->clip_state.state, state, sizeof(struct pipe_clip_state)); + r300->clip_state.size = 29; } else { draw_flush(r300->draw); draw_set_clip_state(r300->draw, state); + r300->clip_state.size = 2; } + + r300->clip_state.dirty = TRUE; } /* Create a new depth, stencil, and alpha state based on the CSO dsa state. @@ -271,9 +467,11 @@ static void r300_bind_dsa_state(struct pipe_context* pipe, void* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_screen* r300screen = r300_screen(pipe->screen); - r300->dsa_state = (struct r300_dsa_state*)state; - r300->dirty_state |= R300_NEW_DSA; + r300->dsa_state.state = state; + r300->dsa_state.size = r300screen->caps->is_r500 ? 8 : 6; + r300->dsa_state.dirty = TRUE; } /* Free DSA state. */ @@ -283,37 +481,11 @@ static void r300_delete_dsa_state(struct pipe_context* pipe, FREE(state); } -static void r300_set_scissor_regs(const struct pipe_scissor_state* state, - struct r300_scissor_regs *scissor, - boolean is_r500) -{ - if (is_r500) { - scissor->top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) | - ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT); - } else { - /* Offset of 1440 in non-R500 chipsets. */ - scissor->top_left = - ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | - ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); - scissor->bottom_right = - (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) | - (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT); - } - - scissor->empty_area = state->minx >= state->maxx || - state->miny >= state->maxy; -} - static void r300_set_framebuffer_state(struct pipe_context* pipe, const struct pipe_framebuffer_state* state) { struct r300_context* r300 = r300_context(pipe); - struct pipe_scissor_state scissor; if (r300->draw) { draw_flush(r300->draw); @@ -321,18 +493,12 @@ static void r300->framebuffer_state = *state; - scissor.minx = scissor.miny = 0; - scissor.maxx = state->width; - scissor.maxy = state->height; - r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer, - r300_screen(r300->context.screen)->caps->is_r500); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || !r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } r300->dirty_state |= R300_NEW_FRAMEBUFFERS; - r300->dirty_state |= R300_NEW_BLEND; + + r300->blend_state.dirty = TRUE; + r300->dsa_state.dirty = TRUE; + r300->scissor_state.dirty = TRUE; } /* Create fragment shader state. */ @@ -367,6 +533,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) r300->fs = fs; r300_pick_fragment_shader(r300); + if (r300->vs && r300_vertex_shader_setup_wpos(r300)) { + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + } + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS; } @@ -407,8 +577,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe, /* Copy rasterizer state for Draw. */ rs->rs = *state; - rs->enable_vte = !state->bypass_vs_clip_and_viewport; - #ifdef PIPE_ARCH_LITTLE_ENDIAN rs->vap_control_status = R300_VC_NO_SWAP; #else @@ -524,12 +692,23 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state) draw_set_rasterizer_state(r300->draw, &rs->rs); } - r300->rs_state = rs; + if (rs) { + r300->tcl_bypass = rs->rs.bypass_vs_clip_and_viewport; + } else { + r300->tcl_bypass = FALSE; + } + + r300->rs_state.state = rs; + r300->rs_state.dirty = TRUE; + /* XXX Why is this still needed, dammit!? */ + r300->scissor_state.dirty = TRUE; + r300->viewport_state.dirty = TRUE; + /* XXX Clean these up when we move to atom emits */ - r300->dirty_state |= R300_NEW_RASTERIZER; r300->dirty_state |= R300_NEW_RS_BLOCK; - r300->dirty_state |= R300_NEW_SCISSOR; - r300->dirty_state |= R300_NEW_VIEWPORT; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } /* Free rasterizer state. */ @@ -556,7 +735,8 @@ static void* sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter, state->mag_img_filter, - state->min_mip_filter); + state->min_mip_filter, + state->max_anisotropy > 1.0); /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */ /* We must pass these to the emit function to clamp them properly. */ @@ -664,49 +844,51 @@ static void r300_set_scissor_state(struct pipe_context* pipe, { struct r300_context* r300 = r300_context(pipe); - r300_set_scissor_regs(state, &r300->scissor_state->scissor, - r300_screen(r300->context.screen)->caps->is_r500); + memcpy(r300->scissor_state.state, state, + sizeof(struct pipe_scissor_state)); - /* Don't rely on the order of states being set for the first time. */ - if (!r300->rs_state || r300->rs_state->rs.scissor) { - r300->dirty_state |= R300_NEW_SCISSOR; - } + r300->scissor_state.dirty = TRUE; } static void r300_set_viewport_state(struct pipe_context* pipe, const struct pipe_viewport_state* state) { struct r300_context* r300 = r300_context(pipe); + struct r300_viewport_state* viewport = + (struct r300_viewport_state*)r300->viewport_state.state; /* Do the transform in HW. */ - r300->viewport_state->vte_control = R300_VTX_W0_FMT; + viewport->vte_control = R300_VTX_W0_FMT; if (state->scale[0] != 1.0f) { - r300->viewport_state->xscale = state->scale[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA; + viewport->xscale = state->scale[0]; + viewport->vte_control |= R300_VPORT_X_SCALE_ENA; } if (state->scale[1] != 1.0f) { - r300->viewport_state->yscale = state->scale[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA; + viewport->yscale = state->scale[1]; + viewport->vte_control |= R300_VPORT_Y_SCALE_ENA; } if (state->scale[2] != 1.0f) { - r300->viewport_state->zscale = state->scale[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA; + viewport->zscale = state->scale[2]; + viewport->vte_control |= R300_VPORT_Z_SCALE_ENA; } if (state->translate[0] != 0.0f) { - r300->viewport_state->xoffset = state->translate[0]; - r300->viewport_state->vte_control |= R300_VPORT_X_OFFSET_ENA; + viewport->xoffset = state->translate[0]; + viewport->vte_control |= R300_VPORT_X_OFFSET_ENA; } if (state->translate[1] != 0.0f) { - r300->viewport_state->yoffset = state->translate[1]; - r300->viewport_state->vte_control |= R300_VPORT_Y_OFFSET_ENA; + viewport->yoffset = state->translate[1]; + viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA; } if (state->translate[2] != 0.0f) { - r300->viewport_state->zoffset = state->translate[2]; - r300->viewport_state->vte_control |= R300_VPORT_Z_OFFSET_ENA; + viewport->zoffset = state->translate[2]; + viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA; } - r300->dirty_state |= R300_NEW_VIEWPORT; + r300->viewport_state.dirty = TRUE; + if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) { + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS; + } } static void r300_set_vertex_buffers(struct pipe_context* pipe, @@ -778,7 +960,13 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader) } r300->vs = vs; - r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS; + if (r300->fs) { + r300_vertex_shader_setup_wpos(r300); + } + + r300->dirty_state |= + R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS | + R300_NEW_VERTEX_FORMAT; } else { draw_flush(r300->draw); draw_bind_vertex_shader(r300->draw, diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 727ae7ade6..192846411b 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -139,10 +139,10 @@ static void r300_vertex_psc(struct r300_context* r300) /* If TCL is bypassed, map vertex streams to equivalent VS output * locations. */ - if (r300->rs_state->enable_vte) { - stream_tab = identity; - } else { + if (r300->tcl_bypass) { stream_tab = r300->vs->stream_loc_notcl; + } else { + stream_tab = identity; } /* Vertex shaders have no semantics on their inputs, @@ -333,6 +333,8 @@ static void r300_update_rs_block(struct r300_context* r300, void (*rX00_rs_col_write)(struct r300_rs_block*, int, int); void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean); void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int); + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; if (r300_screen(r300->context.screen)->caps->is_r500) { rX00_rs_col = r500_rs_col; @@ -348,7 +350,7 @@ static void r300_update_rs_block(struct r300_context* r300, /* Rasterize colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { /* Always rasterize if it's written by the VS, * otherwise it locks up. */ rX00_rs_col(rs, col_count, i, FALSE); @@ -410,6 +412,16 @@ static void r300_update_rs_block(struct r300_context* r300, } } + /* Rasterize WPOS. */ + /* If the FS doesn't need it, it's not written by the VS. */ + if (fs_inputs->wpos != ATTR_UNUSED) { + rX00_rs_tex(rs, tex_count, tex_count, FALSE); + rX00_rs_tex_write(rs, tex_count, fp_offset); + + fp_offset++; + tex_count++; + } + /* Rasterize at least one color, or bad things happen. */ if (col_count == 0 && tex_count == 0) { rX00_rs_col(rs, 0, 0, TRUE); @@ -496,7 +508,8 @@ static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa) static void r300_update_ztop(struct r300_context* r300) { - r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE; + struct r300_ztop_state* ztop_state = + (struct r300_ztop_state*)r300->ztop_state.state; /* This is important enough that I felt it warranted a comment. * @@ -518,31 +531,37 @@ static void r300_update_ztop(struct r300_context* r300) * 5) Depth writes in fragment shader * 6) Outstanding occlusion queries * + * This register causes stalls all the way from SC to CB when changed, + * but it is buffered on-chip so it does not hurt to write it if it has + * not changed. + * * ~C. */ /* ZS writes */ - if (r300_dsa_writes_depth_stencil(r300->dsa_state) && - (r300_dsa_alpha_test_enabled(r300->dsa_state) || /* (1) */ - r300->fs->info.uses_kill)) { /* (2) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; - } else if (r300->query_current) { /* (6) */ - r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE; + if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) && + (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||/* (1) */ + r300->fs->info.uses_kill)) { /* (2) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else if (r300->query_current) { /* (6) */ + ztop_state->z_buffer_top = R300_ZTOP_DISABLE; + } else { + ztop_state->z_buffer_top = R300_ZTOP_ENABLE; } + + r300->ztop_state.dirty = TRUE; } void r300_update_derived_state(struct r300_context* r300) { + /* XXX */ if (r300->dirty_state & (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER | - R300_NEW_VERTEX_FORMAT)) { + R300_NEW_VERTEX_FORMAT) || r300->rs_state.dirty) { r300_update_derived_shader_state(r300); } - if (r300->dirty_state & - (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) { - r300_update_ztop(r300); - } + r300_update_ztop(r300); } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index dbe42edd91..35be00e1b0 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -257,38 +257,37 @@ static INLINE uint32_t r300_translate_wrap(int wrap) } } -static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip) +static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip, + int is_anisotropic) { uint32_t retval = 0; - switch (min) { + if (is_anisotropic) + retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO; + else { + switch (min) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MIN_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MIN_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MIN_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", min); assert(0); break; - } - switch (mag) { + } + switch (mag) { case PIPE_TEX_FILTER_NEAREST: retval |= R300_TX_MAG_FILTER_NEAREST; break; case PIPE_TEX_FILTER_LINEAR: retval |= R300_TX_MAG_FILTER_LINEAR; break; - case PIPE_TEX_FILTER_ANISO: - retval |= R300_TX_MAG_FILTER_ANISO; - break; default: debug_printf("r300: Unknown texture filter %d\n", mag); assert(0); break; + } } switch (mip) { case PIPE_TEX_MIPFILTER_NONE: diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index bcd4c030f9..b0f309695c 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(20 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(16 + (caps->has_tcl ? 2: 0)); /*** Graphics Backend (GB) ***/ /* Various GB enables */ @@ -66,8 +66,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); - OUT_CS_REG(R300_US_W_FMT, 0x0); /*** VAP ***/ /* Sign/normalize control */ @@ -117,10 +115,12 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C); OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525); OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000); - if (caps->is_r500) { - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000); - OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF); + + if (caps->family >= CHIP_FAMILY_RV350) { + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101); + OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE); } + OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000); OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000); OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000); diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 9a96206a4d..a9bbdd56d8 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -30,6 +30,18 @@ #include "r300_texture.h" #include "r300_screen.h" +#define TILE_WIDTH 0 +#define TILE_HEIGHT 1 + +static const unsigned microblock_table[5][3][2] = { + /*linear tiled square-tiled */ + {{32, 1}, {8, 4}, {0, 0}}, /* 8 bits per pixel */ + {{16, 1}, {8, 2}, {4, 4}}, /* 16 bits per pixel */ + {{ 8, 1}, {4, 2}, {0, 0}}, /* 32 bits per pixel */ + {{ 4, 1}, {0, 0}, {2, 2}}, /* 64 bits per pixel */ + {{ 2, 1}, {0, 0}, {0, 0}} /* 128 bits per pixel */ +}; + static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500) { struct r300_texture_state* state = &tex->state; @@ -92,33 +104,67 @@ unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level, } /** + * Return the width (dim==TILE_WIDTH) or height (dim==TILE_HEIGHT) of one tile + * of the given texture. + */ +static unsigned r300_texture_get_tile_size(struct r300_texture* tex, int dim) +{ + unsigned pixsize, tile_size; + + pixsize = util_format_get_blocksize(tex->tex.format); + tile_size = microblock_table[util_logbase2(pixsize)][tex->microtile][dim] * + (tex->macrotile == R300_BUFFER_TILED ? 8 : 1); + + assert(tile_size); + return tile_size; +} + +/** * Return the stride, in bytes, of the texture images of the given texture * at the given level. */ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level) { + unsigned tile_width, width; + if (tex->stride_override) return tex->stride_override; + /* Check the level. */ if (level > tex->tex.last_level) { debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level); return 0; } - return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32); + tile_width = r300_texture_get_tile_size(tex, TILE_WIDTH); + width = align(u_minify(tex->tex.width0, level), tile_width); + + /* Should already be aligned except for S3TC. */ + return align(util_format_get_stride(tex->tex.format, width), 32); +} + +static unsigned r300_texture_get_nblocksy(struct r300_texture* tex, + unsigned level) +{ + unsigned height, tile_height; + + tile_height = r300_texture_get_tile_size(tex, TILE_HEIGHT); + height = align(u_minify(tex->tex.height0, level), tile_height); + + return util_format_get_nblocksy(tex->tex.format, height); } static void r300_setup_miptree(struct r300_texture* tex) { struct pipe_texture* base = &tex->tex; - int stride, size, layer_size; - int i; + unsigned stride, size, layer_size, nblocksy, i; - for (i = 0; i <= base->last_level; i++) { - unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i)); + debug_printf("r300: Making miptree for texture, format %s\n", pf_name(base->format)); + for (i = 0; i <= base->last_level; i++) { stride = r300_texture_get_stride(tex, i); + nblocksy = r300_texture_get_nblocksy(tex, i); layer_size = stride * nblocksy; if (base->target == PIPE_TEXTURE_CUBE) @@ -132,9 +178,9 @@ static void r300_setup_miptree(struct r300_texture* tex) tex->pitch[i] = stride / util_format_get_blocksize(base->format); debug_printf("r300: Texture miptree: Level %d " - "(%dx%dx%d px, pitch %d bytes)\n", + "(%dx%dx%d px, pitch %d bytes) %d bytes total\n", i, u_minify(base->width0, i), u_minify(base->height0, i), - u_minify(base->depth0, i), stride); + u_minify(base->depth0, i), stride, tex->size); } } @@ -163,7 +209,7 @@ static struct pipe_texture* r300_setup_miptree(tex); r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500); - tex->buffer = screen->buffer_create(screen, 1024, + tex->buffer = screen->buffer_create(screen, 2048, PIPE_BUFFER_USAGE_PIXEL, tex->size); diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index c4ed0d712f..68aef70872 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -22,6 +22,7 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "r300_vs.h" +#include "r300_fs.h" #include "r300_context.h" #include "r300_screen.h" @@ -33,6 +34,8 @@ #include "radeon_compiler.h" +#include "util/u_math.h" + /* Convert info about VS output semantics into r300_shader_semantics. */ static void r300_shader_read_vs_outputs( struct tgsi_shader_info* info, @@ -88,11 +91,13 @@ static void r300_shader_read_vs_outputs( } } -static void r300_shader_vap_output_fmt( - struct r300_shader_semantics* vs_outputs, - uint* hwfmt) +static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs) { + struct r300_shader_semantics* vs_outputs = &vs->outputs; + uint32_t* hwfmt = vs->hwfmt; int i, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Do the actual vertex_info setup. * @@ -119,13 +124,19 @@ static void r300_shader_vap_output_fmt( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { hwfmt[1] |= R300_INPUT_CNTL_COLOR; hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + hwfmt[1] |= R300_INPUT_CNTL_COLOR; + hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i); + } + } /* Texture coordinates. */ gen_count = 0; @@ -146,6 +157,9 @@ static void r300_shader_vap_output_fmt( /* XXX magic */ assert(gen_count <= 8); + + /* WPOS. */ + vs->wpos_tex_output = gen_count; } /* Sets up stream mapping to equivalent VS outputs if TCL is bypassed @@ -155,6 +169,8 @@ static void r300_stream_locations_notcl( int* stream_loc) { int i, tabi = 0, gen_count; + boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED || + vs_outputs->bcolor[1] != ATTR_UNUSED; /* Position. */ stream_loc[tabi++] = 0; @@ -166,14 +182,14 @@ static void r300_stream_locations_notcl( /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->color[i] != ATTR_UNUSED) { + if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) { stream_loc[tabi++] = 2 + i; } } /* Back-face colors. */ - for (i = 0; i < ATTR_COLOR_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (any_bcolor_used) { + for (i = 0; i < ATTR_COLOR_COUNT; i++) { stream_loc[tabi++] = 4 + i; } } @@ -181,7 +197,7 @@ static void r300_stream_locations_notcl( /* Texture coordinates. */ gen_count = 0; for (i = 0; i < ATTR_GENERIC_COUNT; i++) { - if (vs_outputs->bcolor[i] != ATTR_UNUSED) { + if (vs_outputs->generic[i] != ATTR_UNUSED) { assert(tabi < 16); stream_loc[tabi++] = 6 + gen_count; gen_count++; @@ -195,8 +211,12 @@ static void r300_stream_locations_notcl( gen_count++; } - /* XXX magic */ - assert(gen_count <= 8); + /* WPOS. */ + if (vs_outputs->wpos != ATTR_UNUSED) { + assert(tabi < 16); + stream_loc[tabi++] = 6 + gen_count; + gen_count++; + } for (; tabi < 16;) { stream_loc[tabi++] = -1; @@ -209,6 +229,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) struct r300_shader_semantics* outputs = &vs->outputs; struct tgsi_shader_info* info = &vs->info; int i, reg = 0; + boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED || + outputs->bcolor[1] != ATTR_UNUSED; /* Fill in the input mapping */ for (i = 0; i < info->num_inputs; i++) @@ -226,14 +248,30 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) c->code->outputs[outputs->psize] = reg++; } + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ + /* Colors. */ for (i = 0; i < ATTR_COLOR_COUNT; i++) { if (outputs->color[i] != ATTR_UNUSED) { c->code->outputs[outputs->color[i]] = reg++; + } else if (any_bcolor_used) { + reg++; } } - /* XXX Back-face colors. */ + /* Back-face colors. */ + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + if (outputs->bcolor[i] != ATTR_UNUSED) { + c->code->outputs[outputs->bcolor[i]] = reg++; + } else if (any_bcolor_used) { + reg++; + } + } /* Texture coordinates. */ for (i = 0; i < ATTR_GENERIC_COUNT; i++) { @@ -246,6 +284,33 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c) if (outputs->fog != ATTR_UNUSED) { c->code->outputs[outputs->fog] = reg++; } + + /* WPOS. */ + if (outputs->wpos != ATTR_UNUSED) { + c->code->outputs[outputs->wpos] = reg++; + } +} + +static void r300_insert_wpos(struct r300_vertex_program_compiler* c, + struct r300_shader_semantics* outputs) +{ + int i, lastOutput = 0; + + /* Find the max output index. */ + lastOutput = MAX2(lastOutput, outputs->psize); + for (i = 0; i < ATTR_COLOR_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->color[i]); + lastOutput = MAX2(lastOutput, outputs->bcolor[i]); + } + for (i = 0; i < ATTR_GENERIC_COUNT; i++) { + lastOutput = MAX2(lastOutput, outputs->generic[i]); + } + lastOutput = MAX2(lastOutput, outputs->fog); + + /* Set WPOS after the last output. */ + lastOutput++; + rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */ + outputs->wpos = lastOutput; } void r300_translate_vertex_shader(struct r300_context* r300, @@ -256,8 +321,6 @@ void r300_translate_vertex_shader(struct r300_context* r300, /* Initialize. */ r300_shader_read_vs_outputs(&vs->info, &vs->outputs); - r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt); - r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); /* Setup the compiler */ rc_init(&compiler.Base); @@ -277,9 +340,15 @@ void r300_translate_vertex_shader(struct r300_context* r300, r300_tgsi_to_rc(&ttr, vs->state.tokens); - compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs); + compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1)); compiler.SetHwInputOutput = &set_vertex_inputs_outputs; + /* Insert the WPOS output. */ + r300_insert_wpos(&compiler, &vs->outputs); + + r300_shader_vap_output_fmt(vs); + r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl); + /* Invoke the compiler */ r3xx_compile_vertex_program(&compiler); if (compiler.Base.Error) { @@ -292,3 +361,30 @@ void r300_translate_vertex_shader(struct r300_context* r300, rc_destroy(&compiler.Base); vs->translated = TRUE; } + +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300) +{ + struct r300_vertex_shader* vs = r300->vs; + int tex_output = r300->vs->wpos_tex_output; + uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output; + uint32_t* hwfmt = vs->hwfmt; + + if (r300->fs->inputs.wpos != ATTR_UNUSED) { + /* Enable WPOS in VAP. */ + if (!(hwfmt[1] & tex_fmt)) { + hwfmt[1] |= tex_fmt; + hwfmt[3] |= (4 << (3 * tex_output)); + + assert(tex_output < 8); + return TRUE; + } + } else { + /* Disable WPOS in VAP. */ + if (hwfmt[1] & tex_fmt) { + hwfmt[1] &= ~tex_fmt; + hwfmt[3] &= ~(4 << (3 * tex_output)); + return TRUE; + } + } + return FALSE; +} diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h index 67e9db5366..18cfeee3cd 100644 --- a/src/gallium/drivers/r300/r300_vs.h +++ b/src/gallium/drivers/r300/r300_vs.h @@ -43,6 +43,9 @@ struct r300_vertex_shader { /* Stream locations for SWTCL or if TCL is bypassed. */ int stream_loc_notcl[16]; + /* Output stream location for WPOS. */ + int wpos_tex_output; + /* Has this shader been translated yet? */ boolean translated; @@ -53,4 +56,7 @@ struct r300_vertex_shader { void r300_translate_vertex_shader(struct r300_context* r300, struct r300_vertex_shader* vs); +/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */ +boolean r300_vertex_shader_setup_wpos(struct r300_context* r300); + #endif /* R300_VS_H */ |