From b0828b0adc7438ef33f9393f839226ef7dfda0dc Mon Sep 17 00:00:00 2001
From: Keith Whitwell
Date: Mon, 19 Oct 2009 16:41:27 +0100
Subject: llvmpipe: calculate masks in format desired by shader

Also remove branches calculating masks for quads.
---
 src/gallium/drivers/llvmpipe/lp_rast.c      | 21 ++++----
 src/gallium/drivers/llvmpipe/lp_rast_priv.h |  2 +-
 src/gallium/drivers/llvmpipe/lp_rast_tri.c  | 82 ++++++++++++-----------------
 3 files changed, 44 insertions(+), 61 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 2038403c8f..01f46dcab1 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -193,7 +193,12 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
                          const union lp_rast_cmd_arg arg )
 {
    const struct lp_rast_shader_inputs *inputs = arg.shade_tile;
-   const unsigned masks[4] = {~0, ~0, ~0, ~0};
+   static const uint32_t ALIGN16_ATTRIB masks[4][4] =
+      { {~0, ~0, ~0, ~0},
+        {~0, ~0, ~0, ~0},
+        {~0, ~0, ~0, ~0},
+        {~0, ~0, ~0, ~0} };
+
    unsigned x, y;

    RAST_DEBUG("%s\n", __FUNCTION__);
@@ -202,23 +207,20 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
     */
    for (y = 0; y < TILE_SIZE; y += 2)
       for (x = 0; x < TILE_SIZE; x += 8)
-         lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, masks);
+         lp_rast_shade_quads( rast, inputs, rast->x + x, rast->y + y, &masks[0][0]);
 }


 void lp_rast_shade_quads( struct lp_rasterizer *rast,
                           const struct lp_rast_shader_inputs *inputs,
                           unsigned x, unsigned y,
-                          const unsigned *masks)
+                          const uint32_t *masks)
 {
 #if 1
    const struct lp_rast_state *state = inputs->state;
    struct lp_rast_tile *tile = &rast->tile;
    void *color;
    void *depth;
-   uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
-   unsigned chan_index;
-   unsigned q;
    unsigned ix, iy;

    /* Sanity checks */
@@ -228,11 +230,6 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
    ix = x % TILE_SIZE;
    iy = y % TILE_SIZE;

-   /* mask */
-   for (q = 0; q < 4; ++q)
-      for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-         mask[q][chan_index] = masks[q] & (1 << chan_index) ? ~0 : 0;
-
    /* color buffer */
    color = &TILE_PIXEL(tile->color, ix, iy, 0);

@@ -254,7 +251,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
                         inputs->a0,
                         inputs->dadx,
                         inputs->dady,
-                        &mask[0][0],
+                        masks,
                         color,
                         depth);
 #else
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 11e8e78e79..f438faaf36 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -80,6 +80,6 @@ struct lp_rasterizer {
 void lp_rast_shade_quads( struct lp_rasterizer *rast,
                           const struct lp_rast_shader_inputs *inputs,
                           unsigned x, unsigned y,
-                          const unsigned *masks);
+                          const uint32_t *masks);

 #endif
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 17ebce4c85..5f22aca668 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -37,34 +37,26 @@

 #define BLOCKSIZE 4

-/* Convert 8x8 block into four runs of quads and render each in turn.
+/* Render a 4x4 unmasked block:
  */
-#if (BLOCKSIZE == 8)
 static void block_full( struct lp_rasterizer *rast,
                         const struct lp_rast_triangle *tri,
                         int x, int y )
 {
-   const unsigned masks[4] = {~0, ~0, ~0, ~0};
-   int iy;
+   static const uint32_t ALIGN16_ATTRIB masks[4][4] =
+      { {~0, ~0, ~0, ~0},
+        {~0, ~0, ~0, ~0},
+        {~0, ~0, ~0, ~0},
+        {~0, ~0, ~0, ~0} };

-   for (iy = 0; iy < 8; iy += 2)
-      lp_rast_shade_quads(rast, &tri->inputs, x, y + iy, masks);
+   lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]);
 }
-#else
-static void block_full( struct lp_rasterizer *rast,
-                        const struct lp_rast_triangle *tri,
-                        int x, int y )
-{
-   const unsigned masks[4] = {~0, ~0, ~0, ~0};
-   lp_rast_shade_quads(rast, &tri->inputs, x, y, masks);
-}
-#endif


-static INLINE unsigned
+static INLINE void
 do_quad( const struct lp_rast_triangle *tri,
-         int x, int y,
-         int c1, int c2, int c3 )
+         int c1, int c2, int c3,
+         int32_t *mask )
 {
    const int xstep1 = -tri->dy12 ;
    const int xstep2 = -tri->dy23 ;
@@ -73,30 +65,22 @@ do_quad( const struct lp_rast_triangle *tri,
    const int ystep1 = tri->dx12 ;
    const int ystep2 = tri->dx23 ;
    const int ystep3 = tri->dx31 ;
-
-   unsigned mask = 0;
-
-   if (c1 > 0 &&
-       c2 > 0 &&
-       c3 > 0)
-      mask |= 1;
-
-   if (c1 + xstep1 > 0 &&
-       c2 + xstep2 > 0 &&
-       c3 + xstep3 > 0)
-      mask |= 2;
-
-   if (c1 + ystep1 > 0 &&
-       c2 + ystep2 > 0 &&
-       c3 + ystep3 > 0)
-      mask |= 4;
-
-   if (c1 + ystep1 + xstep1 > 0 &&
-       c2 + ystep2 + xstep2 > 0 &&
-       c3 + ystep3 + xstep3 > 0)
-      mask |= 8;
-
-   return mask;
+
+   mask[0] = ~(((c1) |
+                (c2) |
+                (c3)) >> 31);
+
+   mask[1] = ~(((c1 + xstep1) |
+                (c2 + xstep2) |
+                (c3 + xstep3)) >> 31);
+
+   mask[2] = ~(((c1 + ystep1) |
+                (c2 + ystep2) |
+                (c3 + ystep3)) >> 31);
+
+   mask[3] = ~(((c1 + ystep1 + xstep1) |
+                (c2 + ystep2 + xstep2) |
+                (c3 + ystep3 + xstep3)) >> 31);
 }

 /* Evaluate each pixel in a block, generate a mask and possibly render
@@ -121,17 +105,17 @@ do_block( struct lp_rasterizer *rast,
    const int ystep3 = step * tri->dx31;

    int ix, iy;
+   uint32_t ALIGN16_ATTRIB mask[4][4];

-   unsigned masks[2][2] = {{0, 0}, {0, 0}};

-   for (iy = 0; iy < BLOCKSIZE; iy += 2) {
+   for (iy = 0; iy < 4; iy += 2) {
       int cx1 = c1;
       int cx2 = c2;
       int cx3 = c3;

-      for (ix = 0; ix < BLOCKSIZE; ix += 2) {
+      for (ix = 0; ix < 2; ix ++) {

-         masks[iy >> 1][ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3);
+         do_quad(tri, cx1, cx2, cx3, (int32_t *)mask[iy+ix]);

          cx1 += xstep1;
          cx2 += xstep2;
@@ -143,8 +127,10 @@ do_block( struct lp_rasterizer *rast,
       c3 += ystep3;
    }

-   if(masks[0][0] || masks[0][1] || masks[1][0] || masks[1][1])
-      lp_rast_shade_quads(rast, &tri->inputs, x, y, &masks[0][0]);
+   /* As we do trivial reject already, masks should rarely be all
+    * zero:
+    */
+   lp_rast_shade_quads(rast, &tri->inputs, x, y, &mask[0][0] );
 }


--
cgit v1.2.3