diff options
Diffstat (limited to 'src/mesa/drivers/dri/radeon/radeon_span.c')
-rw-r--r-- | src/mesa/drivers/dri/radeon/radeon_span.c | 589 |
1 files changed, 464 insertions, 125 deletions
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 12051ff1c8..4e100d854e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -43,46 +43,222 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "swrast/swrast.h" -#include "radeon_context.h" -#include "radeon_ioctl.h" -#include "radeon_state.h" +#include "radeon_common.h" +#include "radeon_lock.h" #include "radeon_span.h" -#include "radeon_tex.h" - -#include "drirenderbuffer.h" #define DBG 0 +static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); + + +/* r200 depth buffer is always tiled - this is the formula + according to the docs unless I typo'ed in it +*/ +#if defined(RADEON_COMMON_FOR_R200) +static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6))); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 3) & 0x1) << 8; + offset += ((x >> 4) & 0x3) << 6; + offset += ((x >> 2) & 0x1) << 5; + offset += ((y >> 1) & 0x1) << 4; + offset += ((x >> 1) & 0x1) << 3; + offset += (y & 0x1) << 2; + offset += (x & 0x1) << 1; + } + return &ptr[offset]; +} + +static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + GLint offset; + if (rrb->has_surface) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + GLuint b; + offset = 0; + b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5)); + offset += (b >> 1) << 12; + offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; + offset += ((y >> 2) & 0x3) << 9; + offset += ((x >> 2) & 0x1) << 8; + offset += ((x >> 3) & 0x3) << 6; + offset += ((y >> 1) & 0x1) << 5; + offset += ((x >> 1) & 0x1) << 4; + offset += (y & 0x1) << 3; + offset += (x & 0x1) << 2; + } + return &ptr[offset]; +} +#endif + +/* radeon tiling on r300-r500 has 4 states, + macro-linear/micro-linear + macro-linear/micro-tiled + macro-tiled /micro-linear + macro-tiled /micro-tiled + 1 byte surface + 2 byte surface - two types - we only provide 8x2 microtiling + 4 byte surface + 8/16 byte (unused) +*/ +static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = 0; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { + offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11; + offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10; + offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9; + offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8; + offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7; + offset += ((y >> 1) & 0x1) << 6; + offset += ((x >> 2) & 0x1) << 5; + offset += (y & 1) << 4; + offset += (x & 3) << 2; + } else { + offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11; + offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10; + offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9; + offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8; + offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7; + offset += (y & 1) << 6; + offset += (x & 15) << 2; + } + } else { + offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5; + offset += (y & 1) << 4; + offset += (x & 3) << 2; + } + } + return &ptr[offset]; +} + +static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = 0; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { + offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11; + offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10; + offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9; + offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8; + offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7; + offset += ((y >> 1) & 0x1) << 6; + offset += ((x >> 3) & 0x1) << 5; + offset += (y & 1) << 4; + offset += (x & 3) << 2; + } else { + offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11; + offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10; + offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9; + offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8; + offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7; + offset += (y & 1) << 6; + offset += ((x >> 4) & 0x1) << 5; + offset += (x & 15) << 2; + } + } else { + offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5; + offset += (y & 0x1) << 4; + offset += (x & 0x7) << 1; + } + } + return &ptr[offset]; +} + +#ifndef COMPILE_R300 +static uint32_t +z24s8_to_s8z24(uint32_t val) +{ + return (val << 24) | (val >> 8); +} + +static uint32_t +s8z24_to_z24s8(uint32_t val) +{ + return (val >> 24) | (val << 8); +} +#endif + /* * Note that all information needed to access pixels in a renderbuffer * should be obtained through the gl_renderbuffer parameter, not per-context * information. */ #define LOCAL_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLubyte *buf = (GLubyte *) drb->flippedData \ - + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ - GLuint p; \ - (void) p; + struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ + struct radeon_renderbuffer *rrb = (void *) rb; \ + const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ + const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ + unsigned int num_cliprects; \ + struct drm_clip_rect *cliprects; \ + int x_off, y_off; \ + GLuint p; \ + (void)p; \ + radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); #define LOCAL_DEPTH_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLuint xo = dPriv->x; \ - GLuint yo = dPriv->y; \ - GLubyte *buf = (GLubyte *) drb->Base.Data; + struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ + struct radeon_renderbuffer *rrb = (void *) rb; \ + const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ + const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ + unsigned int num_cliprects; \ + struct drm_clip_rect *cliprects; \ + int x_off, y_off; \ + radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS -#define Y_FLIP(Y) (bottom - (Y)) +#define Y_FLIP(_y) ((_y) * yScale + yBias) #define HW_LOCK() #define HW_UNLOCK() +/* XXX FBO: this is identical to the macro in spantmp2.h except we get + * the cliprect info from the context, not the driDrawable. + * Move this into spantmp2.h someday. + */ +#define HW_CLIPLOOP() \ + do { \ + int _nc = num_cliprects; \ + while ( _nc-- ) { \ + int minx = cliprects[_nc].x1 - x_off; \ + int miny = cliprects[_nc].y1 - y_off; \ + int maxx = cliprects[_nc].x2 - x_off; \ + int maxy = cliprects[_nc].y2 - y_off; + /* ================================================================ * Color buffer */ @@ -94,7 +270,41 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define TAG(x) radeon##x##_RGB565 #define TAG2(x,y) radeon##x##_RGB565##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#include "spantmp2.h" + +/* 16 bit, ARGB1555 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV + +#define TAG(x) radeon##x##_ARGB1555 +#define TAG2(x,y) radeon##x##_ARGB1555##y +#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#include "spantmp2.h" + +/* 16 bit, RGBA4 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV + +#define TAG(x) radeon##x##_ARGB4444 +#define TAG2(x,y) radeon##x##_ARGB4444##y +#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) +#include "spantmp2.h" + +/* 32 bit, xRGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) radeon##x##_xRGB8888 +#define TAG2(x,y) radeon##x##_xRGB8888##y +#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000)) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -104,7 +314,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define TAG(x) radeon##x##_ARGB8888 #define TAG2(x,y) radeon##x##_ARGB8888##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) #include "spantmp2.h" /* ================================================================ @@ -121,106 +335,127 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * too... */ -static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 4 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0..1] = 0 */ - -#ifdef COMPILE_R300 - ba = (y / 8) * (pitch / 8) + (x / 8); -#else - ba = (y / 16) * (pitch / 16) + (x / 16); -#endif - - address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */ - address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */ - address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */ - address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ - - address |= (y & 0x8) << 7; /* a[10] = y[3] */ - address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */ - address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ - - return address; - } -} - -static INLINE GLuint -radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 2 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0] = 0 */ - - ba = (y / 16) * (pitch / 32) + (x / 32); - - address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */ - address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */ - address |= (x & 0x8) << 4; /* a[7] = x[3] */ - address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ - address |= (y & 0x8) << 7; /* a[10] = y[3] */ - address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */ - address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ - - return address; - } -} - /* 16-bit depth buffer functions */ #define VALUE_TYPE GLushort +#if defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ + *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d +#else #define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; + *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d +#endif +#if defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); + d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) +#else +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) +#endif #define TAG(x) radeon##x##_z16 #include "depthtmp.h" -/* 24 bit depth, 8 bit stencil depthbuffer functions +/* 24 bit depth * * Careful: It looks like the R300 uses ZZZS byte order while the R200 * uses SZZZ for 24 bit depth, 8 bit stencil mode. */ #define VALUE_TYPE GLuint -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define WRITE_DEPTH( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ tmp &= 0x000000ff; \ tmp |= ((d << 8) & 0xffffff00); \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ +} while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + tmp &= 0xff000000; \ + tmp |= ((d) & 0x00ffffff); \ + *_ptr = tmp; \ } while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ tmp &= 0xff000000; \ tmp |= ((d) & 0x00ffffff); \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ } while (0) #endif -#ifdef COMPILE_R300 +#if defined(COMPILE_R300) #define READ_DEPTH( d, _x, _y ) \ - do { \ - d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ - _y + yo )) & 0xffffff00) >> 8; \ + do { \ + d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ + }while(0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \ }while(0) #else +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; +#endif + +#define TAG(x) radeon##x##_z24 +#include "depthtmp.h" + +/* 24 bit depth, 8 bit stencil depthbuffer functions + * EXT_depth_stencil + * + * Careful: It looks like the R300 uses ZZZS byte order while the R200 + * uses SZZZ for 24 bit depth, 8 bit stencil mode. + */ +#define VALUE_TYPE GLuint + +#if defined(COMPILE_R300) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = z24s8_to_s8z24(d); \ + *_ptr = tmp; \ +} while (0) +#else +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = z24s8_to_s8z24(d); \ + *_ptr = tmp; \ +} while (0) +#endif + +#if defined(COMPILE_R300) +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ + }while(0) +#elif defined(RADEON_COMMON_FOR_R200) #define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ - _y + yo )) & 0x00ffffff; + do { \ + d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \ + }while(0) +#else +#define READ_DEPTH( d, _x, _y ) do { \ + d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \ + } while (0) #endif #define TAG(x) radeon##x##_z24_s8 @@ -235,35 +470,51 @@ do { \ #ifdef COMPILE_R300 #define WRITE_STENCIL( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ tmp &= 0xffffff00; \ tmp |= (d) & 0xff; \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ +} while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define WRITE_STENCIL( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ + tmp &= 0x00ffffff; \ + tmp |= (((d) & 0xff) << 24); \ + *_ptr = tmp; \ } while (0) #else #define WRITE_STENCIL( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ tmp &= 0x00ffffff; \ tmp |= (((d) & 0xff) << 24); \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ } while (0) #endif #ifdef COMPILE_R300 #define READ_STENCIL( d, _x, _y ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) +#elif defined(RADEON_COMMON_FOR_R200) +#define READ_STENCIL( d, _x, _y ) \ +do { \ + GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ + d = (tmp & 0xff000000) >> 24; \ +} while (0) #else #define READ_STENCIL( d, _x, _y ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ d = (tmp & 0xff000000) >> 24; \ } while (0) #endif @@ -271,29 +522,110 @@ do { \ #define TAG(x) radeon##x##_z24_s8 #include "stenciltmp.h" -/* Move locking out to get reasonable span performance (10x better - * than doing this in HW_LOCK above). WaitForIdle() is the main - * culprit. - */ + +static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) +{ + struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); + int r; + + if (rrb == NULL || !rrb->bo) + return; + + if (flag) { + if (rrb->bo->bom->funcs->bo_wait) + radeon_bo_wait(rrb->bo); + r = radeon_bo_map(rrb->bo, 1); + if (r) { + fprintf(stderr, "(%s) error(%d) mapping buffer.\n", + __FUNCTION__, r); + } + + radeonSetSpanFunctions(rrb); + } else { + radeon_bo_unmap(rrb->bo); + rb->GetRow = NULL; + rb->PutRow = NULL; + } +} + +static void +radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map) +{ + GLuint i, j; + + /* color draw buffers */ + for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) + map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map); + + /* check for render to textures */ + for (i = 0; i < BUFFER_COUNT; i++) { + struct gl_renderbuffer_attachment *att = + ctx->DrawBuffer->Attachment + i; + struct gl_texture_object *tex = att->Texture; + if (tex) { + /* Render to texture. Note that a mipmapped texture need not + * be complete for render to texture, so we must restrict to + * mapping only the attached image. + */ + radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]); + ASSERT(att->Renderbuffer); + + if (map) + radeon_teximage_map(image, GL_TRUE); + else + radeon_teximage_unmap(image); + } + } + + map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map); + + /* depth buffer (Note wrapper!) */ + if (ctx->DrawBuffer->_DepthBuffer) + map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map); + + if (ctx->DrawBuffer->_StencilBuffer) + map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map); +} static void radeonSpanRenderStart(GLcontext * ctx) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); -#ifdef COMPILE_R300 - r300ContextPtr r300 = (r300ContextPtr) rmesa; - R300_FIREVERTICES(r300); -#else - RADEON_FIREVERTICES(rmesa); -#endif - LOCK_HARDWARE(rmesa); - radeonWaitForIdleLocked(rmesa); + int i; + + radeon_firevertices(rmesa); + + /* The locking and wait for idle should really only be needed in classic mode. + * In a future memory manager based implementation, this should become + * unnecessary due to the fact that mapping our buffers, textures, etc. + * should implicitly wait for any previous rendering commands that must + * be waited on. */ + if (!rmesa->radeonScreen->driScreen->dri2.enabled) { + LOCK_HARDWARE(rmesa); + radeonWaitForIdleLocked(rmesa); + } + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current); + } + + radeon_map_unmap_buffers(ctx, 1); } static void radeonSpanRenderFinish(GLcontext * ctx) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + int i; _swrast_flush(ctx); - UNLOCK_HARDWARE(rmesa); + if (!rmesa->radeonScreen->driScreen->dri2.enabled) { + UNLOCK_HARDWARE(rmesa); + } + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current); + } + + radeon_map_unmap_buffers(ctx, 0); } void radeonInitSpanFuncs(GLcontext * ctx) @@ -307,20 +639,27 @@ void radeonInitSpanFuncs(GLcontext * ctx) /** * Plug in the Get/Put routines for the given driRenderbuffer. */ -void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) +static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) { - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 6 - && vis->blueBits == 5) { - radeonInitPointers_RGB565(&drb->Base); - } else { - radeonInitPointers_ARGB8888(&drb->Base); - } - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - radeonInitDepthPointers_z16(&drb->Base); - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - radeonInitDepthPointers_z24_s8(&drb->Base); - } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - radeonInitStencilPointers_z24_s8(&drb->Base); + if (rrb->base._ActualFormat == GL_RGB5) { + radeonInitPointers_RGB565(&rrb->base); + } else if (rrb->base._ActualFormat == GL_RGB8) { + radeonInitPointers_xRGB8888(&rrb->base); + } else if (rrb->base._ActualFormat == GL_RGBA8) { + radeonInitPointers_ARGB8888(&rrb->base); + } else if (rrb->base._ActualFormat == GL_RGBA4) { + radeonInitPointers_ARGB4444(&rrb->base); + } else if (rrb->base._ActualFormat == GL_RGB5_A1) { + radeonInitPointers_ARGB1555(&rrb->base); + } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) { + radeonInitDepthPointers_z16(&rrb->base); + } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) { + radeonInitDepthPointers_z24(&rrb->base); + } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + radeonInitDepthPointers_z24_s8(&rrb->base); + } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) { + radeonInitStencilPointers_z24_s8(&rrb->base); + } else { + fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat); } } |