summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Roland Scheidegger <rscheidegger@gmx.ch> 2005-02-10 22:36:06 +0000
committer Roland Scheidegger <rscheidegger@gmx.ch> 2005-02-10 22:36:06 +0000
commit 4837ea30208d002bc36a836d2117f826d40c8bfa (patch)
tree 4db5a234a5af7d7f02a42ed824b85e938066828d
parent 26d31591257d575362776972439f614948366dd1 (diff)
Add texture micro and macro tiling to the radeon and r200 drivers. This can improve performance by up to 15% in texture-intensive applications. To make tiling work, texture uploads now use the texture's actual blit format and blit width instead of a fixed blit format and blit width.
-rw-r--r-- src/mesa/drivers/dri/r200/r200_context.c 3
-rw-r--r-- src/mesa/drivers/dri/r200/r200_context.h 3
-rw-r--r-- src/mesa/drivers/dri/r200/r200_reg.h 2
-rw-r--r-- src/mesa/drivers/dri/r200/r200_texmem.c 71
-rw-r--r-- src/mesa/drivers/dri/r200/r200_texstate.c 73
-rw-r--r-- src/mesa/drivers/dri/radeon/radeon_context.c 4
-rw-r--r-- src/mesa/drivers/dri/radeon/radeon_context.h 5
-rw-r--r-- src/mesa/drivers/dri/radeon/radeon_texmem.c 52
-rw-r--r-- src/mesa/drivers/dri/radeon/radeon_texstate.c 70
9 files changed, 209 insertions, 74 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
index 4eca4ad7e5..baaca08755 100644
--- a/src/mesa/drivers/dri/r200/r200_context.c
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -272,6 +272,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual,
else
rmesa->using_hyperz = GL_TRUE;
}
+
+ if ( sPriv->drmMinor >= 15 )
+ rmesa->texmicrotile = GL_TRUE;
/* Init default driver functions then plug in our R200-specific functions
* (the texture functions are especially important)
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
index cedf1b974f..7e0a46ae51 100644
--- a/src/mesa/drivers/dri/r200/r200_context.h
+++ b/src/mesa/drivers/dri/r200/r200_context.h
@@ -167,6 +167,8 @@ struct r200_tex_obj {
GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
GLboolean border_fallback;
+
+ GLuint tile_bits; /* hw texture tile bits used on this texture */
};
@@ -931,6 +933,7 @@ struct r200_context {
driOptionCache optionCache;
GLboolean using_hyperz;
+ GLboolean texmicrotile;
};
#define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx))
diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
index c1132e54ab..2468c6cebf 100644
--- a/src/mesa/drivers/dri/r200/r200_reg.h
+++ b/src/mesa/drivers/dri/r200/r200_reg.h
@@ -968,6 +968,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R200_TXO_ENDIAN_BYTE_SWAP (1 << 0)
#define R200_TXO_ENDIAN_WORD_SWAP (2 << 0)
#define R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+#define R200_TXO_MACRO_TILE (1 << 2)
+#define R200_TXO_MICRO_TILE (1 << 3)
#define R200_TXO_OFFSET_MASK 0xffffffe0
#define R200_TXO_OFFSET_SHIFT 5
#define R200_PP_CUBIC_OFFSET_F1_0 0x2d04
diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c
index 3f8e5d6e7f..7472afeedd 100644
--- a/src/mesa/drivers/dri/r200/r200_texmem.c
+++ b/src/mesa/drivers/dri/r200/r200_texmem.c
@@ -43,12 +43,10 @@ SOFTWARE.
#include "context.h"
#include "colormac.h"
#include "macros.h"
-#include "radeon_reg.h" /* gets definition for usleep */
#include "r200_context.h"
-#include "r200_state.h"
#include "r200_ioctl.h"
-#include "r200_swtcl.h"
#include "r200_tex.h"
+#include "radeon_reg.h"
#include <unistd.h> /* for usleep() */
@@ -253,12 +251,13 @@ static void r200UploadRectSubImage( r200ContextPtr rmesa,
/* Blit to framebuffer
*/
- r200EmitBlit( rmesa,
- blit_format,
- dstPitch, GET_START( &region ),
- dstPitch, t->bufAddr,
- 0, 0,
- 0, done,
+ r200EmitBlit( rmesa,
+ blit_format,
+ dstPitch, GET_START( &region ),
+ dstPitch | (t->tile_bits >> 16),
+ t->bufAddr,
+ 0, 0,
+ 0, done,
width, lines );
r200EmitWait( rmesa, RADEON_WAIT_2D );
@@ -339,7 +338,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
imageWidth = texImage->Width;
imageHeight = texImage->Height;
- offset = t->bufAddr;
+ offset = t->bufAddr + t->base.totalSize / 6 * face;
if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) {
GLint imageX = 0;
@@ -363,19 +362,47 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
* We used to use 1, 2 and 4-byte texels and used to use the texture
* width to dictate the blit width - but that won't work for compressed
* textures. (Brian)
+ * NOTE: can't do that with texture tiling. (sroland)
*/
tex.offset = offset;
- tex.pitch = BLIT_WIDTH_BYTES / 64;
- tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.image = &tmp;
+ /* copy (x,y,width,height,data) */
+ memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
+
if (texImage->TexFormat->TexelBytes) {
- tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */
+ /* use multi-byte upload scheme */
tex.height = imageHeight;
+ tex.width = imageWidth;
+ tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK;
+ tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+ tex.offset += tmp.x & ~1023;
+ tmp.x = tmp.x % 1024;
+ if (t->tile_bits & R200_TXO_MICRO_TILE) {
+ /* need something like "tiled coordinates" ? */
+ tmp.y = tmp.x / (tex.pitch * 128) * 2;
+ tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+ tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+ }
+ else {
+ tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+ }
+ if ((t->tile_bits & R200_TXO_MACRO_TILE) &&
+ (texImage->Width * texImage->TexFormat->TexelBytes >= 256) &&
+ ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) ||
+ (texImage->Height >= 16))) {
+ /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes,
+ OR if height is smaller than 8 automatically, but if micro tiling is active
+ the limit is height 16 instead ? */
+ tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+ }
}
else {
/* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
/* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
so the kernel module reads the right amount of data. */
+ tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.pitch = (BLIT_WIDTH_BYTES / 64);
tex.height = (imageHeight + 3) / 4;
tex.width = (imageWidth + 3) / 4;
switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) {
@@ -390,19 +417,7 @@ static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t,
fprintf(stderr, "unknown compressed tex format in uploadSubImage\n");
}
}
- tex.image = &tmp;
- /* copy (x,y,width,height,data) */
- memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) );
-
- /* Adjust the base offset to account for the Y-offset. This is done,
- * instead of just letting the Y-offset automatically take care of it,
- * because it is possible, for very large textures, for the Y-offset
- * to exceede the [-8192,+8191] range.
- */
- tex.offset += tmp.y * 1024;
- tmp.y = 0;
-
LOCK_HARDWARE( rmesa );
do {
ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE,
@@ -473,7 +488,11 @@ int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face )
t->bufAddr = rmesa->r200Screen->texOffset[heap]
+ t->base.memBlock->ofs;
t->pp_txoffset = t->bufAddr;
-
+
+ if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+ /* hope it's safe to add that here... */
+ t->pp_txoffset |= t->tile_bits;
+ }
/* Mark this texobj as dirty on all units:
*/
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
index 1e56c78f9b..3fba25d0b5 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -125,8 +125,8 @@ static void r200SetTexImages( r200ContextPtr rmesa,
{
r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData;
const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
- GLint curOffset;
- GLint i;
+ GLint curOffset, blitWidth;
+ GLint i, texelBytes;
GLint numLevels;
GLint log2Width, log2Height, log2Depth;
@@ -146,6 +146,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
return;
}
+ texelBytes = baseImage->TexFormat->TexelBytes;
/* Compute which mipmap levels we really want to send to the hardware.
*/
@@ -164,6 +165,28 @@ static void r200SetTexImages( r200ContextPtr rmesa,
* memory organized as a rectangle of width BLIT_WIDTH_BYTES.
*/
curOffset = 0;
+ blitWidth = BLIT_WIDTH_BYTES;
+ t->tile_bits = 0;
+
+ /* figure out if this texture is suitable for tiling. */
+ if (texelBytes) {
+ if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) &&
+ /* texrect might be able to use micro tiling too in theory? */
+ (baseImage->Height > 1)) {
+ /* allow 32 (bytes) x 1 mip (which will use two times the space
+ the non-tiled version would use) max if base texture is large enough */
+ if ((numLevels == 1) ||
+ (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+ (baseImage->Width * texelBytes > 64)) ||
+ ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+ t->tile_bits |= R200_TXO_MICRO_TILE;
+ }
+ }
+ if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) {
+ /* we can set macro tiling even for small textures, they will be untiled anyway */
+ t->tile_bits |= R200_TXO_MACRO_TILE;
+ }
+ }
for (i = 0; i < numLevels; i++) {
const struct gl_texture_image *texImage;
@@ -195,28 +218,41 @@ static void r200SetTexImages( r200ContextPtr rmesa,
else size = texImage->CompressedSize;
}
else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
- & ~63) * texImage->Height;
+ size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+ }
+ else if (t->tile_bits & R200_TXO_MICRO_TILE) {
+ /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+ though the actual offset may be different (if texture is less than
+ 32 bytes width) to the untiled case */
+ int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+ size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
else {
- int w = texImage->Width * texImage->TexFormat->TexelBytes;
- if (w < 32)
- w = 32;
- size = w * texImage->Height * texImage->Depth;
+ int w = (texImage->Width * texelBytes + 31) & ~31;
+ size = w * texImage->Height * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
assert(size > 0);
-
/* Align to 32-byte offset. It is faster to do this unconditionally
* (no branch penalty).
*/
curOffset = (curOffset + 0x1f) & ~0x1f;
- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
- t->image[0][i].height = size / t->image[0][i].width;
+ if (texelBytes) {
+ t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+ t->image[0][i].y = 0;
+ t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+ t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+ }
+ else {
+ t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+ t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+ t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
+ t->image[0][i].height = size / t->image[0][i].width;
+ }
#if 0
/* for debugging only and only applicable to non-rectangle targets */
@@ -242,16 +278,13 @@ static void r200SetTexImages( r200ContextPtr rmesa,
/* Setup remaining cube face blits, if needed */
if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
- /* Round totalSize up to multiple of BLIT_WIDTH_BYTES */
- const GLuint faceSize = (t->base.totalSize + BLIT_WIDTH_BYTES - 1)
- & ~(BLIT_WIDTH_BYTES-1);
- const GLuint lines = faceSize / BLIT_WIDTH_BYTES;
+ const GLuint faceSize = t->base.totalSize;
GLuint face;
- /* reuse face 0 x/y/width/height - just adjust y */
+ /* reuse face 0 x/y/width/height - just update the offset when uploading */
for (face = 1; face < 6; face++) {
for (i = 0; i < numLevels; i++) {
t->image[face][i].x = t->image[0][i].x;
- t->image[face][i].y = t->image[0][i].y + face * lines;
+ t->image[face][i].y = t->image[0][i].y;
t->image[face][i].width = t->image[0][i].width;
t->image[face][i].height = t->image[0][i].height;
}
@@ -310,7 +343,7 @@ static void r200SetTexImages( r200ContextPtr rmesa,
if (baseImage->IsCompressed)
t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
else
- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+ t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
t->pp_txpitch -= 32;
t->dirty_state = TEX_ALL;
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
index 4229d5cb5e..5d7e28cf89 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -255,6 +255,9 @@ radeonCreateContext( const __GLcontextModes *glVisual,
rmesa->using_hyperz = GL_TRUE;
}
+ if ( sPriv->drmMinor >= 15 )
+ rmesa->texmicrotile = GL_TRUE;
+
/* Init default driver functions then plug in our Radeon-specific functions
* (the texture functions are especially important)
*/
@@ -445,6 +448,7 @@ radeonCreateContext( const __GLcontextModes *glVisual,
}
(*rmesa->get_ust)( & rmesa->swap_ust );
+ if (rmesa->sarea->tiling_enabled != 0) fprintf(stderr, "color tiling enabled!\n");
#if DO_DEBUG
RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
index 53860c12b8..8d0637ca32 100644
--- a/src/mesa/drivers/dri/radeon/radeon_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -162,6 +162,8 @@ struct radeon_tex_obj {
GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */
GLboolean border_fallback;
+
+ GLuint tile_bits; /* hw texture tile bits used on this texture */
};
@@ -186,7 +188,7 @@ struct radeon_state_atom {
GLboolean dirty; /* dirty-mark in emit_state_list */
GLboolean (*check)( GLcontext * ); /* is this state active? */
};
-
+
/* Trying to keep these relatively short as the variables are becoming
@@ -781,6 +783,7 @@ struct radeon_context {
driOptionCache optionCache;
GLboolean using_hyperz;
+ GLboolean texmicrotile;
/* Performance counters
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c
index d910a6c15a..d492e190c1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texmem.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texmem.c
@@ -46,6 +46,8 @@ SOFTWARE.
#include "radeon_ioctl.h"
#include "radeon_tex.h"
+#include <unistd.h> /* for usleep() */
+
/**
* Destroy any device-dependent state associated with the texture. This may
@@ -151,12 +153,12 @@ static void radeonUploadRectSubImage( radeonContextPtr rmesa,
/* Blit to framebuffer
*/
- radeonEmitBlit( rmesa,
- blit_format,
- dstPitch, GET_START( &region ),
- dstPitch, t->bufAddr,
- 0, 0,
- 0, done,
+ radeonEmitBlit( rmesa,
+ blit_format,
+ dstPitch, GET_START( &region ),
+ dstPitch, t->bufAddr,
+ 0, 0,
+ 0, done,
width, lines );
radeonEmitWait( rmesa, RADEON_WAIT_2D );
@@ -248,19 +250,43 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
* We used to use 1, 2 and 4-byte texels and used to use the texture
* width to dictate the blit width - but that won't work for compressed
* textures. (Brian)
+ * NOTE: can't do that with texture tiling. (sroland)
*/
tex.offset = offset;
- tex.pitch = BLIT_WIDTH_BYTES / 64;
- tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.image = &tmp;
+ /* copy (x,y,width,height,data) */
+ memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
+
if (texImage->TexFormat->TexelBytes) {
- tex.width = imageWidth * texImage->TexFormat->TexelBytes; /* in bytes */
+ /* use multi-byte upload scheme */
tex.height = imageHeight;
+ tex.width = imageWidth;
+ tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK;
+ tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1);
+ tex.offset += tmp.x & ~1023;
+ tmp.x = tmp.x % 1024;
+ if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+ /* need something like "tiled coordinates" ? */
+ tmp.y = tmp.x / (tex.pitch * 128) * 2;
+ tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes;
+ tex.pitch |= RADEON_DST_TILE_MICRO >> 22;
+ }
+ else {
+ tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1);
+ }
+ if ((t->tile_bits & RADEON_TXO_MACRO_TILE) &&
+ (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) {
+ /* radeon switches off macro tiling for small textures/mipmaps it seems */
+ tex.pitch |= RADEON_DST_TILE_MACRO >> 22;
+ }
}
else {
/* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is
needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */
/* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed
so the kernel module reads the right amount of data. */
+ tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */
+ tex.pitch = (BLIT_WIDTH_BYTES / 64);
tex.height = (imageHeight + 3) / 4;
tex.width = (imageWidth + 3) / 4;
switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) {
@@ -273,10 +299,6 @@ static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t,
break;
}
}
- tex.image = &tmp;
-
- /* copy (x,y,width,height,data) */
- memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) );
LOCK_HARDWARE( rmesa );
do {
@@ -344,6 +366,10 @@ int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint fac
+ t->base.memBlock->ofs;
t->pp_txoffset = t->bufAddr;
+ if (!(t->base.tObj->Image[0][0]->IsClientData)) {
+ /* hope it's safe to add that here... */
+ t->pp_txoffset |= t->tile_bits;
+ }
/* Mark this texobj as dirty on all units:
*/
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 5e818da9fd..b96ad740d1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -127,8 +127,8 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
{
radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData;
const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
- GLint curOffset;
- GLint i;
+ GLint curOffset, blitWidth;
+ GLint i, texelBytes;
GLint numLevels;
GLint log2Width, log2Height, log2Depth;
@@ -148,6 +148,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
return;
}
+ texelBytes = baseImage->TexFormat->TexelBytes;
/* Compute which mipmap levels we really want to send to the hardware.
*/
@@ -166,6 +167,34 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
* memory organized as a rectangle of width BLIT_WIDTH_BYTES.
*/
curOffset = 0;
+ blitWidth = BLIT_WIDTH_BYTES;
+ t->tile_bits = 0;
+
+ /* figure out if this texture is suitable for tiling. */
+ if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) {
+ if (rmesa->texmicrotile && (baseImage->Height > 1)) {
+ /* allow 32 (bytes) x 1 mip (which will use two times the space
+ the non-tiled version would use) max if base texture is large enough */
+ if ((numLevels == 1) ||
+ (((baseImage->Width * texelBytes / baseImage->Height) <= 32) &&
+ (baseImage->Width * texelBytes > 64)) ||
+ ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) {
+ /* R100 has two microtile bits (only the txoffset reg, not the blitter)
+ weird: X2 + OPT: 32bit correct, 16bit completely hosed
+ X2: 32bit correct, 16bit correct
+ OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */
+ t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/;
+ }
+ }
+ if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) {
+ /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not
+ in the case if height is smaller than 16 (not 100% sure), as does the r200,
+ so need to disable macro tiling in that case */
+ if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) {
+ t->tile_bits |= RADEON_TXO_MACRO_TILE;
+ }
+ }
+ }
for (i = 0; i < numLevels; i++) {
const struct gl_texture_image *texImage;
@@ -197,28 +226,41 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
else size = texImage->CompressedSize;
}
else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
- size = ((texImage->Width * texImage->TexFormat->TexelBytes + 63)
- & ~63) * texImage->Height;
+ size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height;
+ }
+ else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) {
+ /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned,
+ though the actual offset may be different (if texture is less than
+ 32 bytes width) to the untiled case */
+ int w = (texImage->Width * texelBytes * 2 + 31) & ~31;
+ size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
else {
- int w = texImage->Width * texImage->TexFormat->TexelBytes;
- if (w < 32)
- w = 32;
- size = w * texImage->Height * texImage->Depth;
+ int w = (texImage->Width * texelBytes + 31) & ~31;
+ size = w * texImage->Height * texImage->Depth;
+ blitWidth = MAX2(texImage->Width, 64 / texelBytes);
}
assert(size > 0);
-
/* Align to 32-byte offset. It is faster to do this unconditionally
* (no branch penalty).
*/
curOffset = (curOffset + 0x1f) & ~0x1f;
- t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
- t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
- t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
- t->image[0][i].height = size / t->image[0][i].width;
+ if (texelBytes) {
+ t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */
+ t->image[0][i].y = 0;
+ t->image[0][i].width = MIN2(size / texelBytes, blitWidth);
+ t->image[0][i].height = (size / texelBytes) / t->image[0][i].width;
+ }
+ else {
+ t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES;
+ t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES;
+ t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES);
+ t->image[0][i].height = size / t->image[0][i].width;
+ }
#if 0
/* for debugging only and only applicable to non-rectangle targets */
@@ -263,7 +305,7 @@ static void radeonSetTexImages( radeonContextPtr rmesa,
if (baseImage->IsCompressed)
t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63);
else
- t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * baseImage->TexFormat->TexelBytes) + 63) & ~(63);
+ t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63);
t->pp_txpitch -= 32;
t->dirty_state = TEX_ALL;