diff options
Diffstat (limited to 'src/gallium')
-rw-r--r-- | src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 6 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 10 | ||||
-rwxr-xr-x | src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 65 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_gen_mipmap.c | 20 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_tex_sample.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/softpipe/sp_tile_cache.c | 12 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_defines.h | 2 | ||||
-rw-r--r-- | src/gallium/winsys/xlib/xm_winsys.c | 260 |
8 files changed, 204 insertions, 185 deletions
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 5d5125f7cb..826b432f09 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -88,6 +88,10 @@ #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I +#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C +#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I +#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define FOR_EACH_CHANNEL(CHAN)\ @@ -262,6 +266,8 @@ tgsi_exec_machine_init( mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; + mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; } } diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index 92e2e5e985..19bd78df3d 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -133,9 +133,15 @@ struct tgsi_exec_labels #define TGSI_EXEC_TEMP_PRIMITIVE_I 34 #define TGSI_EXEC_TEMP_PRIMITIVE_C 2 -#define TGSI_EXEC_TEMP_R0 35 +#define TGSI_EXEC_TEMP_THREE_I 34 +#define TGSI_EXEC_TEMP_THREE_C 3 -#define TGSI_EXEC_NUM_TEMPS (32 + 4) +#define TGSI_EXEC_TEMP_HALF_I 35 +#define TGSI_EXEC_TEMP_HALF_C 0 + +#define TGSI_EXEC_TEMP_R0 36 + +#define TGSI_EXEC_NUM_TEMPS (32 + 5) #define TGSI_EXEC_NUM_ADDRS 1 #define TGSI_EXEC_NUM_IMMEDIATES 256 diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 2fd76a3072..8018bd7fa4 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -36,7 +36,11 @@ #ifdef PIPE_ARCH_X86 -#define HIGH_PRECISION 1 /* for 1/sqrt() */ +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 #define FOR_EACH_CHANNEL( CHAN )\ @@ -757,20 +761,6 @@ emit_rcp ( make_xmm( xmm_src ) ); } -#if HIGH_PRECISION -static void XSTDCALL -rsqrt4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = 1.0F / sqrtf( store[X + 0] ); - store[X + 1] = 1.0F / sqrtf( store[X + 1] ); - store[X + 2] = 1.0F / sqrtf( store[X + 2] ); - store[X + 3] = 1.0F / sqrtf( store[X + 3] ); -} -#endif - static void emit_rsqrt( struct x86_function *func, @@ -778,13 +768,6 @@ emit_rsqrt( unsigned xmm_src ) { #if HIGH_PRECISION -#if 1 - emit_func_call_dst_src( - func, - xmm_dst, - xmm_src, - rsqrt4f ); -#else /* Although rsqrtps() and rcpps() are low precision on some/all SSE * implementations, it is possible to improve its precision at * fairly low cost, using a newton/raphson step, as below: @@ -794,21 +777,25 @@ emit_rsqrt( * * See: http://softwarecommunity.intel.com/articles/eng/1818.htm */ - /* This is some code that woudl do the above for a scalar 'a'. We - * obviously are interested in a vector version: - * - * movss xmm3, a; - * movss xmm1, half; - * movss xmm2, three; - * rsqrtss xmm0, xmm3; - * mulss xmm3, xmm0; - * mulss xmm1, xmm0; - * mulss xmm3, xmm0; - * subss xmm2, xmm3; - * mulss xmm1, xmm2; - * movss x, xmm1; - */ -#endif + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } #else /* On Intel CPUs at least, this is only accurate to 12 bits -- not * good enough. @@ -1295,9 +1282,9 @@ emit_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rsqrt( func, 0, 0 ); + emit_rsqrt( func, 1, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 1, 0, chan_index ); } break; diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 056ae829ae..7d71aefda9 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -783,23 +783,23 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) { void *buf; - ctx->vertices[0][0][0] = -0.5f; /*x*/ - ctx->vertices[0][0][1] = -0.5f; /*y*/ + ctx->vertices[0][0][0] = 0.0f; /*x*/ + ctx->vertices[0][0][1] = 0.0f; /*y*/ ctx->vertices[0][1][0] = 0.0f; /*s*/ ctx->vertices[0][1][1] = 0.0f; /*t*/ - ctx->vertices[1][0][0] = width - 0.5f; /*x*/ - ctx->vertices[1][0][1] = -0.5f; /*y*/ - ctx->vertices[1][1][0] = 1.0f; /*s*/ - ctx->vertices[1][1][1] = 0.0f; /*t*/ + ctx->vertices[1][0][0] = width; + ctx->vertices[1][0][1] = 0.0f; + ctx->vertices[1][1][0] = 1.0f; + ctx->vertices[1][1][1] = 0.0f; - ctx->vertices[2][0][0] = width - 0.5f; - ctx->vertices[2][0][1] = height - 0.5f; + ctx->vertices[2][0][0] = width; + ctx->vertices[2][0][1] = height; ctx->vertices[2][1][0] = 1.0f; ctx->vertices[2][1][1] = 1.0f; - ctx->vertices[3][0][0] = -0.5f; - ctx->vertices[3][0][1] = height - 0.5f; + ctx->vertices[3][0][0] = 0.0f; + ctx->vertices[3][0][1] = height; ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 5b63f97997..be0b57d9fa 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1051,5 +1051,19 @@ sp_get_samples(struct tgsi_sampler *sampler, default: assert(0); } + +#if 0 /* DEBUG */ + { + int i; + printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]); + for (i = 0; i < 4; i++) { + printf("Frag %d: %f %f %f %f\n", i, + rgba[0][i], + rgba[1][i], + rgba[2][i], + rgba[3][i]); + } + } +#endif } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 142faf5074..28c29da87c 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -133,7 +133,7 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc) uint pos; for (pos = 0; pos < NUM_ENTRIES; pos++) { - //assert(tc->entries[pos].x < 0); + /*assert(tc->entries[pos].x < 0);*/ } if (tc->surface) { pipe_surface_reference(&tc->surface, NULL); @@ -338,8 +338,8 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, for (x = 0; x < w; x += TILE_SIZE) { if (is_clear_flag_set(tc->clear_flags, x, y)) { pipe_put_tile_raw(pipe, ps, - x, y, TILE_SIZE, TILE_SIZE, - tc->tile.data.color32, 0/*STRIDE*/); + x, y, TILE_SIZE, TILE_SIZE, + tc->tile.data.color32, 0/*STRIDE*/); /* do this? */ clear_clear_flag(tc->clear_flags, x, y); @@ -373,8 +373,8 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, if (tile->x >= 0) { if (tc->depth_stencil) { pipe_put_tile_raw(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(pipe, ps, @@ -391,7 +391,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, #endif } else if (tc->texture) { - /* caching a texture, mark all entries as embpy */ + /* caching a texture, mark all entries as empty */ for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].x = -1; } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 2e77fb42a8..83330ef22f 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -274,7 +274,7 @@ enum pipe_texture_target { #define PIPE_CAP_MAX_POINT_WIDTH_AA 17 #define PIPE_CAP_MAX_TEXTURE_ANISOTROPY 18 #define PIPE_CAP_MAX_TEXTURE_LOD_BIAS 19 -#define PIPE_CAP_BITMAP_TEXCOORD_BIAS 20 + #ifdef __cplusplus } diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 8ca90ef4c6..a70752428a 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -58,6 +58,7 @@ /** + * Subclass of pipe_buffer for Xlib winsys. * Low-level OS/window system memory buffer */ struct xm_buffer @@ -74,12 +75,10 @@ struct xm_buffer #endif }; -#if defined(USE_XSHM) && !defined(XFree86Server) -# define XSHM_ENABLED(b) ((b)->shm) -#else -# define XSHM_ENABLED(b) 0 -#endif +/** + * Subclass of pipe_surface for Xlib winsys + */ struct xmesa_surface { struct pipe_surface surface; @@ -89,6 +88,9 @@ struct xmesa_surface }; +/** + * Subclass of pipe_winsys for Xlib winsys + */ struct xmesa_pipe_winsys { struct pipe_winsys base; @@ -97,23 +99,16 @@ struct xmesa_pipe_winsys }; -static void alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, - unsigned width, unsigned height); /** Cast wrapper */ static INLINE struct xmesa_surface * xmesa_surface(struct pipe_surface *ps) { -// assert(0); return (struct xmesa_surface *) ps; } - -/** - * Turn the softpipe opaque buffer pointer into a dri_bufmgr opaque - * buffer pointer... - */ +/** Cast wrapper */ static INLINE struct xm_buffer * xm_buffer( struct pipe_buffer *buf ) { @@ -121,6 +116,130 @@ xm_buffer( struct pipe_buffer *buf ) } +/** + * X Shared Memory Image extension code + */ +#if defined(USE_XSHM) && !defined(XFree86Server) + +#define XSHM_ENABLED(b) ((b)->shm) + +static volatile int mesaXErrorFlag = 0; + +/** + * Catches potential Xlib errors. + */ +static int +mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event) +{ + (void) dpy; + (void) event; + mesaXErrorFlag = 1; + return 0; +} + + +static GLboolean alloc_shm(struct xm_buffer *buf, unsigned size) +{ + XShmSegmentInfo *const shminfo = & buf->shminfo; + + shminfo->shmid = shmget(IPC_PRIVATE, size, IPC_CREAT|0777); + if (shminfo->shmid < 0) { + return GL_FALSE; + } + + shminfo->shmaddr = (char *) shmat(shminfo->shmid, 0, 0); + if (shminfo->shmaddr == (char *) -1) { + shmctl(shminfo->shmid, IPC_RMID, 0); + return GL_FALSE; + } + + shminfo->readOnly = False; + return GL_TRUE; +} + + +/** + * Allocate a shared memory XImage back buffer for the given XMesaBuffer. + */ +static void +alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, + unsigned width, unsigned height) +{ + /* + * We have to do a _lot_ of error checking here to be sure we can + * really use the XSHM extension. It seems different servers trigger + * errors at different points if the extension won't work. Therefore + * we have to be very careful... + */ +#if 0 + GC gc; +#endif + int (*old_handler)(XMesaDisplay *, XErrorEvent *); + + b->tempImage = XShmCreateImage(xmb->xm_visual->display, + xmb->xm_visual->visinfo->visual, + xmb->xm_visual->visinfo->depth, + ZPixmap, + NULL, + &b->shminfo, + width, height); + if (b->tempImage == NULL) { + b->shm = 0; + return; + } + + + mesaXErrorFlag = 0; + old_handler = XSetErrorHandler(mesaHandleXError); + /* This may trigger the X protocol error we're ready to catch: */ + XShmAttach(xmb->xm_visual->display, &b->shminfo); + XSync(xmb->xm_visual->display, False); + + if (mesaXErrorFlag) { + /* we are on a remote display, this error is normal, don't print it */ + XFlush(xmb->xm_visual->display); + mesaXErrorFlag = 0; + XDestroyImage(b->tempImage); + b->tempImage = NULL; + b->shm = 0; + (void) XSetErrorHandler(old_handler); + return; + } + + + /* Finally, try an XShmPutImage to be really sure the extension works */ +#if 0 + gc = XCreateGC(xmb->xm_visual->display, xmb->drawable, 0, NULL); + XShmPutImage(xmb->xm_visual->display, xmb->drawable, gc, + b->tempImage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False); + XSync(xmb->xm_visual->display, False); + XFreeGC(xmb->xm_visual->display, gc); + (void) XSetErrorHandler(old_handler); + if (mesaXErrorFlag) { + XFlush(xmb->xm_visual->display); + mesaXErrorFlag = 0; + XDestroyImage(b->tempImage); + b->tempImage = NULL; + b->shm = 0; + return; + } +#endif +} + +#else + +#define XSHM_ENABLED(b) 0 + +static void +alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, + unsigned width, unsigned height) +{ + b->shm = 0; +} +#endif /* USE_XSHM */ + + + /* Most callbacks map direcly onto dri_bufmgr operations: */ @@ -297,119 +416,6 @@ xm_get_name(struct pipe_winsys *pws) } -#if defined(USE_XSHM) && !defined(XFree86Server) -static volatile int mesaXErrorFlag = 0; - -/** - * Catches potential Xlib errors. - */ -static int -mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event) -{ - (void) dpy; - (void) event; - mesaXErrorFlag = 1; - return 0; -} - - -static GLboolean alloc_shm(struct xm_buffer *buf, unsigned size) -{ - XShmSegmentInfo *const shminfo = & buf->shminfo; - - shminfo->shmid = shmget(IPC_PRIVATE, size, IPC_CREAT|0777); - if (shminfo->shmid < 0) { - return GL_FALSE; - } - - shminfo->shmaddr = (char *) shmat(shminfo->shmid, 0, 0); - if (shminfo->shmaddr == (char *) -1) { - shmctl(shminfo->shmid, IPC_RMID, 0); - return GL_FALSE; - } - - shminfo->readOnly = False; - return GL_TRUE; -} - - -/** - * Allocate a shared memory XImage back buffer for the given XMesaBuffer. - */ -static void -alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, - unsigned width, unsigned height) -{ - /* - * We have to do a _lot_ of error checking here to be sure we can - * really use the XSHM extension. It seems different servers trigger - * errors at different points if the extension won't work. Therefore - * we have to be very careful... - */ -#if 0 - GC gc; -#endif - int (*old_handler)(XMesaDisplay *, XErrorEvent *); - - b->tempImage = XShmCreateImage(xmb->xm_visual->display, - xmb->xm_visual->visinfo->visual, - xmb->xm_visual->visinfo->depth, - ZPixmap, - NULL, - &b->shminfo, - width, height); - if (b->tempImage == NULL) { - b->shm = 0; - return; - } - - - mesaXErrorFlag = 0; - old_handler = XSetErrorHandler(mesaHandleXError); - /* This may trigger the X protocol error we're ready to catch: */ - XShmAttach(xmb->xm_visual->display, &b->shminfo); - XSync(xmb->xm_visual->display, False); - - if (mesaXErrorFlag) { - /* we are on a remote display, this error is normal, don't print it */ - XFlush(xmb->xm_visual->display); - mesaXErrorFlag = 0; - XDestroyImage(b->tempImage); - b->tempImage = NULL; - b->shm = 0; - (void) XSetErrorHandler(old_handler); - return; - } - - - /* Finally, try an XShmPutImage to be really sure the extension works */ -#if 0 - gc = XCreateGC(xmb->xm_visual->display, xmb->drawable, 0, NULL); - XShmPutImage(xmb->xm_visual->display, xmb->drawable, gc, - b->tempImage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False); - XSync(xmb->xm_visual->display, False); - XFreeGC(xmb->xm_visual->display, gc); - (void) XSetErrorHandler(old_handler); - if (mesaXErrorFlag) { - XFlush(xmb->xm_visual->display); - mesaXErrorFlag = 0; - XDestroyImage(b->tempImage); - b->tempImage = NULL; - b->shm = 0; - return; - } -#endif -} -#else -static void -alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, - unsigned width, unsigned height) -{ - b->shm = 0; -} -#endif - - static struct pipe_buffer * xm_buffer_create(struct pipe_winsys *pws, unsigned alignment, @@ -602,7 +608,7 @@ xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) } -struct pipe_winsys * +static struct pipe_winsys * xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis) { static struct xmesa_pipe_winsys *ws = NULL; |