summaryrefslogtreecommitdiff
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.c6
-rw-r--r--src/gallium/auxiliary/tgsi/exec/tgsi_exec.h10
-rwxr-xr-xsrc/gallium/auxiliary/tgsi/exec/tgsi_sse2.c65
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c20
-rw-r--r--src/gallium/drivers/softpipe/sp_tex_sample.c14
-rw-r--r--src/gallium/drivers/softpipe/sp_tile_cache.c12
-rw-r--r--src/gallium/include/pipe/p_defines.h2
-rw-r--r--src/gallium/winsys/xlib/xm_winsys.c260
8 files changed, 204 insertions, 185 deletions
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
index 5d5125f7cb..826b432f09 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c
@@ -88,6 +88,10 @@
#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I
+#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C
+#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I
+#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C
#define TEMP_R0 TGSI_EXEC_TEMP_R0
#define FOR_EACH_CHANNEL(CHAN)\
@@ -262,6 +266,8 @@ tgsi_exec_machine_init(
mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
+ mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f;
+ mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f;
}
}
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
index 92e2e5e985..19bd78df3d 100644
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h
@@ -133,9 +133,15 @@ struct tgsi_exec_labels
#define TGSI_EXEC_TEMP_PRIMITIVE_I 34
#define TGSI_EXEC_TEMP_PRIMITIVE_C 2
-#define TGSI_EXEC_TEMP_R0 35
+#define TGSI_EXEC_TEMP_THREE_I 34
+#define TGSI_EXEC_TEMP_THREE_C 3
-#define TGSI_EXEC_NUM_TEMPS (32 + 4)
+#define TGSI_EXEC_TEMP_HALF_I 35
+#define TGSI_EXEC_TEMP_HALF_C 0
+
+#define TGSI_EXEC_TEMP_R0 36
+
+#define TGSI_EXEC_NUM_TEMPS (32 + 5)
#define TGSI_EXEC_NUM_ADDRS 1
#define TGSI_EXEC_NUM_IMMEDIATES 256
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 2fd76a3072..8018bd7fa4 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,7 +36,11 @@
#ifdef PIPE_ARCH_X86
-#define HIGH_PRECISION 1 /* for 1/sqrt() */
+/* for 1/sqrt()
+ *
+ * This costs about 100fps (close to 10%) in gears:
+ */
+#define HIGH_PRECISION 1
#define FOR_EACH_CHANNEL( CHAN )\
@@ -757,20 +761,6 @@ emit_rcp (
make_xmm( xmm_src ) );
}
-#if HIGH_PRECISION
-static void XSTDCALL
-rsqrt4f(
- float *store )
-{
- const unsigned X = 0;
-
- store[X + 0] = 1.0F / sqrtf( store[X + 0] );
- store[X + 1] = 1.0F / sqrtf( store[X + 1] );
- store[X + 2] = 1.0F / sqrtf( store[X + 2] );
- store[X + 3] = 1.0F / sqrtf( store[X + 3] );
-}
-#endif
-
static void
emit_rsqrt(
struct x86_function *func,
@@ -778,13 +768,6 @@ emit_rsqrt(
unsigned xmm_src )
{
#if HIGH_PRECISION
-#if 1
- emit_func_call_dst_src(
- func,
- xmm_dst,
- xmm_src,
- rsqrt4f );
-#else
/* Although rsqrtps() and rcpps() are low precision on some/all SSE
* implementations, it is possible to improve its precision at
* fairly low cost, using a newton/raphson step, as below:
@@ -794,21 +777,25 @@ emit_rsqrt(
*
* See: http://softwarecommunity.intel.com/articles/eng/1818.htm
*/
- /* This is some code that woudl do the above for a scalar 'a'. We
- * obviously are interested in a vector version:
- *
- * movss xmm3, a;
- * movss xmm1, half;
- * movss xmm2, three;
- * rsqrtss xmm0, xmm3;
- * mulss xmm3, xmm0;
- * mulss xmm1, xmm0;
- * mulss xmm3, xmm0;
- * subss xmm2, xmm3;
- * mulss xmm1, xmm2;
- * movss x, xmm1;
- */
-#endif
+ {
+ struct x86_reg dst = make_xmm( xmm_dst );
+ struct x86_reg src = make_xmm( xmm_src );
+ struct x86_reg tmp0 = make_xmm( 2 );
+ struct x86_reg tmp1 = make_xmm( 3 );
+
+ assert( xmm_dst != xmm_src );
+ assert( xmm_dst != 2 && xmm_dst != 3 );
+ assert( xmm_src != 2 && xmm_src != 3 );
+
+ sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
+ sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
+ sse_rsqrtps( func, tmp1, src );
+ sse_mulps( func, src, tmp1 );
+ sse_mulps( func, dst, tmp1 );
+ sse_mulps( func, src, tmp1 );
+ sse_subps( func, tmp0, src );
+ sse_mulps( func, dst, tmp0 );
+ }
#else
/* On Intel CPUs at least, this is only accurate to 12 bits -- not
* good enough.
@@ -1295,9 +1282,9 @@ emit_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( func, *inst, 0, 0, CHAN_X );
- emit_rsqrt( func, 0, 0 );
+ emit_rsqrt( func, 1, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
+ STORE( func, *inst, 1, 0, chan_index );
}
break;
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 056ae829ae..7d71aefda9 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -783,23 +783,23 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height)
{
void *buf;
- ctx->vertices[0][0][0] = -0.5f; /*x*/
- ctx->vertices[0][0][1] = -0.5f; /*y*/
+ ctx->vertices[0][0][0] = 0.0f; /*x*/
+ ctx->vertices[0][0][1] = 0.0f; /*y*/
ctx->vertices[0][1][0] = 0.0f; /*s*/
ctx->vertices[0][1][1] = 0.0f; /*t*/
- ctx->vertices[1][0][0] = width - 0.5f; /*x*/
- ctx->vertices[1][0][1] = -0.5f; /*y*/
- ctx->vertices[1][1][0] = 1.0f; /*s*/
- ctx->vertices[1][1][1] = 0.0f; /*t*/
+ ctx->vertices[1][0][0] = width;
+ ctx->vertices[1][0][1] = 0.0f;
+ ctx->vertices[1][1][0] = 1.0f;
+ ctx->vertices[1][1][1] = 0.0f;
- ctx->vertices[2][0][0] = width - 0.5f;
- ctx->vertices[2][0][1] = height - 0.5f;
+ ctx->vertices[2][0][0] = width;
+ ctx->vertices[2][0][1] = height;
ctx->vertices[2][1][0] = 1.0f;
ctx->vertices[2][1][1] = 1.0f;
- ctx->vertices[3][0][0] = -0.5f;
- ctx->vertices[3][0][1] = height - 0.5f;
+ ctx->vertices[3][0][0] = 0.0f;
+ ctx->vertices[3][0][1] = height;
ctx->vertices[3][1][0] = 0.0f;
ctx->vertices[3][1][1] = 1.0f;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 5b63f97997..be0b57d9fa 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1051,5 +1051,19 @@ sp_get_samples(struct tgsi_sampler *sampler,
default:
assert(0);
}
+
+#if 0 /* DEBUG */
+ {
+ int i;
+ printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]);
+ for (i = 0; i < 4; i++) {
+ printf("Frag %d: %f %f %f %f\n", i,
+ rgba[0][i],
+ rgba[1][i],
+ rgba[2][i],
+ rgba[3][i]);
+ }
+ }
+#endif
}
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 142faf5074..28c29da87c 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -133,7 +133,7 @@ sp_destroy_tile_cache(struct softpipe_tile_cache *tc)
uint pos;
for (pos = 0; pos < NUM_ENTRIES; pos++) {
- //assert(tc->entries[pos].x < 0);
+ /*assert(tc->entries[pos].x < 0);*/
}
if (tc->surface) {
pipe_surface_reference(&tc->surface, NULL);
@@ -338,8 +338,8 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe,
for (x = 0; x < w; x += TILE_SIZE) {
if (is_clear_flag_set(tc->clear_flags, x, y)) {
pipe_put_tile_raw(pipe, ps,
- x, y, TILE_SIZE, TILE_SIZE,
- tc->tile.data.color32, 0/*STRIDE*/);
+ x, y, TILE_SIZE, TILE_SIZE,
+ tc->tile.data.color32, 0/*STRIDE*/);
/* do this? */
clear_clear_flag(tc->clear_flags, x, y);
@@ -373,8 +373,8 @@ sp_flush_tile_cache(struct softpipe_context *softpipe,
if (tile->x >= 0) {
if (tc->depth_stencil) {
pipe_put_tile_raw(pipe, ps,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
- tile->data.depth32, 0/*STRIDE*/);
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->data.depth32, 0/*STRIDE*/);
}
else {
pipe_put_tile_rgba(pipe, ps,
@@ -391,7 +391,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe,
#endif
}
else if (tc->texture) {
- /* caching a texture, mark all entries as embpy */
+ /* caching a texture, mark all entries as empty */
for (pos = 0; pos < NUM_ENTRIES; pos++) {
tc->entries[pos].x = -1;
}
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 2e77fb42a8..83330ef22f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -274,7 +274,7 @@ enum pipe_texture_target {
#define PIPE_CAP_MAX_POINT_WIDTH_AA 17
#define PIPE_CAP_MAX_TEXTURE_ANISOTROPY 18
#define PIPE_CAP_MAX_TEXTURE_LOD_BIAS 19
-#define PIPE_CAP_BITMAP_TEXCOORD_BIAS 20
+
#ifdef __cplusplus
}
diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c
index 8ca90ef4c6..a70752428a 100644
--- a/src/gallium/winsys/xlib/xm_winsys.c
+++ b/src/gallium/winsys/xlib/xm_winsys.c
@@ -58,6 +58,7 @@
/**
+ * Subclass of pipe_buffer for Xlib winsys.
* Low-level OS/window system memory buffer
*/
struct xm_buffer
@@ -74,12 +75,10 @@ struct xm_buffer
#endif
};
-#if defined(USE_XSHM) && !defined(XFree86Server)
-# define XSHM_ENABLED(b) ((b)->shm)
-#else
-# define XSHM_ENABLED(b) 0
-#endif
+/**
+ * Subclass of pipe_surface for Xlib winsys
+ */
struct xmesa_surface
{
struct pipe_surface surface;
@@ -89,6 +88,9 @@ struct xmesa_surface
};
+/**
+ * Subclass of pipe_winsys for Xlib winsys
+ */
struct xmesa_pipe_winsys
{
struct pipe_winsys base;
@@ -97,23 +99,16 @@ struct xmesa_pipe_winsys
};
-static void alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
- unsigned width, unsigned height);
/** Cast wrapper */
static INLINE struct xmesa_surface *
xmesa_surface(struct pipe_surface *ps)
{
-// assert(0);
return (struct xmesa_surface *) ps;
}
-
-/**
- * Turn the softpipe opaque buffer pointer into a dri_bufmgr opaque
- * buffer pointer...
- */
+/** Cast wrapper */
static INLINE struct xm_buffer *
xm_buffer( struct pipe_buffer *buf )
{
@@ -121,6 +116,130 @@ xm_buffer( struct pipe_buffer *buf )
}
+/**
+ * X Shared Memory Image extension code
+ */
+#if defined(USE_XSHM) && !defined(XFree86Server)
+
+#define XSHM_ENABLED(b) ((b)->shm)
+
+static volatile int mesaXErrorFlag = 0;
+
+/**
+ * Catches potential Xlib errors.
+ */
+static int
+mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event)
+{
+ (void) dpy;
+ (void) event;
+ mesaXErrorFlag = 1;
+ return 0;
+}
+
+
+static GLboolean alloc_shm(struct xm_buffer *buf, unsigned size)
+{
+ XShmSegmentInfo *const shminfo = & buf->shminfo;
+
+ shminfo->shmid = shmget(IPC_PRIVATE, size, IPC_CREAT|0777);
+ if (shminfo->shmid < 0) {
+ return GL_FALSE;
+ }
+
+ shminfo->shmaddr = (char *) shmat(shminfo->shmid, 0, 0);
+ if (shminfo->shmaddr == (char *) -1) {
+ shmctl(shminfo->shmid, IPC_RMID, 0);
+ return GL_FALSE;
+ }
+
+ shminfo->readOnly = False;
+ return GL_TRUE;
+}
+
+
+/**
+ * Allocate a shared memory XImage back buffer for the given XMesaBuffer.
+ */
+static void
+alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
+ unsigned width, unsigned height)
+{
+ /*
+ * We have to do a _lot_ of error checking here to be sure we can
+ * really use the XSHM extension. It seems different servers trigger
+ * errors at different points if the extension won't work. Therefore
+ * we have to be very careful...
+ */
+#if 0
+ GC gc;
+#endif
+ int (*old_handler)(XMesaDisplay *, XErrorEvent *);
+
+ b->tempImage = XShmCreateImage(xmb->xm_visual->display,
+ xmb->xm_visual->visinfo->visual,
+ xmb->xm_visual->visinfo->depth,
+ ZPixmap,
+ NULL,
+ &b->shminfo,
+ width, height);
+ if (b->tempImage == NULL) {
+ b->shm = 0;
+ return;
+ }
+
+
+ mesaXErrorFlag = 0;
+ old_handler = XSetErrorHandler(mesaHandleXError);
+ /* This may trigger the X protocol error we're ready to catch: */
+ XShmAttach(xmb->xm_visual->display, &b->shminfo);
+ XSync(xmb->xm_visual->display, False);
+
+ if (mesaXErrorFlag) {
+ /* we are on a remote display, this error is normal, don't print it */
+ XFlush(xmb->xm_visual->display);
+ mesaXErrorFlag = 0;
+ XDestroyImage(b->tempImage);
+ b->tempImage = NULL;
+ b->shm = 0;
+ (void) XSetErrorHandler(old_handler);
+ return;
+ }
+
+
+ /* Finally, try an XShmPutImage to be really sure the extension works */
+#if 0
+ gc = XCreateGC(xmb->xm_visual->display, xmb->drawable, 0, NULL);
+ XShmPutImage(xmb->xm_visual->display, xmb->drawable, gc,
+ b->tempImage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False);
+ XSync(xmb->xm_visual->display, False);
+ XFreeGC(xmb->xm_visual->display, gc);
+ (void) XSetErrorHandler(old_handler);
+ if (mesaXErrorFlag) {
+ XFlush(xmb->xm_visual->display);
+ mesaXErrorFlag = 0;
+ XDestroyImage(b->tempImage);
+ b->tempImage = NULL;
+ b->shm = 0;
+ return;
+ }
+#endif
+}
+
+#else
+
+#define XSHM_ENABLED(b) 0
+
+static void
+alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
+ unsigned width, unsigned height)
+{
+ b->shm = 0;
+}
+#endif /* USE_XSHM */
+
+
+
/* Most callbacks map direcly onto dri_bufmgr operations:
*/
@@ -297,119 +416,6 @@ xm_get_name(struct pipe_winsys *pws)
}
-#if defined(USE_XSHM) && !defined(XFree86Server)
-static volatile int mesaXErrorFlag = 0;
-
-/**
- * Catches potential Xlib errors.
- */
-static int
-mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event)
-{
- (void) dpy;
- (void) event;
- mesaXErrorFlag = 1;
- return 0;
-}
-
-
-static GLboolean alloc_shm(struct xm_buffer *buf, unsigned size)
-{
- XShmSegmentInfo *const shminfo = & buf->shminfo;
-
- shminfo->shmid = shmget(IPC_PRIVATE, size, IPC_CREAT|0777);
- if (shminfo->shmid < 0) {
- return GL_FALSE;
- }
-
- shminfo->shmaddr = (char *) shmat(shminfo->shmid, 0, 0);
- if (shminfo->shmaddr == (char *) -1) {
- shmctl(shminfo->shmid, IPC_RMID, 0);
- return GL_FALSE;
- }
-
- shminfo->readOnly = False;
- return GL_TRUE;
-}
-
-
-/**
- * Allocate a shared memory XImage back buffer for the given XMesaBuffer.
- */
-static void
-alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
- unsigned width, unsigned height)
-{
- /*
- * We have to do a _lot_ of error checking here to be sure we can
- * really use the XSHM extension. It seems different servers trigger
- * errors at different points if the extension won't work. Therefore
- * we have to be very careful...
- */
-#if 0
- GC gc;
-#endif
- int (*old_handler)(XMesaDisplay *, XErrorEvent *);
-
- b->tempImage = XShmCreateImage(xmb->xm_visual->display,
- xmb->xm_visual->visinfo->visual,
- xmb->xm_visual->visinfo->depth,
- ZPixmap,
- NULL,
- &b->shminfo,
- width, height);
- if (b->tempImage == NULL) {
- b->shm = 0;
- return;
- }
-
-
- mesaXErrorFlag = 0;
- old_handler = XSetErrorHandler(mesaHandleXError);
- /* This may trigger the X protocol error we're ready to catch: */
- XShmAttach(xmb->xm_visual->display, &b->shminfo);
- XSync(xmb->xm_visual->display, False);
-
- if (mesaXErrorFlag) {
- /* we are on a remote display, this error is normal, don't print it */
- XFlush(xmb->xm_visual->display);
- mesaXErrorFlag = 0;
- XDestroyImage(b->tempImage);
- b->tempImage = NULL;
- b->shm = 0;
- (void) XSetErrorHandler(old_handler);
- return;
- }
-
-
- /* Finally, try an XShmPutImage to be really sure the extension works */
-#if 0
- gc = XCreateGC(xmb->xm_visual->display, xmb->drawable, 0, NULL);
- XShmPutImage(xmb->xm_visual->display, xmb->drawable, gc,
- b->tempImage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False);
- XSync(xmb->xm_visual->display, False);
- XFreeGC(xmb->xm_visual->display, gc);
- (void) XSetErrorHandler(old_handler);
- if (mesaXErrorFlag) {
- XFlush(xmb->xm_visual->display);
- mesaXErrorFlag = 0;
- XDestroyImage(b->tempImage);
- b->tempImage = NULL;
- b->shm = 0;
- return;
- }
-#endif
-}
-#else
-static void
-alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
- unsigned width, unsigned height)
-{
- b->shm = 0;
-}
-#endif
-
-
static struct pipe_buffer *
xm_buffer_create(struct pipe_winsys *pws,
unsigned alignment,
@@ -602,7 +608,7 @@ xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis)
}
-struct pipe_winsys *
+static struct pipe_winsys *
xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis)
{
static struct xmesa_pipe_winsys *ws = NULL;