diff options
Diffstat (limited to 'src')
40 files changed, 1288 insertions, 961 deletions
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index dd52f7e915..c30e66f172 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -428,10 +428,12 @@ __driUtilUpdateDrawableInfo(__DRIdrawablePrivate *pdp) if (pdp->pClipRects) { _mesa_free(pdp->pClipRects); + pdp->pClipRects = NULL; } if (pdp->pBackClipRects) { _mesa_free(pdp->pBackClipRects); + pdp->pBackClipRects = NULL; } DRM_SPINUNLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID); diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index b0e5f87fc7..cc8a605e50 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -64,6 +64,7 @@ static void i915_reduced_primitive_state( intelContextPtr intel, st1 &= ~ST1_ENABLE; switch (rprim) { + case GL_QUADS: /* from RASTERIZE(GL_QUADS) in t_dd_tritemp.h */ case GL_TRIANGLES: if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple) diff --git a/src/mesa/drivers/dri/i915/intel_pixel.c b/src/mesa/drivers/dri/i915/intel_pixel.c index d175870a0c..c3030d42b0 100644 --- a/src/mesa/drivers/dri/i915/intel_pixel.c +++ b/src/mesa/drivers/dri/i915/intel_pixel.c @@ -450,10 +450,16 @@ intelDrawPixels( GLcontext *ctx, * wise happily run the fragment program on each pixel in the image). */ struct gl_fragment_program *fpSave = ctx->FragmentProgram._Current; + /* can't just set current frag prog to 0 here as on buffer resize + we'll get new state checks which will segfault. Remains a hack. */ ctx->FragmentProgram._Current = NULL; + ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE; + ctx->FragmentProgram._Active = GL_FALSE; _swrast_DrawPixels( ctx, x, y, width, height, format, type, unpack, pixels ); ctx->FragmentProgram._Current = fpSave; + ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE; + ctx->FragmentProgram._Active = GL_TRUE; } else { _swrast_DrawPixels( ctx, x, y, width, height, format, type, diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c index 51aff68840..0e7ca9586e 100644 --- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c @@ -61,6 +61,7 @@ i915_reduced_primitive_state(struct intel_context *intel, GLenum rprim) st1 &= ~ST1_ENABLE; switch (rprim) { + case GL_QUADS: /* from RASTERIZE(GL_QUADS) in t_dd_tritemp.h */ case GL_TRIANGLES: if (intel->ctx.Polygon.StippleFlag && intel->hw_stipple) st1 |= ST1_ENABLE; diff --git a/src/mesa/drivers/dri/i915tex/intel_buffers.c b/src/mesa/drivers/dri/i915tex/intel_buffers.c index 5aed3ad0ec..17b4feadf8 100644 --- a/src/mesa/drivers/dri/i915tex/intel_buffers.c +++ b/src/mesa/drivers/dri/i915tex/intel_buffers.c @@ -272,7 +272,8 @@ intelWindowMoved(struct intel_context *intel) flags = intel_fb->vblank_flags & ~VBLANK_FLAG_SECONDARY; } - if (flags != intel_fb->vblank_flags) { + if (flags != intel_fb->vblank_flags && intel_fb->vblank_flags && + !(intel_fb->vblank_flags & VBLANK_FLAG_NO_IRQ)) { drmVBlank vbl; int i; @@ -283,7 +284,9 @@ intelWindowMoved(struct intel_context *intel) } for (i = 0; i < intel_fb->pf_num_pages; i++) { - if (!intel_fb->color_rb[i]) + if (!intel_fb->color_rb[i] || + (intel_fb->vbl_waited - intel_fb->color_rb[i]->vbl_pending) <= + (1<<23)) continue; vbl.request.sequence = intel_fb->color_rb[i]->vbl_pending; @@ -785,7 +788,8 @@ intelScheduleSwap(const __DRIdrawablePrivate * dPriv, GLboolean *missed_target) drm_i915_vblank_swap_t swap; GLboolean ret; - if ((intel_fb->vblank_flags & VBLANK_FLAG_NO_IRQ) || + if (!intel_fb->vblank_flags || + (intel_fb->vblank_flags & VBLANK_FLAG_NO_IRQ) || intelScreen->current_rotation != 0 || intelScreen->drmMinor < (intel_fb->pf_active ? 9 : 6)) return GL_FALSE; diff --git a/src/mesa/drivers/dri/i915tex/intel_context.c b/src/mesa/drivers/dri/i915tex/intel_context.c index 01373f0e40..61842da29d 100644 --- a/src/mesa/drivers/dri/i915tex/intel_context.c +++ b/src/mesa/drivers/dri/i915tex/intel_context.c @@ -646,12 +646,23 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, if (intel->ctx.DrawBuffer == &intel_fb->Base) { if (intel->driDrawable != driDrawPriv) { - intel_fb->vblank_flags = (intel->intelScreen->irq_active != 0) - ? driGetDefaultVBlankFlags(&intel->optionCache) - : VBLANK_FLAG_NO_IRQ; - (*dri_interface->getUST) (&intel_fb->swap_ust); - driDrawableInitVBlank(driDrawPriv, intel_fb->vblank_flags, - &intel_fb->vbl_seq); + if (driDrawPriv->pdraw->swap_interval == (unsigned)-1) { + int i; + + intel_fb->vblank_flags = (intel->intelScreen->irq_active != 0) + ? driGetDefaultVBlankFlags(&intel->optionCache) + : VBLANK_FLAG_NO_IRQ; + + (*dri_interface->getUST) (&intel_fb->swap_ust); + driDrawableInitVBlank(driDrawPriv, intel_fb->vblank_flags, + &intel_fb->vbl_seq); + intel_fb->vbl_waited = intel_fb->vbl_seq; + + for (i = 0; i < (intel->intelScreen->third.handle ? 3 : 2); i++) { + if (intel_fb->color_rb[i]) + intel_fb->color_rb[i]->vbl_pending = intel_fb->vbl_seq; + } + } intel->driDrawable = driDrawPriv; intelWindowMoved(intel); } @@ -699,37 +710,27 @@ intelContendedLock(struct intel_context *intel, GLuint flags) if (sarea->width != intel->width || sarea->height != intel->height || sarea->rotation != intel->current_rotation) { - - void *batchMap = intel->batch->map; - + int numClipRects = intel->numClipRects; + /* * FIXME: Really only need to do this when drawing to a * common back- or front buffer. */ /* - * This will drop the outstanding batchbuffer on the floor + * This will essentially drop the outstanding batchbuffer on the floor. */ + intel->numClipRects = 0; - if (batchMap != NULL) { - driBOUnmap(intel->batch->buffer); - intel->batch->map = NULL; - } - - intel_batchbuffer_reset(intel->batch); + if (intel->Fallback) + _swrast_flush(&intel->ctx); - if (batchMap == NULL) { - driBOUnmap(intel->batch->buffer); - intel->batch->map = NULL; - } + INTEL_FIREVERTICES(intel); - /* lose all primitives */ - intel->prim.primitive = ~0; - intel->prim.start_ptr = 0; - intel->prim.flush = 0; + if (intel->batch->map != intel->batch->ptr) + intel_batchbuffer_flush(intel->batch); - /* re-emit all state */ - intel->vtbl.lost_hardware(intel); + intel->numClipRects = numClipRects; /* force window update */ intel->lastStamp = 0; @@ -764,7 +765,9 @@ void LOCK_HARDWARE( struct intel_context *intel ) BUFFER_BACK_LEFT); } - if (intel_rb && (intel_fb->vbl_waited - intel_rb->vbl_pending) > (1<<23)) { + if (intel_rb && intel_fb->vblank_flags && + !(intel_fb->vblank_flags & VBLANK_FLAG_NO_IRQ) && + (intel_fb->vbl_waited - intel_rb->vbl_pending) > (1<<23)) { drmVBlank vbl; vbl.request.type = DRM_VBLANK_ABSOLUTE; diff --git a/src/mesa/drivers/dri/i915tex/intel_pixel_draw.c b/src/mesa/drivers/dri/i915tex/intel_pixel_draw.c index 77c67c821e..e4e57cb3a7 100644 --- a/src/mesa/drivers/dri/i915tex/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/i915tex/intel_pixel_draw.c @@ -370,10 +370,16 @@ intelDrawPixels(GLcontext * ctx, * wise happily run the fragment program on each pixel in the image). */ struct gl_fragment_program *fpSave = ctx->FragmentProgram._Current; + /* can't just set current frag prog to 0 here as on buffer resize + we'll get new state checks which will segfault. Remains a hack. */ ctx->FragmentProgram._Current = NULL; + ctx->FragmentProgram._UseTexEnvProgram = GL_FALSE; + ctx->FragmentProgram._Active = GL_FALSE; _swrast_DrawPixels( ctx, x, y, width, height, format, type, unpack, pixels ); ctx->FragmentProgram._Current = fpSave; + ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE; + ctx->FragmentProgram._Active = GL_TRUE; } else { _swrast_DrawPixels( ctx, x, y, width, height, format, type, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_buffers.c b/src/mesa/drivers/dri/nouveau/nouveau_buffers.c index 857cd30584..f98d666563 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_buffers.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_buffers.c @@ -69,7 +69,7 @@ void nouveau_mem_free(GLcontext *ctx, nouveau_mem *mem) { nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); - drm_nouveau_mem_free_t memf; + struct drm_nouveau_mem_free memf; if (NOUVEAU_DEBUG & DEBUG_MEM) { fprintf(stderr, "%s: type=0x%x, offset=0x%x, size=0x%x\n", @@ -78,8 +78,8 @@ nouveau_mem_free(GLcontext *ctx, nouveau_mem *mem) if (mem->map) drmUnmap(mem->map, mem->size); - memf.flags = mem->type; - memf.region_offset = mem->offset; + memf.flags = mem->type; + memf.offset = mem->offset; drmCommandWrite(nmesa->driFd, DRM_NOUVEAU_MEM_FREE, &memf, sizeof(memf)); FREE(mem); } @@ -88,7 +88,7 @@ nouveau_mem * nouveau_mem_alloc(GLcontext *ctx, int type, GLuint size, GLuint align) { nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); - drm_nouveau_mem_alloc_t mema; + struct drm_nouveau_mem_alloc mema; nouveau_mem *mem; int ret; @@ -111,7 +111,7 @@ nouveau_mem_alloc(GLcontext *ctx, int type, GLuint size, GLuint align) FREE(mem); return NULL; } - mem->offset = mema.region_offset; + mem->offset = mema.offset; mem->type = mema.flags; if (NOUVEAU_DEBUG & DEBUG_MEM) { @@ -120,7 +120,7 @@ nouveau_mem_alloc(GLcontext *ctx, int type, GLuint size, GLuint align) } if (type & NOUVEAU_MEM_MAPPED) - ret = drmMap(nmesa->driFd, mem->offset, mem->size, &mem->map); + ret = drmMap(nmesa->driFd, mema.map_handle, mem->size, &mem->map); if (ret) { mem->map = NULL; nouveau_mem_free(ctx, mem); @@ -135,12 +135,7 @@ nouveau_mem_gpu_offset_get(GLcontext *ctx, nouveau_mem *mem) { nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); - if (mem->type & NOUVEAU_MEM_FB) - return (uint32_t)mem->offset - nmesa->vram_phys; - else if (mem->type & NOUVEAU_MEM_AGP) - return (uint32_t)mem->offset - nmesa->gart_phys; - else - return 0xDEADF00D; + return mem->offset; } static GLboolean diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index d96b00242c..44392c0267 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -180,7 +180,7 @@ GLboolean nouveauCreateContext( const __GLcontextModes *glVisual, driParseConfigFiles (&nmesa->optionCache, &screen->optionCache, screen->driScreen->myNum, "nouveau"); - nmesa->sarea = (drm_nouveau_sarea_t *)((char *)sPriv->pSAREA + + nmesa->sarea = (struct drm_nouveau_sarea *)((char *)sPriv->pSAREA + screen->sarea_priv_offset); /* Enable any supported extensions */ @@ -224,6 +224,8 @@ GLboolean nouveauCreateContext( const __GLcontextModes *glVisual, nv04TriInitFunctions( ctx ); break; case NV_10: + case NV_11: + case NV_17: case NV_20: case NV_30: case NV_40: diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h index 10d2ed6e17..9a0be2cb2a 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h @@ -109,12 +109,12 @@ typedef struct nouveau_context { uint64_t gart_size; /* Channel synchronisation */ - drm_nouveau_notifier_alloc_t *syncNotifier; + struct drm_nouveau_notifier_alloc *syncNotifier; /* ARB_occlusion_query / EXT_timer_query */ GLuint query_object_max; GLboolean * query_alloc; - drm_nouveau_notifier_alloc_t *queryNotifier; + struct drm_nouveau_notifier_alloc *queryNotifier; /* Additional hw-specific functions */ nouveau_hw_func hw_func; @@ -168,7 +168,7 @@ typedef struct nouveau_context { nouveauShader *passthrough_fp; nouveauScreenRec *screen; - drm_nouveau_sarea_t *sarea; + struct drm_nouveau_sarea *sarea; __DRIcontextPrivate *driContext; /* DRI context */ __DRIscreenPrivate *driScreen; /* DRI screen */ diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c index 00956aa8f8..ddc9535624 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c @@ -41,7 +41,7 @@ GLboolean nouveauDRMGetParam(nouveauContextPtr nmesa, unsigned int param, uint64_t* value) { - drm_nouveau_getparam_t getp; + struct drm_nouveau_getparam getp; getp.param = param; if (!value || drmCommandWriteRead(nmesa->driFd, DRM_NOUVEAU_GETPARAM, @@ -56,7 +56,7 @@ GLboolean nouveauDRMSetParam(nouveauContextPtr nmesa, unsigned int param, uint64_t value) { - drm_nouveau_setparam_t setp; + struct drm_nouveau_setparam setp; setp.param = param; setp.value = value; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c index e9320918f9..7b5e96b4c2 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fifo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fifo.c @@ -98,7 +98,7 @@ void nouveauWaitForIdle(nouveauContextPtr nmesa) // here we call the fifo initialization ioctl and fill in stuff accordingly GLboolean nouveauFifoInit(nouveauContextPtr nmesa) { - drm_nouveau_fifo_alloc_t fifo_init; + struct drm_nouveau_fifo_alloc fifo_init; int i, ret; #ifdef NOUVEAU_RING_DEBUG diff --git a/src/mesa/drivers/dri/nouveau/nouveau_lock.c b/src/mesa/drivers/dri/nouveau/nouveau_lock.c index c119d14dd7..aa86c9e783 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_lock.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_lock.c @@ -44,7 +44,7 @@ void nouveauGetLock( nouveauContextPtr nmesa, GLuint flags ) { __DRIdrawablePrivate *dPriv = nmesa->driDrawable; __DRIscreenPrivate *sPriv = nmesa->driScreen; - drm_nouveau_sarea_t *sarea = nmesa->sarea; + struct drm_nouveau_sarea *sarea = nmesa->sarea; drmGetLock( nmesa->driFd, nmesa->hHWContext, flags ); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_object.c b/src/mesa/drivers/dri/nouveau/nouveau_object.c index 69f8dbf794..a143488e8d 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_object.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_object.c @@ -7,7 +7,7 @@ GLboolean nouveauCreateContextObject(nouveauContextPtr nmesa, uint32_t handle, int class) { - drm_nouveau_grobj_alloc_t cto; + struct drm_nouveau_grobj_alloc cto; int ret; cto.channel = nmesa->fifo.channel; @@ -34,10 +34,13 @@ void nouveauObjectInit(nouveauContextPtr nmesa) nouveauCreateContextObject(nmesa, Nv3D, nmesa->screen->card->class_3d); if (nmesa->screen->card->type>=NV_10) { nouveauCreateContextObject(nmesa, NvCtxSurf2D, NV10_CONTEXT_SURFACES_2D); - nouveauCreateContextObject(nmesa, NvImageBlit, NV10_IMAGE_BLIT); } else { nouveauCreateContextObject(nmesa, NvCtxSurf2D, NV04_CONTEXT_SURFACES_2D); nouveauCreateContextObject(nmesa, NvCtxSurf3D, NV04_CONTEXT_SURFACES_3D); + } + if (nmesa->screen->card->type>=NV_11) { + nouveauCreateContextObject(nmesa, NvImageBlit, NV10_IMAGE_BLIT); + } else { nouveauCreateContextObject(nmesa, NvImageBlit, NV_IMAGE_BLIT); } nouveauCreateContextObject(nmesa, NvMemFormat, NV_MEMORY_TO_MEMORY_FORMAT); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c index bc7f39b042..69b0691bb7 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_screen.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c @@ -328,7 +328,7 @@ void * __driCreateNewScreen_20050727( __DRInativeDisplay *dpy, int scrn, __DRIsc static const __DRIversion ddx_expected = { 1, 2, 0 }; static const __DRIversion dri_expected = { 4, 0, 0 }; static const __DRIversion drm_expected = { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; -#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 7 +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 9 #error nouveau_drm.h version doesn't match expected version #endif dri_interface = interface; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c b/src/mesa/drivers/dri/nouveau/nouveau_state.c index 7cb805902a..f618dcfc99 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_state.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c @@ -162,6 +162,8 @@ void nouveauDDInitState(nouveauContextPtr nmesa) nv04InitStateFuncs(nmesa->glCtx, &nmesa->glCtx->Driver); break; case NV_10: + case NV_11: + case NV_17: nv10InitStateFuncs(nmesa->glCtx, &nmesa->glCtx->Driver); break; case NV_20: diff --git a/src/mesa/drivers/dri/nouveau/nouveau_sync.c b/src/mesa/drivers/dri/nouveau/nouveau_sync.c index 1d1eeede18..8abc847e1e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_sync.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_sync.c @@ -39,11 +39,11 @@ nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); \ volatile uint32_t *__v = (void*)nmesa->notifier_block + notifier->offset -drm_nouveau_notifier_alloc_t * +struct drm_nouveau_notifier_alloc * nouveau_notifier_new(GLcontext *ctx, GLuint handle, GLuint count) { nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx); - drm_nouveau_notifier_alloc_t *notifier; + struct drm_nouveau_notifier_alloc *notifier; int ret; #ifdef NOUVEAU_RING_DEBUG @@ -69,14 +69,16 @@ nouveau_notifier_new(GLcontext *ctx, GLuint handle, GLuint count) } void -nouveau_notifier_destroy(GLcontext *ctx, drm_nouveau_notifier_alloc_t *notifier) +nouveau_notifier_destroy(GLcontext *ctx, + struct drm_nouveau_notifier_alloc *notifier) { /*XXX: free notifier object.. */ FREE(notifier); } void -nouveau_notifier_reset(GLcontext *ctx, drm_nouveau_notifier_alloc_t *notifier, +nouveau_notifier_reset(GLcontext *ctx, + struct drm_nouveau_notifier_alloc *notifier, GLuint id) { NOTIFIER(n); @@ -93,7 +95,8 @@ nouveau_notifier_reset(GLcontext *ctx, drm_nouveau_notifier_alloc_t *notifier, } GLuint -nouveau_notifier_status(GLcontext *ctx, drm_nouveau_notifier_alloc_t *notifier, +nouveau_notifier_status(GLcontext *ctx, + struct drm_nouveau_notifier_alloc *notifier, GLuint id) { NOTIFIER(n); @@ -103,7 +106,8 @@ nouveau_notifier_status(GLcontext *ctx, drm_nouveau_notifier_alloc_t *notifier, GLuint nouveau_notifier_return_val(GLcontext *ctx, - drm_nouveau_notifier_alloc_t *notifier, GLuint id) + struct drm_nouveau_notifier_alloc *notifier, + GLuint id) { NOTIFIER(n); @@ -112,8 +116,8 @@ nouveau_notifier_return_val(GLcontext *ctx, GLboolean nouveau_notifier_wait_status(GLcontext *ctx, - drm_nouveau_notifier_alloc_t *notifier, GLuint id, - GLuint status, GLuint timeout) + struct drm_nouveau_notifier_alloc *notifier, + GLuint id, GLuint status, GLuint timeout) { NOTIFIER(n); unsigned int time = 0; @@ -146,7 +150,8 @@ nouveau_notifier_wait_status(GLcontext *ctx, void nouveau_notifier_wait_nop(GLcontext *ctx, - drm_nouveau_notifier_alloc_t *notifier, GLuint subc) + struct drm_nouveau_notifier_alloc *notifier, + GLuint subc) { NOTIFIER(n); GLboolean ret; diff --git a/src/mesa/drivers/dri/nouveau/nouveau_sync.h b/src/mesa/drivers/dri/nouveau/nouveau_sync.h index b56cc5fb54..b76af17276 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_sync.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_sync.h @@ -47,22 +47,24 @@ #define NV_NOTIFY 0x00000104 #define NV_NOTIFY_STYLE_WRITE_ONLY 0 -extern drm_nouveau_notifier_alloc_t * +extern struct drm_nouveau_notifier_alloc * nouveau_notifier_new(GLcontext *, GLuint handle, GLuint count); extern void -nouveau_notifier_destroy(GLcontext *, drm_nouveau_notifier_alloc_t *); +nouveau_notifier_destroy(GLcontext *, struct drm_nouveau_notifier_alloc *); extern void -nouveau_notifier_reset(GLcontext *, drm_nouveau_notifier_alloc_t *, GLuint id); +nouveau_notifier_reset(GLcontext *, struct drm_nouveau_notifier_alloc *, + GLuint id); extern GLuint -nouveau_notifier_status(GLcontext *, drm_nouveau_notifier_alloc_t *, GLuint id); +nouveau_notifier_status(GLcontext *, struct drm_nouveau_notifier_alloc *, + GLuint id); extern GLuint -nouveau_notifier_return_val(GLcontext *, drm_nouveau_notifier_alloc_t *, +nouveau_notifier_return_val(GLcontext *, struct drm_nouveau_notifier_alloc *, GLuint id); extern GLboolean -nouveau_notifier_wait_status(GLcontext *, drm_nouveau_notifier_alloc_t *, +nouveau_notifier_wait_status(GLcontext *, struct drm_nouveau_notifier_alloc *, GLuint id, GLuint status, GLuint timeout); extern void -nouveau_notifier_wait_nop(GLcontext *ctx, drm_nouveau_notifier_alloc_t *, +nouveau_notifier_wait_nop(GLcontext *ctx, struct drm_nouveau_notifier_alloc *, GLuint subc); extern GLboolean nouveauSyncInitFuncs(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/nouveau/nv10_state.c b/src/mesa/drivers/dri/nouveau/nv10_state.c index 5f304ccab9..47c4b14ba6 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state.c @@ -697,8 +697,7 @@ static GLboolean nv10InitCard(nouveauContextPtr nmesa) BEGIN_RING_SIZE(NvSub3D, 0x03f4, 1); OUT_RING(0); - /* not for nv10, only for >= nv11 */ - if ((nmesa->screen->card->id>>4) >= 0x11) { + if (nmesa->screen->card->type >= NV_11) { BEGIN_RING_SIZE(NvSub3D, 0x120, 3); OUT_RING(0); OUT_RING(1); @@ -739,11 +738,11 @@ static GLboolean nv10BindBuffers(nouveauContextPtr nmesa, int num_color, OUT_RING_CACHE(depth ? depth->offset : color[0]->offset); /* Always set to bottom left of buffer */ - BEGIN_RING_CACHE(NvSub3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_ORIGIN_X, 4); + /*BEGIN_RING_CACHE(NvSub3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_ORIGIN_X, 4); OUT_RING_CACHEf (0.0); OUT_RING_CACHEf ((GLfloat) h); OUT_RING_CACHEf (0.0); - OUT_RING_CACHEf (0.0); + OUT_RING_CACHEf (0.0);*/ return GL_TRUE; } diff --git a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c index 4576c1ede4..611469b6e4 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_swtcl.c +++ b/src/mesa/drivers/dri/nouveau/nv10_swtcl.c @@ -58,7 +58,7 @@ static void nv10ResetLineStipple( GLcontext *ctx ); static inline void nv10StartPrimitive(struct nouveau_context* nmesa,uint32_t primitive,uint32_t size) { - if (nmesa->screen->card->type==NV_10) + if ((nmesa->screen->card->type>=NV_10) && (nmesa->screen->card->type<=NV_17)) BEGIN_RING_SIZE(NvSub3D,NV10_TCL_PRIMITIVE_3D_BEGIN_END,1); else if (nmesa->screen->card->type==NV_20) BEGIN_RING_SIZE(NvSub3D,NV20_TCL_PRIMITIVE_3D_BEGIN_END,1); @@ -66,7 +66,7 @@ static inline void nv10StartPrimitive(struct nouveau_context* nmesa,uint32_t pri BEGIN_RING_SIZE(NvSub3D,NV30_TCL_PRIMITIVE_3D_BEGIN_END,1); OUT_RING(primitive); - if (nmesa->screen->card->type==NV_10) + if ((nmesa->screen->card->type>=NV_10) && (nmesa->screen->card->type<=NV_17)) BEGIN_RING_SIZE(NvSub3D,NV10_TCL_PRIMITIVE_3D_VERTEX_ARRAY_DATA|NONINC_METHOD,size); else if (nmesa->screen->card->type==NV_20) BEGIN_RING_SIZE(NvSub3D,NV20_TCL_PRIMITIVE_3D_VERTEX_DATA|NONINC_METHOD,size); @@ -76,7 +76,7 @@ static inline void nv10StartPrimitive(struct nouveau_context* nmesa,uint32_t pri inline void nv10FinishPrimitive(struct nouveau_context *nmesa) { - if (nmesa->screen->card->type==NV_10) + if ((nmesa->screen->card->type>=NV_10) && (nmesa->screen->card->type<=NV_17)) BEGIN_RING_SIZE(NvSub3D,NV10_TCL_PRIMITIVE_3D_BEGIN_END,1); else if (nmesa->screen->card->type==NV_20) BEGIN_RING_SIZE(NvSub3D,NV20_TCL_PRIMITIVE_3D_BEGIN_END,1); @@ -454,7 +454,7 @@ static inline void nv10OutputVertexFormat(struct nouveau_context* nmesa) /* * Tell the hardware about the vertex format */ - if (nmesa->screen->card->type==NV_10) { + if ((nmesa->screen->card->type>=NV_10) && (nmesa->screen->card->type<=NV_17)) { int size; #define NV_VERTEX_ATTRIBUTE_TYPE_FLOAT 2 diff --git a/src/mesa/drivers/dri/nouveau/nv20_state.c b/src/mesa/drivers/dri/nouveau/nv20_state.c index 3d8d83a865..ccf2f6148b 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state.c @@ -568,10 +568,10 @@ static void nv20Scissor(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h) y += nmesa->drawY; } - BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_SCISSOR_X2_X1, 1); + /*BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_SCISSOR_X2_X1, 1); OUT_RING_CACHE((w << 16) | x ); BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_SCISSOR_Y2_Y1, 1); - OUT_RING_CACHE((h << 16) | y ); + OUT_RING_CACHE((h << 16) | y );*/ } @@ -764,11 +764,11 @@ static GLboolean nv20BindBuffers(nouveauContextPtr nmesa, int num_color, } /* Always set to bottom left of buffer */ - BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VIEWPORT_ORIGIN_X, 4); + /*BEGIN_RING_CACHE(NvSub3D, NV20_TCL_PRIMITIVE_3D_VIEWPORT_ORIGIN_X, 4); OUT_RING_CACHEf (0.0); OUT_RING_CACHEf ((GLfloat) h); OUT_RING_CACHEf (0.0); - OUT_RING_CACHEf (0.0); + OUT_RING_CACHEf (0.0);*/ return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 6615bc79fb..0349bac9a2 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -568,19 +568,10 @@ struct r300_vertex_shader_fragment { union { GLuint d[VSF_MAX_FRAGMENT_LENGTH]; float f[VSF_MAX_FRAGMENT_LENGTH]; - VERTEX_SHADER_INSTRUCTION i[VSF_MAX_FRAGMENT_LENGTH / 4]; + struct r300_vertprog_instruction i[VSF_MAX_FRAGMENT_LENGTH / 4]; } body; }; -#define VSF_DEST_PROGRAM 0x0 -#define VSF_DEST_MATRIX0 0x200 -#define VSF_DEST_MATRIX1 0x204 -#define VSF_DEST_MATRIX2 0x208 -#define VSF_DEST_VECTOR0 0x20c -#define VSF_DEST_VECTOR1 0x20d -#define VSF_DEST_UNKNOWN1 0x400 -#define VSF_DEST_UNKNOWN2 0x406 - struct r300_vertex_shader_state { struct r300_vertex_shader_fragment program; }; diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 229439dfa8..424bf44e59 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -299,13 +299,14 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) if (OutputsWritten & (1 << VERT_RESULT_COL1)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; -#if 0 - if (OutputsWritten & (1 << VERT_RESULT_BFC0)) - ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT; - - if (OutputsWritten & (1 << VERT_RESULT_BFC1)) - ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + if (OutputsWritten & (1 << VERT_RESULT_BFC0) + || OutputsWritten & (1 << VERT_RESULT_BFC1)) + ret |= + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; +#if 0 if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ; #endif diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index e59919be49..1baa74c526 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -282,9 +282,32 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. */ #define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200 # define R300_PVS_UPLOAD_PROGRAM 0x00000000 +/* gap */ # define R300_PVS_UPLOAD_PARAMETERS 0x00000200 +/* gap */ +# define R300_PVS_UPLOAD_CLIP_PLANE0 0x00000400 +# define R300_PVS_UPLOAD_CLIP_PLANE1 0x00000401 +# define R300_PVS_UPLOAD_CLIP_PLANE2 0x00000402 +# define R300_PVS_UPLOAD_CLIP_PLANE3 0x00000403 +# define R300_PVS_UPLOAD_CLIP_PLANE4 0x00000404 +# define R300_PVS_UPLOAD_CLIP_PLANE5 0x00000405 # define R300_PVS_UPLOAD_POINTSIZE 0x00000406 +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 +#endif + /* gap */ #define R300_VAP_PVS_UPLOAD_DATA 0x2208 diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index b5cf21d644..6789efd428 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -82,6 +82,8 @@ static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0], color[1], color[2]); + rmesa->hw.blend_color.cmd[2] = 0; + rmesa->hw.blend_color.cmd[3] = 0; } /** @@ -317,20 +319,34 @@ static void r300UpdateCulling(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); uint32_t val = 0; - R300_STATECHANGE(r300, cul); if (ctx->Polygon.CullFlag) { - if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - val = R300_CULL_FRONT | R300_CULL_BACK; - else if (ctx->Polygon.CullFaceMode == GL_FRONT) + switch (ctx->Polygon.CullFaceMode) { + case GL_FRONT: val = R300_CULL_FRONT; - else + break; + case GL_BACK: val = R300_CULL_BACK; + break; + case GL_FRONT_AND_BACK: + val = R300_CULL_FRONT | R300_CULL_BACK; + break; + default: + break; + } + } - if (ctx->Polygon.FrontFace == GL_CW) - val |= R300_FRONT_FACE_CW; - else - val |= R300_FRONT_FACE_CCW; + switch (ctx->Polygon.FrontFace) { + case GL_CW: + val |= R300_FRONT_FACE_CW; + break; + case GL_CCW: + val |= R300_FRONT_FACE_CCW; + break; + default: + break; } + + R300_STATECHANGE(r300, cul); r300->hw.cul.cmd[R300_CUL_CULL] = val; } @@ -344,6 +360,20 @@ static void r300SetEarlyZState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zstencil_format); + switch (ctx->Visual.depthBits) { + case 16: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; + break; + case 24: + r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; + break; + default: + fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits); + _mesa_exit(-1); + } + + // r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; + if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) /* disable early Z */ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; @@ -355,6 +385,9 @@ static void r300SetEarlyZState(GLcontext * ctx) /* disable early Z */ r300->hw.zstencil_format.cmd[2] = R300_EARLY_Z_DISABLE; } + + r300->hw.zstencil_format.cmd[3] = 0x00000003; + r300->hw.zstencil_format.cmd[4] = 0x00000000; } static void r300SetAlphaState(GLcontext * ctx) @@ -403,6 +436,7 @@ static void r300SetAlphaState(GLcontext * ctx) R300_STATECHANGE(r300, at); r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; + r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; r300SetEarlyZState(ctx); } @@ -513,6 +547,9 @@ static void r300UpdatePolygonMode(GLcontext * ctx) R300_STATECHANGE(r300, polygon_mode); r300->hw.polygon_mode.cmd[1] = hw_mode; } + + r300->hw.polygon_mode.cmd[2] = 0x00000001; + r300->hw.polygon_mode.cmd[3] = 0x00000000; } /** @@ -762,6 +799,7 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) r300ContextPtr rmesa = R300_CONTEXT(ctx); R300_STATECHANGE(rmesa, shade); + rmesa->hw.shade.cmd[1] = 0x00000002; switch (mode) { case GL_FLAT: rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; @@ -772,6 +810,8 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) default: return; } + rmesa->hw.shade.cmd[3] = 0x00000000; + rmesa->hw.shade.cmd[4] = 0x00000000; } static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, @@ -1526,7 +1566,7 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { if (rmesa->state.sw_tcl_inputs[i] != -1) { - prog->program.body.i[program_end].op = EASY_VSF_OP(MUL, o_reg++, ALL, RESULT); + prog->program.body.i[program_end].opcode = EASY_VSF_OP(MUL, o_reg++, ALL, RESULT); prog->program.body.i[program_end].src[0] = VSF_REG(rmesa->state.sw_tcl_inputs[i]); prog->program.body.i[program_end].src[1] = VSF_ATTR_UNITY(rmesa->state.sw_tcl_inputs[i]); prog->program.body.i[program_end].src[2] = VSF_UNITY(rmesa->state.sw_tcl_inputs[i]); @@ -1536,7 +1576,7 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) prog->program.length = program_end * 4; - r300SetupVertexProgramFragment(rmesa, VSF_DEST_PROGRAM, &(prog->program)); + r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program)); inst_count = (prog->program.length / 4) - 1; R300_STATECHANGE(rmesa, pvs); @@ -1570,7 +1610,7 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa) bump_vpu_count(rmesa->hw.vpp.cmd, param_count); param_count /= 4; - r300SetupVertexProgramFragment(rmesa, VSF_DEST_PROGRAM, &(prog->program)); + r300SetupVertexProgramFragment(rmesa, R300_PVS_UPLOAD_PROGRAM, &(prog->program)); inst_count = (prog->program.length / 4) - 1; R300_STATECHANGE(rmesa, pvs); @@ -1848,15 +1888,10 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4260.cmd[2] = r300PackFloat32(0.0); r300->hw.unk4260.cmd[3] = r300PackFloat32(1.0); - r300->hw.shade.cmd[1] = 0x00000002; r300ShadeModel(ctx, ctx->Light.ShadeModel); - r300->hw.shade.cmd[3] = 0x00000000; - r300->hw.shade.cmd[4] = 0x00000000; r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode); r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode); - r300->hw.polygon_mode.cmd[2] = 0x00000001; - r300->hw.polygon_mode.cmd[3] = 0x00000000; r300->hw.zbias_cntl.cmd[1] = 0x00000000; r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor, @@ -1887,14 +1922,11 @@ static void r300ResetHwState(r300ContextPtr r300) r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color); r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL); - r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; r300->hw.unk4BD8.cmd[1] = 0; r300->hw.unk4E00.cmd[1] = 0; r300BlendColor(ctx, ctx->Color.BlendColor); - r300->hw.blend_color.cmd[2] = 0; - r300->hw.blend_color.cmd[3] = 0; /* Again, r300ClearBuffer uses this */ r300->hw.cb.cmd[R300_CB_OFFSET] = @@ -1925,25 +1957,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.unk4EA0.cmd[1] = 0x00000000; r300->hw.unk4EA0.cmd[2] = 0xffffffff; - switch (ctx->Visual.depthBits) { - case 16: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_16BIT_INT_Z; - break; - case 24: - r300->hw.zstencil_format.cmd[1] = R300_DEPTH_FORMAT_24BIT_INT_Z; - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits); - _mesa_exit(-1); - - } - /* z compress? */ - //r300->hw.zstencil_format.cmd[1] |= R300_DEPTH_FORMAT_UNK32; - - r300->hw.zstencil_format.cmd[3] = 0x00000003; - r300->hw.zstencil_format.cmd[4] = 0x00000000; - r300->hw.zb.cmd[R300_ZB_OFFSET] = r300->radeon.radeonScreen->depthOffset + r300->radeon.radeonScreen->fbLocation; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 16dddf6557..0fb6110494 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -29,6 +29,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * \file * * \author Aapo Tahkola <aet@rasterburn.org> + * + * \author Oliver McFadden <z3ro.geek@gmail.com> */ #include "glheader.h" @@ -55,54 +57,58 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #error Cannot change these! #endif -#define SCALAR_FLAG (1<<31) -#define FLAG_MASK (1<<31) -#define OP_MASK (0xf) /* we are unlikely to have more than 15 */ -#define OPN(operator, ip) {#operator, OPCODE_##operator, ip} - -static struct { - char *name; - int opcode; - unsigned long ip; /* number of input operands and flags */ -} op_names[] = { - /* *INDENT-OFF* */ - OPN(ABS, 1), - OPN(ADD, 2), - OPN(ARL, 1 | SCALAR_FLAG), - OPN(DP3, 2), - OPN(DP4, 2), - OPN(DPH, 2), - OPN(DST, 2), - OPN(EX2, 1 | SCALAR_FLAG), - OPN(EXP, 1 | SCALAR_FLAG), - OPN(FLR, 1), - OPN(FRC, 1), - OPN(LG2, 1 | SCALAR_FLAG), - OPN(LIT, 1), - OPN(LOG, 1 | SCALAR_FLAG), - OPN(MAD, 3), - OPN(MAX, 2), - OPN(MIN, 2), - OPN(MOV, 1), - OPN(MUL, 2), - OPN(POW, 2 | SCALAR_FLAG), - OPN(RCP, 1 | SCALAR_FLAG), - OPN(RSQ, 1 | SCALAR_FLAG), - OPN(SGE, 2), - OPN(SLT, 2), - OPN(SUB, 2), - OPN(SWZ, 1), - OPN(XPD, 2), - OPN(RCC, 0), //extra - OPN(PRINT, 0), - OPN(END, 0) - /* *INDENT-ON* */ -}; - -#undef OPN +/* TODO: Get rid of t_src_class call */ +#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ + ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ + t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ + (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ + t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ + +#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + SWIZZLE_ZERO, SWIZZLE_ZERO, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) + +#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) + +#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + SWIZZLE_ONE, SWIZZLE_ONE, \ + t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) + +/* DP4 version seems to trigger some hw peculiarity */ +//#define PREFER_DP4 + +#define FREE_TEMPS() \ + do { \ + if(u_temp_i < vp->num_temporaries) { \ + WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ + vp->native = GL_FALSE; \ + } \ + u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ + } while (0) int r300VertexProgUpdateParams(GLcontext * ctx, - struct r300_vertex_program_cont *vp, float *dst) + struct r300_vertex_program_cont *vp, + float *dst) { int pi; struct gl_vertex_program *mesa_vp = &vp->mesa_program; @@ -234,8 +240,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) int i; if (vp == NULL) { - fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, - caller); + fprintf(stderr, "vp null in call to %s from %s\n", + __FUNCTION__, caller); return; } @@ -276,6 +282,8 @@ static unsigned long t_src_index(struct r300_vertex_program *vp, } } +/* these two functions should probably be merged... */ + static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src) { @@ -294,7 +302,9 @@ static unsigned long t_src(struct r300_vertex_program *vp, static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src) { - + /* src->NegateBase uses the NEGATE_ flags from program_instruction.h, + * which equal our VSF_FLAGS_ values, so it's safe to just pass it here. + */ return MAKE_VSF_SOURCE(t_src_index(vp, src), t_swizzle(GET_SWZ(src->Swizzle, 0)), t_swizzle(GET_SWZ(src->Swizzle, 0)), @@ -306,128 +316,727 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp, (src->RelAddr << 4); } -static unsigned long t_opcode(enum prog_opcode opcode) +static GLboolean valid_dst(struct r300_vertex_program *vp, + struct prog_dst_register *dst) { + if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { + return GL_FALSE; + } else if (dst->File == PROGRAM_ADDRESS) { + assert(dst->Index == 0); + } + + return GL_TRUE; +} - switch (opcode) { - /* *INDENT-OFF* */ - case OPCODE_ARL: return R300_VPI_OUT_OP_ARL; - case OPCODE_DST: return R300_VPI_OUT_OP_DST; - case OPCODE_EX2: return R300_VPI_OUT_OP_EX2; - case OPCODE_EXP: return R300_VPI_OUT_OP_EXP; - case OPCODE_FRC: return R300_VPI_OUT_OP_FRC; - case OPCODE_LG2: return R300_VPI_OUT_OP_LG2; - case OPCODE_LOG: return R300_VPI_OUT_OP_LOG; - case OPCODE_MAX: return R300_VPI_OUT_OP_MAX; - case OPCODE_MIN: return R300_VPI_OUT_OP_MIN; - case OPCODE_MUL: return R300_VPI_OUT_OP_MUL; - case OPCODE_RCP: return R300_VPI_OUT_OP_RCP; - case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ; - case OPCODE_SGE: return R300_VPI_OUT_OP_SGE; - case OPCODE_SLT: return R300_VPI_OUT_OP_SLT; - case OPCODE_DP4: return R300_VPI_OUT_OP_DOT; - /* *INDENT-ON* */ +/* + * Instruction Inputs Output Description + * ----------- ------ ------ -------------------------------- + * ABS v v absolute value + * ADD v,v v add + * ARL s a address register load + * DP3 v,v ssss 3-component dot product + * DP4 v,v ssss 4-component dot product + * DPH v,v ssss homogeneous dot product + * DST v,v v distance vector + * EX2 s ssss exponential base 2 + * EXP s v exponential base 2 (approximate) + * FLR v v floor + * FRC v v fraction + * LG2 s ssss logarithm base 2 + * LIT v v compute light coefficients + * LOG s v logarithm base 2 (approximate) + * MAD v,v,v v multiply and add + * MAX v,v v maximum + * MIN v,v v minimum + * MOV v v move + * MUL v,v v multiply + * POW s,s ssss exponentiate + * RCP s ssss reciprocal + * RSQ s ssss reciprocal square root + * SGE v,v v set on greater than or equal + * SLT v,v v set on less than + * SUB v,v v subtract + * SWZ v v extended swizzle + * XPD v,v v cross product + * + * Table X.5: Summary of vertex program instructions. "v" indicates a + * floating-point vector input or output, "s" indicates a floating-point + * scalar input, "ssss" indicates a scalar output replicated across a + * 4-component result vector, and "a" indicates a single address register + * component. + */ - default: - fprintf(stderr, "%s: Should not be called with opcode %d!", - __FUNCTION__, opcode); - } - _mesa_exit(-1); - return 0; +static void t_opcode_abs(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), + t_src_class(src[0].File), + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + o_inst->src[2] = 0; } -static unsigned long op_operands(enum prog_opcode opcode) +static void t_opcode_add(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) { - int i; + unsigned long hw_op; - /* Can we trust mesas opcodes to be in order ? */ - for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++) - if (op_names[i].opcode == opcode) - return op_names[i].ip; +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->opcode = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = ONE_SRC_0; + o_inst->src[1] = t_src(vp, &src[0]); + o_inst->src[2] = t_src(vp, &src[1]); +#else + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; - fprintf(stderr, "op %d not found in op_names\n", opcode); - _mesa_exit(-1); - return 0; +#endif } -static GLboolean valid_dst(struct r300_vertex_program *vp, - struct prog_dst_register *dst) +static void t_opcode_arl(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) { - if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) { - return GL_FALSE; - } else if (dst->File == PROGRAM_ADDRESS) { - assert(dst->Index == 0); - } + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ARL, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} - return GL_TRUE; +static void t_opcode_dp3(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + SWIZZLE_ZERO, t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + + o_inst->src[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + SWIZZLE_ZERO, t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + + o_inst->src[2] = ZERO_SRC_1; } -/* TODO: Get rid of t_src_class call */ -#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ - ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \ - t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \ - (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \ - t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \ +static void t_opcode_dp4(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; +} -#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) +static void t_opcode_dph(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), + VSF_IN_COMPONENT_ONE, t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; +} -#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) +static void t_opcode_dst(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_DST, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; +} -#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - SWIZZLE_ZERO, SWIZZLE_ZERO, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) +static void t_opcode_ex2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_EX2, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} -#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4)) +static void t_opcode_exp(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_EXP, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} -#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4)) +static struct r300_vertprog_instruction *t_opcode_flr(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} + ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + VSF_OUT_CLASS_TMP); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; + o_inst++; + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = + MAKE_VSF_SOURCE(*u_temp_i, VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, + /* Not 100% sure about this */ + (!src[0]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE + /*VSF_FLAG_ALL */ ); + + o_inst->src[2] = ZERO_SRC_0; + (*u_temp_i)--; + + return o_inst; +} -#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - SWIZZLE_ONE, SWIZZLE_ONE, \ - t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4)) +static void t_opcode_frc(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} -/* DP4 version seems to trigger some hw peculiarity */ -//#define PREFER_DP4 +static void t_opcode_lg2(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[0].RelAddr << 4); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} -#define FREE_TEMPS() \ - do { \ - if(u_temp_i < vp->num_temporaries) { \ - WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \ - vp->native = GL_FALSE; \ - } \ - u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ - } while (0) +static void t_opcode_lit(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + /* NOTE: Users swizzling might not work. */ + o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + VSF_IN_COMPONENT_ZERO, // z + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); +} -static void r300TranslateVertexShader(struct r300_vertex_program *vp, - struct prog_instruction *vpi) +static void t_opcode_log(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_LOG, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} + +static void t_opcode_mad(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) { - int i, cur_reg = 0; - VERTEX_SHADER_INSTRUCTION *o_inst; - unsigned long operands; - int are_srcs_scalar; unsigned long hw_op; - /* Initial value should be last tmp reg that hw supports. - Strangely enough r300 doesnt mind even though these would be out of range. - Smart enough to realize that it doesnt need it? */ - int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; - struct prog_src_register src[3]; - vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == PROGRAM_TEMPORARY + && src[2].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->opcode = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = t_src(vp, &src[2]); +} + +static void t_opcode_max(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; +} + +static void t_opcode_min(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_MIN, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; +} + +static void t_opcode_mov(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + +#if 1 + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +#else + hw_op = + (src[0].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->opcode = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ONE_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +#endif +} + +static void t_opcode_mul(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + unsigned long hw_op; + + // HW mul can take third arg but appears to have some other limitations. + + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->opcode = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + + o_inst->src[2] = ZERO_SRC_1; +} + +static void t_opcode_pow(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = t_src_scalar(vp, &src[1]); +} + +static void t_opcode_rcp(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_RCP, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} + +static void t_opcode_rsq(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_RSQ, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src_scalar(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +} + +static void t_opcode_sge(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_SGE, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; +} + +static void t_opcode_slt(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_SLT, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = t_src(vp, &src[1]); + o_inst->src[2] = ZERO_SRC_1; +} + +static void t_opcode_sub(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + unsigned long hw_op; + + //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W + +#if 1 + hw_op = (src[0].File == PROGRAM_TEMPORARY + && src[1].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->opcode = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ONE_SRC_0; + o_inst->src[2] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); +#else + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = + MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | + (src[1].RelAddr << 4); + o_inst->src[2] = 0; +#endif +} + +static void t_opcode_swz(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3]) +{ + //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} + +#if 1 + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ZERO_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +#else + hw_op = + (src[0].File == + PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : + R300_VPI_OUT_OP_MAD; + + o_inst->opcode = + MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + o_inst->src[0] = t_src(vp, &src[0]); + o_inst->src[1] = ONE_SRC_0; + o_inst->src[2] = ZERO_SRC_0; +#endif +} + +static struct r300_vertprog_instruction *t_opcode_xpd(struct r300_vertex_program *vp, + struct prog_instruction *vpi, + struct r300_vertprog_instruction *o_inst, + struct prog_src_register src[3], + int *u_temp_i) +{ + /* mul r0, r1.yzxw, r2.zxyw + mad r0, -r2.yzxw, r1.zxyw, r0 + NOTE: might need MAD_2 + */ + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, *u_temp_i, + t_dst_mask(vpi->DstReg.WriteMask), + VSF_OUT_CLASS_TMP); + + o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + + o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + src[1]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1]. + RelAddr << 4); + + o_inst->src[2] = ZERO_SRC_1; + o_inst++; + (*u_temp_i)--; + + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg), + t_dst_mask(vpi->DstReg.WriteMask), + t_dst_class(vpi->DstReg.File)); + + o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w + t_src_class(src[1].File), + (!src[1]. + NegateBase) ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[1]. + RelAddr << 4); + + o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z + t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x + t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y + t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w + t_src_class(src[0].File), + src[0]. + NegateBase ? VSF_FLAG_ALL : + VSF_FLAG_NONE) | (src[0]. + RelAddr << 4); + + o_inst->src[2] = + MAKE_VSF_SOURCE(*u_temp_i + 1, VSF_IN_COMPONENT_X, + VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z, + VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP, + VSF_FLAG_NONE); + + return o_inst; +} + +static void t_inputs_outputs(struct r300_vertex_program *vp) +{ + int i; + int cur_reg = 0; for (i = 0; i < VERT_ATTRIB_MAX; i++) vp->inputs[i] = -1; @@ -437,39 +1046,71 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)); - /* Assign outputs */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) { vp->outputs[VERT_RESULT_HPOS] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) - vp->outputs[VERT_RESULT_COL1] = cur_reg++; - -#if 0 /* Not supported yet */ - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) - vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { + vp->outputs[VERT_RESULT_COL1] = + vp->outputs[VERT_RESULT_COL0] + 1; + cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) - vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + vp->outputs[VERT_RESULT_BFC0] = + vp->outputs[VERT_RESULT_COL0] + 2; + cur_reg = vp->outputs[VERT_RESULT_BFC0] + 1; + } - if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) + if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + vp->outputs[VERT_RESULT_BFC1] = + vp->outputs[VERT_RESULT_COL0] + 3; + cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + } +#if 0 + if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) { vp->outputs[VERT_RESULT_FOGC] = cur_reg++; + } #endif - for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) - if (vp->key.OutputsWritten & (1 << i)) + for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { + if (vp->key.OutputsWritten & (1 << i)) { vp->outputs[i] = cur_reg++; + } + } +} + +static void r300TranslateVertexShader(struct r300_vertex_program *vp, + struct prog_instruction *vpi) +{ + int i; + struct r300_vertprog_instruction *o_inst; + unsigned long num_operands; + /* Initial value should be last tmp reg that hw supports. + Strangely enough r300 doesnt mind even though these would be out of range. + Smart enough to realize that it doesnt need it? */ + int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1; + struct prog_src_register src[3]; + vp->pos_end = 0; /* Not supported yet */ + vp->program.length = 0; + /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ vp->translated = GL_TRUE; vp->native = GL_TRUE; - o_inst = vp->program.body.i; - for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) { + t_inputs_outputs(vp); + + for (o_inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + vpi++, o_inst++) { + FREE_TEMPS(); if (!valid_dst(vp, &vpi->DstReg)) { @@ -478,29 +1119,30 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, vpi->DstReg.Index = u_temp_i; } - operands = op_operands(vpi->Opcode); - are_srcs_scalar = operands & SCALAR_FLAG; - operands &= OP_MASK; + num_operands = _mesa_num_inst_src_regs(vpi->Opcode); - for (i = 0; i < operands; i++) + /* copy the sources (src) from mesa into a local variable... is this needed? */ + for (i = 0; i < num_operands; i++) { src[i] = vpi->SrcReg[i]; + } - if (operands == 3) { /* TODO: scalars */ + if (num_operands == 3) { /* TODO: scalars */ if (CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + u_temp_i, VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), + MAKE_VSF_SOURCE(t_src_index + (vp, &src[2]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[2].File), - VSF_FLAG_NONE) | (src[2]. - RelAddr << - 4); + t_src_class(src[2]. + File), + VSF_FLAG_NONE) | + (src[2].RelAddr << 4); o_inst->src[1] = ZERO_SRC_2; o_inst->src[2] = ZERO_SRC_2; @@ -511,24 +1153,24 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, src[2].RelAddr = 0; u_temp_i--; } - } - if (operands >= 2) { + if (num_operands >= 2) { if (CMP_SRCS(src[1], src[0])) { - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i, - VSF_FLAG_ALL, + o_inst->opcode = + MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, + u_temp_i, VSF_FLAG_ALL, VSF_OUT_CLASS_TMP); o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), + MAKE_VSF_SOURCE(t_src_index + (vp, &src[0]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, - t_src_class(src[0].File), - VSF_FLAG_NONE) | (src[0]. - RelAddr << - 4); + t_src_class(src[0]. + File), + VSF_FLAG_NONE) | + (src[0].RelAddr << 4); o_inst->src[1] = ZERO_SRC_0; o_inst->src[2] = ZERO_SRC_0; @@ -541,517 +1183,101 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, } } - /* These ops need special handling. */ switch (vpi->Opcode) { - case OPCODE_POW: - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_POW, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = t_src_scalar(vp, &src[1]); - goto next; - - case OPCODE_MOV: //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} - case OPCODE_SWZ: -#if 1 - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#else - hw_op = - (src[0].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = ZERO_SRC_0; -#endif - - goto next; - + case OPCODE_ABS: + t_opcode_abs(vp, vpi, o_inst, src); + break; case OPCODE_ADD: -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = ONE_SRC_0; - o_inst->src[1] = t_src(vp, &src[0]); - o_inst->src[2] = t_src(vp, &src[1]); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - -#endif - goto next; - - case OPCODE_MAD: - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == PROGRAM_TEMPORARY && - src[2].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - goto next; - - case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */ - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_DP3: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - SWIZZLE_ZERO, - t_src_class(src[1].File), - src[1]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_SUB: //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W -#if 1 - hw_op = (src[0].File == PROGRAM_TEMPORARY && - src[1].File == - PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : - R300_VPI_OUT_OP_MAD; - - o_inst->op = - MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ONE_SRC_0; - o_inst->src[2] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); -#else - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), - t_swizzle(GET_SWZ - (src[1].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[1].Swizzle, 3)), - t_src_class(src[1].File), - (!src[1]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[1]. - RelAddr << 4); - o_inst->src[2] = 0; -#endif - goto next; - - case OPCODE_ABS: //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 3)), - t_src_class(src[0].File), - (!src[0]. - NegateBase) ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[2] = 0; - goto next; - + t_opcode_add(vp, vpi, o_inst, src); + break; + case OPCODE_ARL: + t_opcode_arl(vp, vpi, o_inst, src); + break; + case OPCODE_DP3: + t_opcode_dp3(vp, vpi, o_inst, src); + break; + case OPCODE_DP4: + t_opcode_dp4(vp, vpi, o_inst, src); + break; + case OPCODE_DPH: + t_opcode_dph(vp, vpi, o_inst, src); + break; + case OPCODE_DST: + t_opcode_dst(vp, vpi, o_inst, src); + break; + case OPCODE_EX2: + t_opcode_ex2(vp, vpi, o_inst, src); + break; + case OPCODE_EXP: + t_opcode_exp(vp, vpi, o_inst, src); + break; case OPCODE_FLR: - /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} - ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - o_inst++; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = MAKE_VSF_SOURCE(u_temp_i, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - /* Not 100% sure about this */ - (!src[0]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE - /*VSF_FLAG_ALL */ ); - - o_inst->src[2] = ZERO_SRC_0; - u_temp_i--; - goto next; - - case OPCODE_LG2: // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_ALL : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - goto next; - - case OPCODE_LIT: //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - /* NOTE: Users swizzling might not work. */ - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - VSF_IN_COMPONENT_ZERO, // z - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - goto next; - - case OPCODE_DPH: //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = - MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), - t_swizzle(GET_SWZ - (src[0].Swizzle, 0)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 1)), - t_swizzle(GET_SWZ - (src[0].Swizzle, 2)), - VSF_IN_COMPONENT_ONE, - t_src_class(src[0].File), - src[0]. - NegateBase ? VSF_FLAG_XYZ : - VSF_FLAG_NONE) | (src[0]. - RelAddr << 4); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - goto next; - - case OPCODE_XPD: - /* mul r0, r1.yzxw, r2.zxyw - mad r0, -r2.yzxw, r1.zxyw, r0 - NOTE: might need MAD_2 - */ - - o_inst->op = MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i, - t_dst_mask(vpi->DstReg. - WriteMask), - VSF_OUT_CLASS_TMP); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - src[1]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[2] = ZERO_SRC_1; - o_inst++; - u_temp_i--; - - o_inst->op = - MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w - t_src_class(src[1]. - File), - (!src[1]. - NegateBase) ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[1].RelAddr << 4); - - o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z - t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x - t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y - t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w - t_src_class(src[0]. - File), - src[0]. - NegateBase ? - VSF_FLAG_ALL : - VSF_FLAG_NONE) | - (src[0].RelAddr << 4); - - o_inst->src[2] = MAKE_VSF_SOURCE(u_temp_i + 1, - VSF_IN_COMPONENT_X, - VSF_IN_COMPONENT_Y, - VSF_IN_COMPONENT_Z, - VSF_IN_COMPONENT_W, - VSF_IN_CLASS_TMP, - VSF_FLAG_NONE); - - goto next; - - case OPCODE_RCC: - fprintf(stderr, "Dont know how to handle op %d yet\n", - vpi->Opcode); - _mesa_exit(-1); + /* FIXME */ + o_inst = t_opcode_flr(vp, vpi, o_inst, src, &u_temp_i); + break; + case OPCODE_FRC: + t_opcode_frc(vp, vpi, o_inst, src); + break; + case OPCODE_LG2: + t_opcode_lg2(vp, vpi, o_inst, src); + break; + case OPCODE_LIT: + t_opcode_lit(vp, vpi, o_inst, src); + break; + case OPCODE_LOG: + t_opcode_log(vp, vpi, o_inst, src); + break; + case OPCODE_MAD: + t_opcode_mad(vp, vpi, o_inst, src); + break; + case OPCODE_MAX: + t_opcode_max(vp, vpi, o_inst, src); + break; + case OPCODE_MIN: + t_opcode_min(vp, vpi, o_inst, src); + break; + case OPCODE_MOV: + t_opcode_mov(vp, vpi, o_inst, src); break; - case OPCODE_END: + case OPCODE_MUL: + t_opcode_mul(vp, vpi, o_inst, src); + break; + case OPCODE_POW: + t_opcode_pow(vp, vpi, o_inst, src); + break; + case OPCODE_RCP: + t_opcode_rcp(vp, vpi, o_inst, src); + break; + case OPCODE_RSQ: + t_opcode_rsq(vp, vpi, o_inst, src); + break; + case OPCODE_SGE: + t_opcode_sge(vp, vpi, o_inst, src); + break; + case OPCODE_SLT: + t_opcode_slt(vp, vpi, o_inst, src); + break; + case OPCODE_SUB: + t_opcode_sub(vp, vpi, o_inst, src); + break; + case OPCODE_SWZ: + t_opcode_swz(vp, vpi, o_inst, src); + break; + case OPCODE_XPD: + /* FIXME */ + o_inst = t_opcode_xpd(vp, vpi, o_inst, src, &u_temp_i); break; default: + assert(0); break; } - - o_inst->op = - MAKE_VSF_OP(t_opcode(vpi->Opcode), - t_dst_index(vp, &vpi->DstReg), - t_dst_mask(vpi->DstReg.WriteMask), - t_dst_class(vpi->DstReg.File)); - - if (are_srcs_scalar) { - switch (operands) { - case 1: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src_scalar(vp, &src[0]); - o_inst->src[1] = t_src_scalar(vp, &src[1]); - o_inst->src[2] = t_src_scalar(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } - } else { - switch (operands) { - case 1: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = ZERO_SRC_0; - o_inst->src[2] = ZERO_SRC_0; - break; - - case 2: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = ZERO_SRC_1; - break; - - case 3: - o_inst->src[0] = t_src(vp, &src[0]); - o_inst->src[1] = t_src(vp, &src[1]); - o_inst->src[2] = t_src(vp, &src[2]); - break; - - default: - fprintf(stderr, - "scalars and op RCC not handled yet"); - _mesa_exit(-1); - break; - } - } - next:; } - /* Will most likely segfault before we get here... fix later. */ - if (o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH / 4) { + vp->program.length = (o_inst - vp->program.body.i) * 4; + if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { vp->program.length = 0; vp->native = GL_FALSE; - return; } - vp->program.length = (o_inst - vp->program.body.i) * 4; #if 0 fprintf(stderr, "hw program:\n"); for (i = 0; i < vp->program.length; i++) @@ -1065,7 +1291,8 @@ static void position_invariant(struct gl_program *prog) struct gl_program_parameter_list *paramList; int i; - gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 }; + gl_state_index tokens[STATE_LENGTH] = + { STATE_MVP_MATRIX, 0, 0, 0, 0 }; /* tokens[4] = matrix modifier */ #ifdef PREFER_DP4 @@ -1159,8 +1386,8 @@ static void insert_wpos(struct r300_vertex_program *vp, prog->NumInstructions - 1); /* END */ _mesa_copy_instructions(&vpi[prog->NumInstructions + 1], - &prog->Instructions[prog->NumInstructions - 1], - 1); + &prog->Instructions[prog->NumInstructions - + 1], 1); vpi_insert = &vpi[prog->NumInstructions - 1]; vpi_insert[i].Opcode = OPCODE_MOV; @@ -1206,8 +1433,8 @@ static void pos_as_texcoord(struct r300_vertex_program *vp, prog->NumTemporaries++; for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) { - if (vpi->DstReg.File == PROGRAM_OUTPUT && - vpi->DstReg.Index == VERT_RESULT_HPOS) { + if (vpi->DstReg.File == PROGRAM_OUTPUT + && vpi->DstReg.Index == VERT_RESULT_HPOS) { vpi->DstReg.File = PROGRAM_TEMPORARY; vpi->DstReg.Index = tempregi; } @@ -1223,20 +1450,18 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key vp = _mesa_calloc(sizeof(*vp)); _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); - vp->wpos_idx = wpos_idx; if (mesa_vp->IsPositionInvariant) { position_invariant(&mesa_vp->Base); } - if (wpos_idx > -1) + if (wpos_idx > -1) { pos_as_texcoord(vp, &mesa_vp->Base); + } assert(mesa_vp->Base.NumInstructions); - vp->num_temporaries = mesa_vp->Base.NumTemporaries; - r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); return vp; @@ -1252,11 +1477,10 @@ void r300SelectVertexShader(r300ContextPtr r300) struct r300_vertex_program *vp; GLint wpos_idx; - vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; + vpc = + (struct r300_vertex_program_cont *)ctx->VertexProgram._Current; InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - wpos_idx = -1; if (InputsRead & FRAG_BIT_WPOS) { for (i = 0; i < ctx->Const.MaxTextureUnits; i++) @@ -1271,28 +1495,35 @@ void r300SelectVertexShader(r300ContextPtr r300) InputsRead |= (FRAG_BIT_TEX0 << i); wpos_idx = i; } + wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; + wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + + wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS; - if (InputsRead & FRAG_BIT_COL0) + if (InputsRead & FRAG_BIT_COL0) { wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0; + } - if ((InputsRead & FRAG_BIT_COL1) /*|| - (InputsRead & FRAG_BIT_FOGC) */ ) + if ((InputsRead & FRAG_BIT_COL1)) { wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1; + } - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) - if (InputsRead & (FRAG_BIT_TEX0 << i)) + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); + } + } - wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; if (vpc->mesa_program.IsPositionInvariant) { /* we wan't position don't we ? */ wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS); } for (vp = vpc->progs; vp; vp = vp->next) - if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == - 0) { + if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) + == 0) { r300->selected_vp = vp; return; } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 252d5a901f..2d399e243a 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -3,10 +3,10 @@ #include "r300_reg.h" -typedef struct { - GLuint op; +struct r300_vertprog_instruction { + GLuint opcode; GLuint src[3]; -} VERTEX_SHADER_INSTRUCTION; +}; #define VSF_FLAG_X 1 #define VSF_FLAG_Y 2 diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 3d20ba7d14..841c6a5302 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.1 + * Version: 7.0.1 * - * Copyright (C) 1999-2003 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -100,6 +100,11 @@ _mesa_validate_DrawElements(GLcontext *ctx, (const GLubyte *) indices); } } + else { + /* not using a VBO */ + if (!indices) + return GL_FALSE; + } if (ctx->Const.CheckArrayBounds) { /* find max array index */ @@ -170,6 +175,16 @@ _mesa_validate_DrawRangeElements(GLcontext *ctx, GLenum mode, && !(ctx->VertexProgram._Enabled && ctx->Array.ArrayObj->VertexAttrib[0].Enabled)) return GL_FALSE; + /* Vertex buffer object tests */ + if (ctx->Array.ElementArrayBufferObj->Name) { + /* XXX re-use code from above? */ + } + else { + /* not using VBO */ + if (!indices) + return GL_FALSE; + } + if (ctx->Const.CheckArrayBounds) { /* Find max array index. * We don't trust the user's start and end values. diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 21adcf3210..2ad1badac7 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -1495,9 +1495,20 @@ _mesa_make_current( GLcontext *newCtx, GLframebuffer *drawBuffer, */ if (!newCtx->DrawBuffer || newCtx->DrawBuffer->Name == 0) { _mesa_reference_framebuffer(&newCtx->DrawBuffer, drawBuffer); + /* fix up the fb fields - these will end up wrong otherwise + if the DRIdrawable changes, and someone may rely on them. + */ + /* What a mess!?! */ + int i; + GLenum buffers[MAX_DRAW_BUFFERS]; + for(i = 0; i < newCtx->Const.MaxDrawBuffers; i++) { + buffers[i] = newCtx->Color.DrawBuffer[i]; + } + _mesa_drawbuffers(newCtx, newCtx->Const.MaxDrawBuffers, buffers, NULL); } if (!newCtx->ReadBuffer || newCtx->ReadBuffer->Name == 0) { _mesa_reference_framebuffer(&newCtx->ReadBuffer, readBuffer); + _mesa_ReadBuffer(newCtx->Pixel.ReadBuffer); } newCtx->NewState |= _NEW_BUFFERS; diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 1fd31a5321..dc10d9ffbc 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -65,7 +65,9 @@ compute_depth_max(struct gl_framebuffer *fb) fb->_DepthMax = 0xffffffff; } fb->_DepthMaxF = (GLfloat) fb->_DepthMax; - fb->_MRD = 1.0; /* Minimum resolvable depth value, for polygon offset */ + + /* Minimum resolvable depth value, for polygon offset */ + fb->_MRD = 1.0 / fb->_DepthMaxF; } diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index e874719e64..ba46cdc1b1 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -3882,9 +3882,19 @@ _mesa_pack_stencil_span( const GLcontext *ctx, GLuint n, } \ } while (0) + +/** + * Unpack a row of depth/z values from memory, returning GLushort, GLuint + * or GLfloat values. + * The glPixelTransfer (scale/bias) params will be applied. + * + * \param dstType one of GL_UNSIGNED_SHORT, GL_UNSIGNED_INT, GL_FLOAT + * \param depthMax max value for returned GLushort or GLuint values + * (ignored for GLfloat). + */ void _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, - GLenum dstType, GLvoid *dest, GLfloat depthScale, + GLenum dstType, GLvoid *dest, GLuint depthMax, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking ) { @@ -3907,7 +3917,9 @@ _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, } return; } - if (srcType == GL_UNSIGNED_SHORT && dstType == GL_UNSIGNED_INT) { + if (srcType == GL_UNSIGNED_SHORT + && dstType == GL_UNSIGNED_INT + && depthMax == 0xffffffff) { const GLushort *src = (const GLushort *) source; GLuint *dst = (GLuint *) dest; GLuint i; @@ -3955,7 +3967,7 @@ _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, break; case GL_UNSIGNED_INT_24_8_EXT: /* GL_EXT_packed_depth_stencil */ if (dstType == GL_UNSIGNED_INT && - depthScale == (GLfloat) 0xffffff && + depthMax == 0xffffff && ctx->Pixel.DepthScale == 1.0 && ctx->Pixel.DepthBias == 0.0) { const GLuint *src = (const GLuint *) source; @@ -4033,16 +4045,16 @@ _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, if (dstType == GL_UNSIGNED_INT) { GLuint *zValues = (GLuint *) dest; GLuint i; - if (depthScale <= (GLfloat) 0xffffff) { + if (depthMax <= 0xffffff) { /* no overflow worries */ for (i = 0; i < n; i++) { - zValues[i] = (GLuint) (depthValues[i] * depthScale); + zValues[i] = (GLuint) (depthValues[i] * (GLfloat) depthMax); } } else { /* need to use double precision to prevent overflow problems */ for (i = 0; i < n; i++) { - GLdouble z = depthValues[i] * depthScale; + GLdouble z = depthValues[i] * (GLfloat) depthMax; if (z >= (GLdouble) 0xffffffff) zValues[i] = 0xffffffff; else @@ -4053,14 +4065,14 @@ _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, else if (dstType == GL_UNSIGNED_SHORT) { GLushort *zValues = (GLushort *) dest; GLuint i; - ASSERT(depthScale <= 65535.0); + ASSERT(depthMax <= 0xffff); for (i = 0; i < n; i++) { - zValues[i] = (GLushort) (depthValues[i] * depthScale); + zValues[i] = (GLushort) (depthValues[i] * (GLfloat) depthMax); } } else { ASSERT(dstType == GL_FLOAT); - ASSERT(depthScale == 1.0F); + /*ASSERT(depthMax == 1.0F);*/ } } diff --git a/src/mesa/main/image.h b/src/mesa/main/image.h index 990398a7c4..2a16989fa7 100644 --- a/src/mesa/main/image.h +++ b/src/mesa/main/image.h @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5.2 + * Version: 7.1 * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -181,7 +181,7 @@ _mesa_pack_stencil_span( const GLcontext *ctx, GLuint n, extern void _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, - GLenum dstType, GLvoid *dest, GLfloat depthScale, + GLenum dstType, GLvoid *dest, GLuint depthMax, GLenum srcType, const GLvoid *source, const struct gl_pixelstore_attrib *srcPacking ); diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index f902365b9b..3420d8e2ba 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1264,6 +1264,10 @@ _mesa_init_teximage_fields(GLcontext *ctx, GLenum target, * A hardware driver might override this function if, for example, the * max 3D texture size is 512x512x64 (i.e. not a cube). * + * Note that width, height, depth == 0 is not an error. However, a + * texture with zero width/height/depth will be considered "incomplete" + * and texturing will effectively be disabled. + * * \param target one of GL_PROXY_TEXTURE_1D, GL_PROXY_TEXTURE_2D, * GL_PROXY_TEXTURE_3D, GL_PROXY_TEXTURE_RECTANGLE_NV, * GL_PROXY_TEXTURE_CUBE_MAP_ARB. @@ -1293,7 +1297,7 @@ _mesa_test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level, maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); if (width < 2 * border || width > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || + width >0 && _mesa_bitcount(width - 2 * border) != 1) || level >= ctx->Const.MaxTextureLevels) { /* bad width or level */ return GL_FALSE; @@ -1303,10 +1307,10 @@ _mesa_test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level, maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); if (width < 2 * border || width > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || + width > 0 && _mesa_bitcount(width - 2 * border) != 1) || height < 2 * border || height > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(height - 2 * border) != 1) || + height > 0 && _mesa_bitcount(height - 2 * border) != 1) || level >= ctx->Const.MaxTextureLevels) { /* bad width or height or level */ return GL_FALSE; @@ -1316,21 +1320,21 @@ _mesa_test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level, maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1); if (width < 2 * border || width > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || + width > 0 && _mesa_bitcount(width - 2 * border) != 1) || height < 2 * border || height > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(height - 2 * border) != 1) || + height > 0 && _mesa_bitcount(height - 2 * border) != 1) || depth < 2 * border || depth > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(depth - 2 * border) != 1) || + depth > 0 && _mesa_bitcount(depth - 2 * border) != 1) || level >= ctx->Const.Max3DTextureLevels) { /* bad width or height or depth or level */ return GL_FALSE; } return GL_TRUE; case GL_PROXY_TEXTURE_RECTANGLE_NV: - if (width < 1 || width > ctx->Const.MaxTextureRectSize || - height < 1 || height > ctx->Const.MaxTextureRectSize || + if (width < 0 || width > ctx->Const.MaxTextureRectSize || + height < 0 || height > ctx->Const.MaxTextureRectSize || level != 0) { /* bad width or height or level */ return GL_FALSE; @@ -1340,10 +1344,10 @@ _mesa_test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level, maxSize = 1 << (ctx->Const.MaxCubeTextureLevels - 1); if (width < 2 * border || width > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || + width > 0 && _mesa_bitcount(width - 2 * border) != 1) || height < 2 * border || height > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(height - 2 * border) != 1) || + height > 0 && _mesa_bitcount(height - 2 * border) != 1) || level >= ctx->Const.MaxCubeTextureLevels) { /* bad width or height */ return GL_FALSE; @@ -1353,7 +1357,7 @@ _mesa_test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level, maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); if (width < 2 * border || width > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || + width > 0 && _mesa_bitcount(width - 2 * border) != 1) || level >= ctx->Const.MaxTextureLevels) { /* bad width or level */ return GL_FALSE; @@ -1367,10 +1371,10 @@ _mesa_test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level, maxSize = 1 << (ctx->Const.MaxTextureLevels - 1); if (width < 2 * border || width > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(width - 2 * border) != 1) || + width > 0 && _mesa_bitcount(width - 2 * border) != 1) || height < 2 * border || height > 2 + maxSize || (!ctx->Extensions.ARB_texture_non_power_of_two && - _mesa_bitcount(height - 2 * border) != 1) || + height > 0 && _mesa_bitcount(height - 2 * border) != 1) || level >= ctx->Const.MaxTextureLevels) { /* bad width or height or level */ return GL_FALSE; @@ -1472,7 +1476,7 @@ texture_error_check( GLcontext *ctx, GLenum target, if (target == GL_PROXY_TEXTURE_1D || target == GL_TEXTURE_1D) { proxy_target = GL_PROXY_TEXTURE_1D; height = 1; - width = 1; + depth = 1; } else { _mesa_error( ctx, GL_INVALID_ENUM, "glTexImage1D(target)" ); diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 9b8a06df14..3b5151ed17 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -1077,7 +1077,7 @@ _mesa_texstore_rgba(TEXSTORE_PARAMS) GLboolean _mesa_texstore_z32(TEXSTORE_PARAMS) { - const GLfloat depthScale = (GLfloat) 0xffffffff; + const GLuint depthScale = 0xffffffff; (void) dims; ASSERT(dstFormat == &_mesa_texformat_z32); ASSERT(dstFormat->TexelBytes == sizeof(GLuint)); @@ -1124,7 +1124,7 @@ _mesa_texstore_z32(TEXSTORE_PARAMS) GLboolean _mesa_texstore_z16(TEXSTORE_PARAMS) { - const GLfloat depthScale = 65535.0f; + const GLuint depthScale = 0xffff; (void) dims; ASSERT(dstFormat == &_mesa_texformat_z16); ASSERT(dstFormat->TexelBytes == sizeof(GLushort)); @@ -2319,6 +2319,8 @@ _mesa_texstore_ycbcr(TEXSTORE_PARAMS) GLboolean _mesa_texstore_z24_s8(TEXSTORE_PARAMS) { + const GLuint depthScale = 0xffffff; + ASSERT(dstFormat == &_mesa_texformat_z24_s8); ASSERT(srcFormat == GL_DEPTH_STENCIL_EXT); ASSERT(srcType == GL_UNSIGNED_INT_24_8_EXT); @@ -2357,7 +2359,7 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) _mesa_unpack_depth_span(ctx, srcWidth, GL_UNSIGNED_INT, /* dst type */ dstRow, /* dst addr */ - (GLfloat) 0xffffff, /* depthScale */ + depthScale, srcType, src, srcPacking); /* get the 8-bit stencil values */ _mesa_unpack_stencil_span(ctx, srcWidth, diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 66509d56db..1a931326af 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -779,7 +779,7 @@ _mesa_get_programiv(GLcontext *ctx, GLuint program, *params = shProg->Validated; break; case GL_INFO_LOG_LENGTH: - *params = shProg->InfoLog ? strlen(shProg->InfoLog) : 0; + *params = shProg->InfoLog ? strlen(shProg->InfoLog) + 1 : 0; break; case GL_ATTACHED_SHADERS: *params = shProg->NumShaders; @@ -831,10 +831,10 @@ _mesa_get_shaderiv(GLcontext *ctx, GLuint name, GLenum pname, GLint *params) *params = shader->CompileStatus; break; case GL_INFO_LOG_LENGTH: - *params = shader->InfoLog ? strlen(shader->InfoLog) : 0; + *params = shader->InfoLog ? strlen(shader->InfoLog) + 1 : 0; break; case GL_SHADER_SOURCE_LENGTH: - *params = shader->Source ? strlen((char *) shader->Source) : 0; + *params = shader->Source ? strlen((char *) shader->Source) + 1 : 0; break; default: _mesa_error(ctx, GL_INVALID_ENUM, "glGetShaderiv(pname)"); diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 2b5196f095..35e80e0452 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 6.5.3 + * Version: 7.1 * * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. * @@ -2328,7 +2328,8 @@ _slang_gen_field(slang_assemble_ctx * A, slang_operation *oper) n = _slang_gen_swizzle(n, swizzle); return n; } - else if (ti.spec.type == SLANG_SPEC_FLOAT) { + else if ( ti.spec.type == SLANG_SPEC_FLOAT + || ti.spec.type == SLANG_SPEC_INT) { const GLuint rows = 1; slang_swizzle swz; slang_ir_node *n; diff --git a/src/mesa/swrast/s_drawpix.c b/src/mesa/swrast/s_drawpix.c index 1c9f64b275..0cf425e1c6 100644 --- a/src/mesa/swrast/s_drawpix.c +++ b/src/mesa/swrast/s_drawpix.c @@ -484,7 +484,7 @@ draw_depth_pixels( GLcontext *ctx, GLint x, GLint y, } else { /* General case */ - const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF; + const GLuint depthMax = ctx->DrawBuffer->_DepthMax; GLint skipPixels = 0; /* in case width > MAX_WIDTH do the copy in chunks */ @@ -695,7 +695,7 @@ draw_depth_stencil_pixels(GLcontext *ctx, GLint x, GLint y, const GLint imgX = x, imgY = y; const GLboolean scaleOrBias = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0; - const GLfloat depthScale = ctx->DrawBuffer->_DepthMaxF; + const GLuint depthMax = ctx->DrawBuffer->_DepthMax; const GLuint stencilMask = ctx->Stencil.WriteMask[0]; const GLuint stencilType = (STENCIL_BITS == 8) ? GL_UNSIGNED_BYTE : GL_UNSIGNED_SHORT; @@ -783,7 +783,7 @@ draw_depth_stencil_pixels(GLcontext *ctx, GLint x, GLint y, /* general case */ GLuint zValues[MAX_WIDTH]; /* 16 or 32-bit Z value storage */ _mesa_unpack_depth_span(ctx, width, - depthRb->DataType, zValues, depthScale, + depthRb->DataType, zValues, depthMax, type, depthStencilSrc, &clippedUnpack); if (zoom) { _swrast_write_zoomed_z_span(ctx, imgX, imgY, width, x, diff --git a/src/mesa/swrast/s_stencil.c b/src/mesa/swrast/s_stencil.c index 89991fad02..d0cbdd6917 100644 --- a/src/mesa/swrast/s_stencil.c +++ b/src/mesa/swrast/s_stencil.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5 + * Version: 7.1 * - * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -392,6 +392,23 @@ do_stencil_test( GLcontext *ctx, GLuint face, GLuint n, GLstencil stencil[], } +/** + * Compute the zpass/zfail masks by comparing the pre- and post-depth test + * masks. + */ +static INLINE void +compute_pass_fail_masks(GLuint n, const GLubyte origMask[], + const GLubyte newMask[], + GLubyte passMask[], GLubyte failMask[]) +{ + GLuint i; + for (i = 0; i < n; i++) { + ASSERT(newMask[i] == 0 || newMask[i] == 1); + passMask[i] = origMask[i] & newMask[i]; + failMask[i] = origMask[i] & (newMask[i] ^ 1); + } +} + /** * Apply stencil and depth testing to the span of pixels. @@ -460,39 +477,24 @@ stencil_and_ztest_span(GLcontext *ctx, SWspan *span, GLuint face) /* * Perform depth buffering, then apply zpass or zfail stencil function. */ - GLubyte passmask[MAX_WIDTH], failmask[MAX_WIDTH], oldmask[MAX_WIDTH]; - GLuint i; + GLubyte passMask[MAX_WIDTH], failMask[MAX_WIDTH], origMask[MAX_WIDTH]; /* save the current mask bits */ - _mesa_memcpy(oldmask, mask, n * sizeof(GLubyte)); + _mesa_memcpy(origMask, mask, n * sizeof(GLubyte)); /* apply the depth test */ _swrast_depth_test_span(ctx, span); - /* Set the stencil pass/fail flags according to result of depth testing. - * if oldmask[i] == 0 then - * Don't touch the stencil value - * else if oldmask[i] and newmask[i] then - * Depth test passed - * else - * assert(oldmask[i] && !newmask[i]) - * Depth test failed - * endif - */ - for (i=0;i<n;i++) { - ASSERT(mask[i] == 0 || mask[i] == 1); - passmask[i] = oldmask[i] & mask[i]; - failmask[i] = oldmask[i] & (mask[i] ^ 1); - } + compute_pass_fail_masks(n, origMask, mask, passMask, failMask); /* apply the pass and fail operations */ if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { apply_stencil_op( ctx, ctx->Stencil.ZFailFunc[face], face, - n, stencil, failmask ); + n, stencil, failMask ); } if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { apply_stencil_op( ctx, ctx->Stencil.ZPassFunc[face], face, - n, stencil, passmask ); + n, stencil, passMask ); } } @@ -902,6 +904,7 @@ stencil_test_pixels( GLcontext *ctx, GLuint face, GLuint n, static GLboolean stencil_and_ztest_pixels( GLcontext *ctx, SWspan *span, GLuint face ) { + GLubyte passMask[MAX_WIDTH], failMask[MAX_WIDTH], origMask[MAX_WIDTH]; struct gl_framebuffer *fb = ctx->DrawBuffer; struct gl_renderbuffer *rb = fb->_StencilBuffer; const GLuint n = span->end; @@ -916,13 +919,10 @@ stencil_and_ztest_pixels( GLcontext *ctx, SWspan *span, GLuint face ) if (!rb->GetPointer(ctx, rb, 0, 0)) { /* No direct access */ GLstencil stencil[MAX_WIDTH]; - GLubyte origMask[MAX_WIDTH]; ASSERT(rb->DataType == GL_UNSIGNED_BYTE); _swrast_get_values(ctx, rb, n, x, y, stencil, sizeof(GLubyte)); - _mesa_memcpy(origMask, mask, n * sizeof(GLubyte)); - (void) do_stencil_test(ctx, face, n, stencil, mask); if (ctx->Depth.Test == GL_FALSE) { @@ -930,27 +930,19 @@ stencil_and_ztest_pixels( GLcontext *ctx, SWspan *span, GLuint face ) n, stencil, mask); } else { + _mesa_memcpy(origMask, mask, n * sizeof(GLubyte)); + _swrast_depth_test_span(ctx, span); + compute_pass_fail_masks(n, origMask, mask, passMask, failMask); + if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { - GLubyte failmask[MAX_WIDTH]; - GLuint i; - for (i = 0; i < n; i++) { - ASSERT(mask[i] == 0 || mask[i] == 1); - failmask[i] = origMask[i] & (mask[i] ^ 1); - } apply_stencil_op(ctx, ctx->Stencil.ZFailFunc[face], face, - n, stencil, failmask); + n, stencil, failMask); } if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { - GLubyte passmask[MAX_WIDTH]; - GLuint i; - for (i = 0; i < n; i++) { - ASSERT(mask[i] == 0 || mask[i] == 1); - passmask[i] = origMask[i] & mask[i]; - } apply_stencil_op(ctx, ctx->Stencil.ZPassFunc[face], face, - n, stencil, passmask); + n, stencil, passMask); } } @@ -972,28 +964,21 @@ stencil_and_ztest_pixels( GLcontext *ctx, SWspan *span, GLuint face ) ctx->Stencil.ZPassFunc[face], face, mask); } else { - GLubyte passmask[MAX_WIDTH], failmask[MAX_WIDTH], oldmask[MAX_WIDTH]; - GLuint i; - - _mesa_memcpy(oldmask, mask, n * sizeof(GLubyte)); + _mesa_memcpy(origMask, mask, n * sizeof(GLubyte)); _swrast_depth_test_span(ctx, span); - for (i=0;i<n;i++) { - ASSERT(mask[i] == 0 || mask[i] == 1); - passmask[i] = oldmask[i] & mask[i]; - failmask[i] = oldmask[i] & (mask[i] ^ 1); - } + compute_pass_fail_masks(n, origMask, mask, passMask, failMask); if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { apply_stencil_op_to_pixels(ctx, n, x, y, ctx->Stencil.ZFailFunc[face], - face, failmask); + face, failMask); } if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { apply_stencil_op_to_pixels(ctx, n, x, y, ctx->Stencil.ZPassFunc[face], - face, passmask); + face, passMask); } } diff --git a/src/mesa/swrast_setup/ss_context.c b/src/mesa/swrast_setup/ss_context.c index f8a1cadfa5..a9c7d941e5 100644 --- a/src/mesa/swrast_setup/ss_context.c +++ b/src/mesa/swrast_setup/ss_context.c @@ -280,26 +280,29 @@ _swsetup_Translate( GLcontext *ctx, const void *vertex, SWvertex *dest ) /** XXX try to limit these loops someday */ for (i = 0 ; i < ctx->Const.MaxTextureCoordUnits ; i++) - _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_TEX0+i, + _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_TEX0 + i, dest->attrib[FRAG_ATTRIB_TEX0 + i] ); for (i = 0 ; i < ctx->Const.MaxVarying ; i++) - _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_GENERIC0+i, + _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_GENERIC0 + i, dest->attrib[FRAG_ATTRIB_VAR0 + i] ); - _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_COLOR0, tmp ); - /* XXX need float color FRAG_ATTRIB_COL0?? */ - UNCLAMPED_FLOAT_TO_RGBA_CHAN( dest->color, tmp ); + if (ctx->Visual.rgbMode) { + _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_COLOR0, + dest->attrib[FRAG_ATTRIB_COL0] ); + UNCLAMPED_FLOAT_TO_RGBA_CHAN( dest->color, tmp ); - _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_COLOR1, tmp ); - COPY_4V(dest->attrib[FRAG_ATTRIB_COL1], tmp); + _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_COLOR1, + dest->attrib[FRAG_ATTRIB_COL1]); + } + else { + _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_COLOR_INDEX, tmp ); + dest->attrib[FRAG_ATTRIB_CI][0] = tmp[0]; + } _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_FOG, tmp ); dest->attrib[FRAG_ATTRIB_FOGC][0] = tmp[0]; - _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_COLOR_INDEX, tmp ); - dest->attrib[FRAG_ATTRIB_CI][0] = tmp[0]; - /* XXX See _tnl_get_attr about pointsize ... */ _tnl_get_attr( ctx, vertex, _TNL_ATTRIB_POINTSIZE, tmp ); dest->pointSize = tmp[0]; diff --git a/src/mesa/swrast_setup/ss_tritmp.h b/src/mesa/swrast_setup/ss_tritmp.h index c14468e951..f6b738d60d 100644 --- a/src/mesa/swrast_setup/ss_tritmp.h +++ b/src/mesa/swrast_setup/ss_tritmp.h @@ -37,7 +37,7 @@ static void TAG(triangle)(GLcontext *ctx, GLuint e0, GLuint e1, GLuint e2 ) SWvertex *verts = SWSETUP_CONTEXT(ctx)->verts; SWvertex *v[3]; GLfloat z[3]; - GLfloat offset; + GLfloat offset, oz0, oz1, oz2; GLenum mode = GL_FILL; GLuint facing = 0; GLchan saved_color[3][4]; @@ -142,12 +142,16 @@ static void TAG(triangle)(GLcontext *ctx, GLuint e0, GLuint e1, GLuint e2 ) } } - if (IND & SS_OFFSET_BIT) - { - offset = ctx->Polygon.OffsetUnits * ctx->DrawBuffer->_MRD; + if (IND & SS_OFFSET_BIT) { + const GLfloat max = ctx->DrawBuffer->_DepthMaxF; + /* save original Z values (restored later) */ z[0] = v[0]->attrib[FRAG_ATTRIB_WPOS][2]; z[1] = v[1]->attrib[FRAG_ATTRIB_WPOS][2]; z[2] = v[2]->attrib[FRAG_ATTRIB_WPOS][2]; + /* Note that Z values are already scaled to [0,65535] (for example) + * so no MRD value is used here. + */ + offset = ctx->Polygon.OffsetUnits; if (cc * cc > 1e-16) { const GLfloat ez = z[0] - z[2]; const GLfloat fz = z[1] - z[2]; @@ -155,35 +159,33 @@ static void TAG(triangle)(GLcontext *ctx, GLuint e0, GLuint e1, GLuint e2 ) const GLfloat dzdx = FABSF((ey * fz - ez * fy) * oneOverArea); const GLfloat dzdy = FABSF((ez * fx - ex * fz) * oneOverArea); offset += MAX2(dzdx, dzdy) * ctx->Polygon.OffsetFactor; - /* Unfortunately, we need to clamp to prevent negative Zs below. - * Technically, we should do the clamping per-fragment. - */ - offset = MAX2(offset, -v[0]->attrib[FRAG_ATTRIB_WPOS][2]); - offset = MAX2(offset, -v[1]->attrib[FRAG_ATTRIB_WPOS][2]); - offset = MAX2(offset, -v[2]->attrib[FRAG_ATTRIB_WPOS][2]); } + /* new Z values */ + oz0 = CLAMP(v[0]->attrib[FRAG_ATTRIB_WPOS][2] + offset, 0.0, max); + oz1 = CLAMP(v[1]->attrib[FRAG_ATTRIB_WPOS][2] + offset, 0.0, max); + oz2 = CLAMP(v[2]->attrib[FRAG_ATTRIB_WPOS][2] + offset, 0.0, max); } } if (mode == GL_POINT) { if ((IND & SS_OFFSET_BIT) && ctx->Polygon.OffsetPoint) { - v[0]->attrib[FRAG_ATTRIB_WPOS][2] += offset; - v[1]->attrib[FRAG_ATTRIB_WPOS][2] += offset; - v[2]->attrib[FRAG_ATTRIB_WPOS][2] += offset; + v[0]->attrib[FRAG_ATTRIB_WPOS][2] = oz0; + v[1]->attrib[FRAG_ATTRIB_WPOS][2] = oz1; + v[2]->attrib[FRAG_ATTRIB_WPOS][2] = oz2; } _swsetup_render_point_tri( ctx, e0, e1, e2, facing ); } else if (mode == GL_LINE) { if ((IND & SS_OFFSET_BIT) && ctx->Polygon.OffsetLine) { - v[0]->attrib[FRAG_ATTRIB_WPOS][2] += offset; - v[1]->attrib[FRAG_ATTRIB_WPOS][2] += offset; - v[2]->attrib[FRAG_ATTRIB_WPOS][2] += offset; + v[0]->attrib[FRAG_ATTRIB_WPOS][2] = oz0; + v[1]->attrib[FRAG_ATTRIB_WPOS][2] = oz1; + v[2]->attrib[FRAG_ATTRIB_WPOS][2] = oz2; } _swsetup_render_line_tri( ctx, e0, e1, e2, facing ); } else { if ((IND & SS_OFFSET_BIT) && ctx->Polygon.OffsetFill) { - v[0]->attrib[FRAG_ATTRIB_WPOS][2] += offset; - v[1]->attrib[FRAG_ATTRIB_WPOS][2] += offset; - v[2]->attrib[FRAG_ATTRIB_WPOS][2] += offset; + v[0]->attrib[FRAG_ATTRIB_WPOS][2] = oz0; + v[1]->attrib[FRAG_ATTRIB_WPOS][2] = oz1; + v[2]->attrib[FRAG_ATTRIB_WPOS][2] = oz2; } _swrast_Triangle( ctx, v[0], v[1], v[2] ); } |