diff options
author | Dave Airlie <airlied@linux.ie> | 2009-03-22 12:01:21 +1000 |
---|---|---|
committer | Dave Airlie <airlied@linux.ie> | 2009-03-22 12:01:21 +1000 |
commit | 2d26d4ac66e8c3e48b73d3e172d0e0d2a2ad31a5 (patch) | |
tree | 6b0d899db605b28e10b9b953d181b09bf40bb472 /src/mesa | |
parent | 407e8ae5b167b0193e1e5b1266a5d61ed836dfb5 (diff) | |
parent | 699897e81c623e53be51fba0488f535b0a8d7761 (diff) |
Merge remote branch 'origin/master' into HEAD
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i915/i830_texstate.c | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i915/i915_texstate.c | 11 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 24 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_screen.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_tex.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_tex_image.c | 18 | ||||
-rw-r--r-- | src/mesa/shader/prog_optimize.c | 428 | ||||
-rw-r--r-- | src/mesa/shader/prog_statevars.c | 13 | ||||
-rw-r--r-- | src/mesa/shader/prog_statevars.h | 1 | ||||
-rw-r--r-- | src/mesa/shader/slang/slang_codegen.c | 2 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_atom_rasterizer.c | 2 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_atom_stipple.c | 38 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_cb_texture.c | 4 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_context.h | 3 | ||||
-rw-r--r-- | src/mesa/state_tracker/st_mesa_to_tgsi.c | 179 |
15 files changed, 671 insertions, 65 deletions
diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index c718bb0055..df43b779a7 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -38,7 +38,7 @@ static GLuint -translate_texture_format(GLuint mesa_format) +translate_texture_format(GLuint mesa_format, GLuint internal_format) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -56,7 +56,10 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -162,7 +165,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) 0, intelObj-> firstLevel); - format = translate_texture_format(firstImage->TexFormat->MesaFormat); + format = translate_texture_format(firstImage->TexFormat->MesaFormat, + firstImage->InternalFormat); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index adbb52a3a3..6d25f8dd8e 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -37,7 +37,8 @@ static GLuint -translate_texture_format(GLuint mesa_format, GLenum DepthMode) +translate_texture_format(GLuint mesa_format, GLuint internal_format, + GLenum DepthMode) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -55,7 +56,10 @@ translate_texture_format(GLuint mesa_format, GLenum DepthMode) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -173,7 +177,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) firstLevel); format = translate_texture_format(firstImage->TexFormat->MesaFormat, - tObj->DepthMode); + firstImage->InternalFormat, + tObj->DepthMode); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 9b320480b6..e6113eff87 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -69,7 +69,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -89,10 +90,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -161,7 +168,7 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) struct brw_wm_surface_key { GLenum target, depthmode; dri_bo *bo; - GLint format; + GLint format, internal_format; GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; @@ -199,9 +206,11 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - - if (key->bo) - surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } else { switch (key->depth) { case 32: @@ -278,6 +287,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.offset = intelObj->textureOffset; } else { key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e8c074712c..d20ea15187 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -211,6 +211,7 @@ static const __DRItexOffsetExtension intelTexOffsetExtension = { static const __DRItexBufferExtension intelTexBufferExtension = { { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, intelSetTexBuffer, + intelSetTexBuffer2, }; static const __DRIextension *intelScreenExtensions[] = { diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 742ccc043a..f5372d82fb 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -149,6 +149,8 @@ void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); +void intelSetTexBuffer2(__DRIcontext *pDRICtx, + GLint target, GLint format, __DRIdrawable *pDraw); GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 943636c37b..e902187637 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -714,7 +714,9 @@ intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, } void -intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint glx_texture_format, + __DRIdrawable *dPriv) { struct intel_framebuffer *intel_fb = dPriv->driverPrivate; struct intel_context *intel = pDRICtx->driverPrivate; @@ -745,7 +747,10 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (rb->region->cpp == 3 ? 3 : 4); + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + internalFormat = GL_RGB; + else + internalFormat = GL_RGBA; mt = intel_miptree_create_for_region(intel, target, internalFormat, @@ -785,3 +790,12 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) _mesa_unlock_texture(&intel->ctx, texObj); } + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + /* The old interface didn't have the format argument, so copy our + * implementation's behavior at the time. + */ + intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index ec06da141d..458a69f70b 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -33,6 +33,9 @@ #include "prog_print.h" +#define MAX_LOOP_NESTING 50 + + static GLboolean dbg = GL_FALSE; @@ -76,6 +79,37 @@ remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) /** + * Remap register indexes according to map. + * \param prog the program to search/replace + * \param file the type of register file to search/replace + * \param map maps old register indexes to new indexes + */ +static void +replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) +{ + GLuint i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == file) { + GLuint index = inst->SrcReg[j].Index; + ASSERT(map[index] >= 0); + inst->SrcReg[j].Index = map[index]; + } + } + if (inst->DstReg.File == file) { + const GLuint index = inst->DstReg.Index; + ASSERT(map[index] >= 0); + inst->DstReg.Index = map[index]; + } + } +} + + +/** * Consolidate temporary registers to use low numbers. For example, if the * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. */ @@ -83,7 +117,7 @@ static void _mesa_consolidate_registers(struct gl_program *prog) { GLboolean tempUsed[MAX_PROGRAM_TEMPS]; - GLuint tempMap[MAX_PROGRAM_TEMPS]; + GLint tempMap[MAX_PROGRAM_TEMPS]; GLuint tempMax = 0, i; if (dbg) { @@ -92,6 +126,10 @@ _mesa_consolidate_registers(struct gl_program *prog) memset(tempUsed, 0, sizeof(tempUsed)); + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + tempMap[i] = -1; + } + /* set tempUsed[i] if temporary [i] is referenced */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; @@ -132,26 +170,8 @@ _mesa_consolidate_registers(struct gl_program *prog) } } - /* now replace occurances of old temp indexes with new indexes */ - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - GLuint index = inst->SrcReg[j].Index; - assert(index <= tempMax); - assert(tempUsed[index]); - inst->SrcReg[j].Index = tempMap[index]; - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - assert(tempUsed[index]); - assert(index <= tempMax); - inst->DstReg.Index = tempMap[index]; - } - } + replace_regs(prog, PROGRAM_TEMPORARY, tempMap); + if (dbg) { _mesa_printf("Optimize: End register consolidation\n"); } @@ -409,6 +429,370 @@ _mesa_remove_extra_moves(struct gl_program *prog) } +/** A live register interval */ +struct interval +{ + GLuint Reg; /** The temporary register index */ + GLuint Start, End; /** Start/end instruction numbers */ +}; + + +/** A list of register intervals */ +struct interval_list +{ + GLuint Num; + struct interval Intervals[MAX_PROGRAM_TEMPS]; +}; + + +static void +append_interval(struct interval_list *list, const struct interval *inv) +{ + list->Intervals[list->Num++] = *inv; +} + + +/** Insert interval inv into list, sorted by interval end */ +static void +insert_interval_by_end(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could do a binary search insertion here since list is sorted */ + GLint i = list->Num - 1; + while (i >= 0 && list->Intervals[i].End > inv->End) { + list->Intervals[i + 1] = list->Intervals[i]; + i--; + } + list->Intervals[i + 1] = *inv; + list->Num++; + +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); + } + } +#endif +} + + +/** Remove the given interval from the interval list */ +static void +remove_interval(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could binary search since list is sorted */ + GLuint k; + for (k = 0; k < list->Num; k++) { + if (list->Intervals[k].Reg == inv->Reg) { + /* found, remove it */ + ASSERT(list->Intervals[k].Start == inv->Start); + ASSERT(list->Intervals[k].End == inv->End); + while (k < list->Num - 1) { + list->Intervals[k] = list->Intervals[k + 1]; + k++; + } + list->Num--; + return; + } + } +} + + +/** called by qsort() */ +static int +compare_start(const void *a, const void *b) +{ + const struct interval *ia = (const struct interval *) a; + const struct interval *ib = (const struct interval *) b; + if (ia->Start < ib->Start) + return -1; + else if (ia->Start > ib->Start) + return +1; + else + return 0; +} + +/** sort the interval list according to interval starts */ +static void +sort_interval_list_by_start(struct interval_list *list) +{ + qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); + } + } +#endif +} + + +/** + * Update the intermediate interval info for register 'index' and + * instruction 'ic'. + */ +static void +update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) +{ + ASSERT(index < MAX_PROGRAM_TEMPS); + if (intBegin[index] == -1) { + ASSERT(intEnd[index] == -1); + intBegin[index] = intEnd[index] = ic; + } + else { + intEnd[index] = ic; + } +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. + */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + /* Build live intervals list from intermediate arrays */ + liveIntervals->Num = 0; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (intBegin[i] >= 0) { + struct interval inv; + inv.Reg = i; + inv.Start = intBegin[i]; + inv.End = intEnd[i]; + append_interval(liveIntervals, &inv); + } + } + + /* Sort the list according to interval starts */ + sort_interval_list_by_start(liveIntervals); + + if (dbg) { + /* print interval info */ + for (i = 0; i < liveIntervals->Num; i++) { + const struct interval *inv = liveIntervals->Intervals + i; + _mesa_printf("Reg[%d] live [%d, %d]:", + inv->Reg, inv->Start, inv->End); + if (1) { + int j; + for (j = 0; j < inv->Start; j++) + _mesa_printf(" "); + for (j = inv->Start; j <= inv->End; j++) + _mesa_printf("x"); + } + _mesa_printf("\n"); + } + } + + return GL_TRUE; +} + + +static GLuint +alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS]) +{ + GLuint k; + for (k = 0; k < MAX_PROGRAM_TEMPS; k++) { + if (!usedRegs[k]) { + usedRegs[k] = GL_TRUE; + return k; + } + } + return MAX_PROGRAM_TEMPS; +} + + +/** + * This function implements "Linear Scan Register Allocation" to reduce + * the number of temporary registers used by the program. + * + * We compute the "live interval" for all temporary registers then + * examine the overlap of the intervals to allocate new registers. + * Basically, if two intervals do not overlap, they can use the same register. + */ +static void +_mesa_reallocate_registers(struct gl_program *prog) +{ + struct interval_list liveIntervals; + GLint registerMap[MAX_PROGRAM_TEMPS]; + GLboolean usedRegs[MAX_PROGRAM_TEMPS]; + GLuint i; + GLuint maxTemp = 0; + + if (dbg) { + _mesa_printf("Optimize: Begin live-interval register reallocation\n"); + _mesa_print_program(prog); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + registerMap[i] = -1; + usedRegs[i] = GL_FALSE; + } + + if (!find_live_intervals(prog, &liveIntervals)) { + if (dbg) + _mesa_printf("Aborting register reallocation\n"); + return; + } + + { + struct interval_list activeIntervals; + activeIntervals.Num = 0; + + /* loop over live intervals, allocating a new register for each */ + for (i = 0; i < liveIntervals.Num; i++) { + const struct interval *live = liveIntervals.Intervals + i; + + if (dbg) + _mesa_printf("Consider register %u\n", live->Reg); + + /* Expire old intervals. Intervals which have ended with respect + * to the live interval can have their remapped registers freed. + */ + { + GLint j; + for (j = 0; j < activeIntervals.Num; j++) { + const struct interval *inv = activeIntervals.Intervals + j; + if (inv->End >= live->Start) { + /* Stop now. Since the activeInterval list is sorted + * we know we don't have to go further. + */ + break; + } + else { + /* Interval 'inv' has expired */ + const GLint regNew = registerMap[inv->Reg]; + ASSERT(regNew >= 0); + + if (dbg) + _mesa_printf(" expire interval for reg %u\n", inv->Reg); + + /* remove interval j from active list */ + remove_interval(&activeIntervals, inv); + j--; /* counter-act j++ in for-loop above */ + + /* return register regNew to the free pool */ + if (dbg) + _mesa_printf(" free reg %d\n", regNew); + ASSERT(usedRegs[regNew] == GL_TRUE); + usedRegs[regNew] = GL_FALSE; + } + } + } + + /* find a free register for this live interval */ + { + const GLuint k = alloc_register(usedRegs); + if (k == MAX_PROGRAM_TEMPS) { + /* out of registers, give up */ + return; + } + registerMap[live->Reg] = k; + maxTemp = MAX2(maxTemp, k); + if (dbg) + _mesa_printf(" remap register %d -> %d\n", live->Reg, k); + } + + /* Insert this live interval into the active list which is sorted + * by increasing end points. + */ + insert_interval_by_end(&activeIntervals, live); + } + } + + if (maxTemp + 1 < liveIntervals.Num) { + /* OK, we've reduced the number of registers needed. + * Scan the program and replace all the old temporary register + * indexes with the new indexes. + */ + replace_regs(prog, PROGRAM_TEMPORARY, registerMap); + + prog->NumTemporaries = maxTemp + 1; + } + + if (dbg) { + _mesa_printf("Optimize: End live-interval register reallocation\n"); + _mesa_printf("Num temp regs before: %u after: %u\n", + liveIntervals.Num, maxTemp + 1); + _mesa_print_program(prog); + } +} + + + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. @@ -424,4 +808,6 @@ _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) if (1) _mesa_consolidate_registers(program); + else /*NEW*/ + _mesa_reallocate_registers(program); } diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index f51d9e2651..aeb7cf6de2 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -506,6 +506,13 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], } } return; + case STATE_FB_SIZE: + value[0] = (GLfloat) (ctx->DrawBuffer->Width - 1); + value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1); + value[2] = 0.0F; + value[3] = 0.0F; + return; + case STATE_ROT_MATRIX_0: { const int unit = (int) state[2]; @@ -628,6 +635,9 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) case STATE_PCM_BIAS: return _NEW_PIXEL; + case STATE_FB_SIZE: + return _NEW_BUFFERS; + default: /* unknown state indexes are silently ignored and * no flag set, since it is handled by the driver. @@ -828,6 +838,9 @@ append_token(char *dst, gl_state_index k) case STATE_SHADOW_AMBIENT: append(dst, "CompareFailValue"); break; + case STATE_FB_SIZE: + append(dst, "FbSize"); + break; case STATE_ROT_MATRIX_0: append(dst, "rotMatrixRow0"); break; diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index d563080db1..1180d9eaa4 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -117,6 +117,7 @@ typedef enum gl_state_index_ { STATE_PCM_SCALE, /**< Post color matrix RGBA scale */ STATE_PCM_BIAS, /**< Post color matrix RGBA bias */ STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */ + STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */ STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */ STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 8263aae334..a7cfc45e6f 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -1441,7 +1441,7 @@ _slang_gen_function_call(slang_assemble_ctx *A, slang_function *fun, if (A->pragmas->Debug) { char s[1000]; - snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); + _mesa_snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); n->Comment = _slang_strdup(s); } diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index ea76487bcf..61687fbc3e 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -79,8 +79,6 @@ static void update_raster_state( struct st_context *st ) memset(raster, 0, sizeof(*raster)); - raster->origin_lower_left = 1; /* Always true for OpenGL */ - /* _NEW_POLYGON, _NEW_BUFFERS */ { diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c index f395930ab4..31e124b329 100644 --- a/src/mesa/state_tracker/st_atom_stipple.c +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -39,24 +39,52 @@ #include "pipe/p_defines.h" +/** + * OpenGL's polygon stipple is indexed with window coordinates in which + * the origin (0,0) is the lower-left corner of the window. + * With Gallium, the origin is the upper-left corner of the window. + * To convert GL's polygon stipple to what gallium expects we need to + * invert the pattern vertically and rotate the stipple rows according + * to the window height. + */ +static void +invert_stipple(GLuint dest[32], const GLuint src[32], GLuint winHeight) +{ + GLuint i; + + for (i = 0; i < 32; i++) { + dest[i] = src[(winHeight - 1 - i) & 0x1f]; + } +} + + + static void update_stipple( struct st_context *st ) { - const GLuint sz = sizeof(st->state.poly_stipple.stipple); + const GLuint sz = sizeof(st->state.poly_stipple); assert(sz == sizeof(st->ctx->PolygonStipple)); - if (memcmp(&st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz)) { + if (memcmp(st->state.poly_stipple, st->ctx->PolygonStipple, sz)) { /* state has changed */ - memcpy(st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz); - st->pipe->set_polygon_stipple(st->pipe, &st->state.poly_stipple); + struct pipe_poly_stipple newStipple; + + memcpy(st->state.poly_stipple, st->ctx->PolygonStipple, sz); + + invert_stipple(newStipple.stipple, st->ctx->PolygonStipple, + st->ctx->DrawBuffer->Height); + + st->pipe->set_polygon_stipple(st->pipe, &newStipple); } } +/** Update the stipple when the pattern or window height changes */ const struct st_tracked_state st_update_polygon_stipple = { "st_update_polygon_stipple", /* name */ { /* dirty */ - (_NEW_POLYGONSTIPPLE), /* mesa */ + (_NEW_POLYGONSTIPPLE | + _NEW_BUFFERS), /* mesa */ 0, /* st */ }, update_stipple /* update */ diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index edfa8854d8..311d812ccf 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -467,7 +467,7 @@ st_TexImage(GLcontext * ctx, */ if (stObj->pt) { if (stObj->teximage_realloc || - level > stObj->pt->last_level || + level > (GLint) stObj->pt->last_level || (stObj->pt->last_level == level && stObj->pt->target != PIPE_TEXTURE_CUBE && !st_texture_match_image(stObj->pt, &stImage->base, @@ -803,7 +803,6 @@ st_TexSubimage(GLcontext * ctx, PIPE_TRANSFER_WRITE, xoffset, yoffset, width, height); - dstRowStride = stImage->transfer->stride; } if (!texImage->Data) { @@ -812,6 +811,7 @@ st_TexSubimage(GLcontext * ctx, } src = (const GLubyte *) pixels; + dstRowStride = stImage->transfer->stride; for (i = 0; i++ < depth;) { if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index d7518ab689..ae8c2978bf 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -93,12 +93,13 @@ struct st_context struct pipe_constant_buffer constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS]; - struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; GLuint num_samplers; GLuint num_textures; + + GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ } state; struct { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index cbf3f334c0..ffa607dd87 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -219,8 +219,9 @@ compile_instruction( const GLuint immediateMapping[], GLboolean indirectAccess, GLuint preamble_size, - GLuint processor, - GLboolean *insideSubroutine) + GLuint procType, + GLboolean *insideSubroutine, + GLint wposTemp) { GLuint i; struct tgsi_full_dst_register *fulldst; @@ -247,19 +248,29 @@ compile_instruction( GLuint j; fullsrc = &fullinst->FullSrcRegisters[i]; - fullsrc->SrcRegister.File = map_register_file( - inst->SrcReg[i].File, - inst->SrcReg[i].Index, - immediateMapping, - indirectAccess ); - fullsrc->SrcRegister.Index = map_register_file_index( - fullsrc->SrcRegister.File, - inst->SrcReg[i].Index, - inputMapping, - outputMapping, - immediateMapping, - indirectAccess ); + if (procType == TGSI_PROCESSOR_FRAGMENT && + inst->SrcReg[i].File == PROGRAM_INPUT && + inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + /* special case of INPUT[WPOS] */ + fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY; + fullsrc->SrcRegister.Index = wposTemp; + } + else { + /* any other src register */ + fullsrc->SrcRegister.File = map_register_file( + inst->SrcReg[i].File, + inst->SrcReg[i].Index, + immediateMapping, + indirectAccess ); + fullsrc->SrcRegister.Index = map_register_file_index( + fullsrc->SrcRegister.File, + inst->SrcReg[i].Index, + inputMapping, + outputMapping, + immediateMapping, + indirectAccess ); + } /* swizzle (ext swizzle also depends on negation) */ { @@ -733,6 +744,111 @@ find_temporaries(const struct gl_program *program, } +/** + * Find an unused temporary in the tempsUsed array. + */ +static int +find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) +{ + int i; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!tempsUsed[i]) { + tempsUsed[i] = GL_TRUE; + return i; + } + } + return -1; +} + + +/** helper for building simple TGSI instruction, one src register */ +static void +build_tgsi_instruction1(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 1; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; +} + + +/** helper for building simple TGSI instruction, two src registers */ +static void +build_tgsi_instruction2(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1, + int srcFile2, int srcIndex2) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 2; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; + inst->FullSrcRegisters[1].SrcRegister.File = srcFile2; + inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2; +} + + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + */ +static int +emit_inverted_wpos(struct tgsi_token *tokens, + int wpos_temp, + int winsize_const, + int wpos_input, + struct tgsi_header *header, int maxTokens) +{ + struct tgsi_full_instruction fullinst; + int ti = 0; + + /* MOV wpos_temp.xzw, input[wpos]; */ + build_tgsi_instruction1(&fullinst, + TGSI_OPCODE_MOV, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW, + TGSI_FILE_INPUT, 0); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */ + build_tgsi_instruction2(&fullinst, + TGSI_OPCODE_SUB, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y, + TGSI_FILE_CONSTANT, winsize_const, + TGSI_FILE_INPUT, wpos_input); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + return ti; +} + + /** @@ -778,16 +894,34 @@ st_translate_mesa_program( GLuint ti; /* token index */ struct tgsi_header *header; struct tgsi_processor *processor; - struct tgsi_full_instruction fullinst; GLuint preamble_size = 0; GLuint immediates[1000]; GLuint numImmediates = 0; GLboolean insideSubroutine = GL_FALSE; GLboolean indirectAccess = GL_FALSE; + GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; + GLint wposTemp = -1, winHeightConst = -1; assert(procType == TGSI_PROCESSOR_FRAGMENT || procType == TGSI_PROCESSOR_VERTEX); + find_temporaries(program, tempsUsed); + + if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (program->InputsRead & FRAG_BIT_WPOS) { + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index winSizeState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; + winHeightConst = _mesa_add_state_reference(program->Parameters, + winSizeState); + wposTemp = find_free_temporary(tempsUsed); + } + } + + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); header = (struct tgsi_header *) &tokens[1]; @@ -884,11 +1018,9 @@ st_translate_mesa_program( /* temporary decls */ { - GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; GLboolean inside_range = GL_FALSE; GLuint start_range = 0; - find_temporaries(program, tempsUsed); tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE; for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) { if (tempsUsed[i] && !inside_range) { @@ -1018,7 +1150,17 @@ st_translate_mesa_program( } } + /* invert WPOS fragment input */ + if (wposTemp >= 0) { + ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst, + inputMapping[FRAG_ATTRIB_WPOS], + header, maxTokens - ti); + preamble_size = 2; /* two instructions added */ + } + for (i = 0; i < program->NumInstructions; i++) { + struct tgsi_full_instruction fullinst; + compile_instruction( &program->Instructions[i], &fullinst, @@ -1028,7 +1170,8 @@ st_translate_mesa_program( indirectAccess, preamble_size, procType, - &insideSubroutine ); + &insideSubroutine, + wposTemp); ti += tgsi_build_full_instruction( &fullinst, |