diff options
Diffstat (limited to 'src/gallium/drivers')
31 files changed, 1024 insertions, 451 deletions
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 48edc62f49..3a80df427d 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -830,13 +830,11 @@ exec_declaration(struct spu_exec_machine *mach, unsigned first, last, mask; interpolation_func interp; - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - switch( decl->Interpolation.Interpolate ) { + switch( decl->Declaration.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: interp = constant_interpolation; break; @@ -898,7 +896,7 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ + case TGSI_OPCODE_SWZ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); STORE( &r[0], 0, chan_index ); diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index 4ea06ce02b..45bf4f4028 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -29,26 +29,88 @@ #define I915_BATCH_H #include "i915_winsys.h" -#include "i915_debug.h" -#define BATCH_LOCALS +struct i915_batchbuffer +{ + struct pipe_buffer *buffer; + struct i915_winsys *winsys; + + unsigned char *map; + unsigned char *ptr; + + size_t size; + size_t actual_size; + + size_t relocs; + size_t max_relocs; +}; + +static INLINE boolean +i915_batchbuffer_check( struct i915_batchbuffer *batch, + size_t dwords, + size_t relocs ) +{ + /** TODO JB: Check relocs */ + return dwords * 4 <= batch->size - (batch->ptr - batch->map); +} + +static INLINE size_t +i915_batchbuffer_space( struct i915_batchbuffer *batch ) +{ + return batch->size - (batch->ptr - batch->map); +} + +static INLINE void +i915_batchbuffer_dword( struct i915_batchbuffer *batch, + 
unsigned dword ) +{ + if (i915_batchbuffer_space(batch) < 4) + return; + + *(unsigned *)batch->ptr = dword; + batch->ptr += 4; +} + +static INLINE void +i915_batchbuffer_write( struct i915_batchbuffer *batch, + void *data, + size_t size ) +{ + if (i915_batchbuffer_space(batch) < size) + return; + + memcpy(batch->ptr, data, size); + batch->ptr += size; +} + +static INLINE void +i915_batchbuffer_reloc( struct i915_batchbuffer *batch, + struct pipe_buffer *buffer, + size_t flags, + size_t offset ) +{ + batch->winsys->batch_reloc( batch->winsys, buffer, flags, offset ); +} + +static INLINE void +i915_batchbuffer_flush( struct i915_batchbuffer *batch, + struct pipe_fence_handle **fence ) +{ + batch->winsys->batch_flush( batch->winsys, fence ); +} #define BEGIN_BATCH( dwords, relocs ) \ - (i915->batch_start = i915->winsys->batch_start( i915->winsys, dwords, relocs )) + (i915_batchbuffer_check( i915->batch, dwords, relocs )) #define OUT_BATCH( dword ) \ - i915->winsys->batch_dword( i915->winsys, dword ) + i915_batchbuffer_dword( i915->batch, dword ) #define OUT_RELOC( buf, flags, delta ) \ - i915->winsys->batch_reloc( i915->winsys, buf, flags, delta ) - -#define ADVANCE_BATCH() + i915_batchbuffer_reloc( i915->batch, buf, flags, delta ) #define FLUSH_BATCH(fence) do { \ - if (0) i915_dump_batchbuffer( i915 ); \ i915->winsys->batch_flush( i915->winsys, fence ); \ - i915->batch_start = NULL; \ i915->hardware_dirty = ~0; \ } while (0) -#endif +#endif diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index 24449e3fb3..22f91fab92 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -31,6 +31,7 @@ #include "i915_blit.h" #include "i915_reg.h" #include "i915_batch.h" +#include "i915_debug.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -45,7 +46,6 @@ i915_fill_blit(struct i915_context *i915, unsigned color) { unsigned BR13, CMD; - BATCH_LOCALS; dst_pitch *= (short) cpp; @@ -79,7
+79,6 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); OUT_BATCH(color); - ADVANCE_BATCH(); } @@ -100,7 +99,6 @@ i915_copy_blit( struct i915_context *i915, unsigned CMD, BR13; int dst_y2 = dst_y + h; int dst_x2 = dst_x + w; - BATCH_LOCALS; I915_DBG(i915, @@ -156,7 +154,6 @@ i915_copy_blit( struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); - ADVANCE_BATCH(); } diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c index cde69daacc..8a2d3ca43f 100644 --- a/src/gallium/drivers/i915simple/i915_clear.c +++ b/src/gallium/drivers/i915simple/i915_clear.c @@ -44,4 +44,5 @@ i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_DEFINED; } diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 4bef21619c..4c01b8d5b1 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -50,10 +50,12 @@ static void i915_destroy( struct pipe_context *pipe ) static boolean -i915_draw_elements( struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned prim, unsigned start, unsigned count) +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) { struct i915_context *i915 = i915_context( pipe ); struct draw_context *draw = i915->draw; @@ -77,7 +79,10 @@ i915_draw_elements( struct pipe_context *pipe, void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - 
draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); } else { /* no index/element buffer */ @@ -86,7 +91,9 @@ i915_draw_elements( struct pipe_context *pipe, draw_set_mapped_constant_buffer(draw, - i915->current.constants[PIPE_SHADER_VERTEX]); + i915->current.constants[PIPE_SHADER_VERTEX], + ( i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float) )); /* draw! */ draw_arrays(i915->draw, prim, start, count); @@ -100,12 +107,23 @@ i915_draw_elements( struct pipe_context *pipe, } if (indexBuffer) { pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); - draw_set_mapped_element_buffer(draw, 0, NULL); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } return TRUE; } +static boolean +i915_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count ); +} static boolean i915_draw_arrays( struct pipe_context *pipe, unsigned prim, unsigned start, unsigned count) @@ -136,13 +154,14 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, i915->pipe.draw_arrays = i915_draw_arrays; i915->pipe.draw_elements = i915_draw_elements; + i915->pipe.draw_range_elements = i915_draw_range_elements; /* * Create drawing context and plug our rendering stage into it. 
*/ i915->draw = draw_create(); assert(i915->draw); - if (!GETENV("I915_NO_VBUF")) { + if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { @@ -162,7 +181,8 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, /* Batch stream debugging is a bit hacked up at the moment: */ - i915->batch_start = NULL; + i915->batch = i915_winsys->batch_get(i915_winsys); + i915->batch->winsys = i915_winsys; return &i915->pipe; } diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 53fc5ed079..892a88fd2c 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -178,6 +178,8 @@ struct i915_rasterizer_state { struct i915_sampler_state { unsigned state[3]; const struct pipe_sampler_state *templ; + unsigned minlod; + unsigned maxlod; }; @@ -209,6 +211,8 @@ struct i915_texture { struct pipe_buffer *buffer; }; +struct i915_batchbuffer; + struct i915_context { struct pipe_context pipe; @@ -241,10 +245,12 @@ struct i915_context unsigned num_vertex_elements; unsigned num_vertex_buffers; - unsigned *batch_start; + struct i915_batchbuffer *batch; /** Vertex buffer */ struct pipe_buffer *vbo; + size_t vbo_offset; + unsigned vbo_flushed; struct i915_state current; unsigned hardware_dirty; diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 9b9111167f..5e26d1b905 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -861,8 +861,9 @@ void i915_dump_batchbuffer( struct i915_context *i915 ) { struct debug_stream stream; - unsigned *start = i915->batch_start; - unsigned *end = i915->winsys->batch_start( i915->winsys, 0, 0 ); + /* TODO fix me */ + unsigned *start = 0;/*i915->batch_start;*/ + unsigned *end = 0;/*i915->winsys->batch_start( i915->winsys, 0, 0 );*/ unsigned long bytes = 
(unsigned long) (end - start) * 4; boolean done = FALSE; diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 7d23e6b6b9..472e0ab774 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -62,12 +62,12 @@ static void i915_flush( struct pipe_context *pipe, assert(BEGIN_BATCH(1, 0)); } OUT_BATCH( flush ); - ADVANCE_BATCH(); } /* If there are no flags, just flush pending commands to hardware: */ FLUSH_BATCH(fence); + i915->vbo_flushed = 1; } diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 3ccf74c72c..23cd909337 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -676,7 +676,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MOV: - /* aka TGSI_OPCODE_SWZ */ + case TGSI_OPCODE_SWZ: emit_simple_arith(p, inst, A0_MOV, 1); break; @@ -943,8 +943,8 @@ i915_translate_instructions(struct i915_fp_compile *p, if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { uint i; - for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; i++) { assert(ifs->constant_flags[i] == 0x0); ifs->constant_flags[i] = I915_CONSTFLAG_USER; @@ -954,8 +954,8 @@ i915_translate_instructions(struct i915_fp_compile *p, else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - i <= parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; i++) { assert(i < 
I915_MAX_TEMPORARY); /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 7fb2adbb53..aef3682bbf 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -38,6 +38,7 @@ */ +#include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_util.h" @@ -62,8 +63,21 @@ struct i915_vbuf_render { /** Vertex size in bytes */ unsigned vertex_size; + /** Software primitive */ + unsigned prim; + /** Hardware primitive */ unsigned hwprim; + + /** Genereate a vertex list */ + unsigned fallback; + + /* Stuff for the vbo */ + struct pipe_buffer *vbo; + size_t vbo_size; + size_t vbo_offset; + void *vbo_ptr; + size_t vbo_alloc_size; }; @@ -95,8 +109,8 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) static void * i915_vbuf_render_allocate_vertices( struct vbuf_render *render, - ushort vertex_size, - ushort nr_vertices ) + ushort vertex_size, + ushort nr_vertices ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; @@ -105,14 +119,31 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, /* FIXME: handle failure */ assert(!i915->vbo); - i915->vbo = winsys->buffer_create(winsys, 64, I915_BUFFER_USAGE_LIT_VERTEX, - size); - + + if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { + } else { + i915->vbo_flushed = 0; + pipe_buffer_reference(winsys, &i915_render->vbo, NULL); + } + + if (!i915_render->vbo) { + i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); + i915_render->vbo_offset = 0; + i915_render->vbo = winsys->buffer_create(winsys, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = winsys->buffer_map(winsys, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + 
winsys->buffer_unmap(winsys, i915_render->vbo); + } + + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_offset; i915->dirty |= I915_NEW_VBO; - - return winsys->buffer_map(winsys, - i915->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); + + return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; } @@ -121,18 +152,51 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, unsigned prim ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); - + i915_render->prim = prim; + switch(prim) { case PIPE_PRIM_POINTS: i915_render->hwprim = PRIM3D_POINTLIST; + i915_render->fallback = 0; return TRUE; case PIPE_PRIM_LINES: i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINE_LOOP: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = PIPE_PRIM_LINE_LOOP; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + i915_render->hwprim = PRIM3D_LINESTRIP; + i915_render->fallback = 0; return TRUE; case PIPE_PRIM_TRIANGLES: i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + i915_render->hwprim = PRIM3D_TRISTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_FAN: + i915_render->hwprim = PRIM3D_TRIFAN; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_QUADS: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUADS; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUAD_STRIP; + return TRUE; + case PIPE_PRIM_POLYGON: + i915_render->hwprim = PRIM3D_POLY; + i915_render->fallback = 0; return TRUE; default: + assert((int)"Error unkown primtive type" & 0); /* Actually, can handle a lot more just fine... Fixme. 
*/ return FALSE; @@ -140,6 +204,205 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, } + +/** + * Used for fallbacks in draw_arrays + */ +static void +draw_arrays_generate_indices( struct vbuf_render *render, + unsigned start, uint nr, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + unsigned end = start + nr; + switch(type) { + case 0: + for (i = start; i+1 < end; i += 2) + OUT_BATCH( (i+0) | (i+1) << 16 ); + if (i < end) + OUT_BATCH( i ); + break; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) { + for (i = start + 1; i < end; i++) + OUT_BATCH( (i-1) | (i+0) << 16 ); + OUT_BATCH( (i-1) | ( start) << 16 ); + } + break; + case PIPE_PRIM_QUADS: + for (i = start; i + 3 < end; i += 4) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+1) << 16 ); + OUT_BATCH( (i+2) | (i+3) << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = start; i + 3 < end; i += 2) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+2) << 16 ); + OUT_BATCH( (i+0) | (i+3) << 16 ); + } + break; + default: + assert(0); + } +} + +static unsigned +draw_arrays_calc_nr_indices( uint nr, unsigned type ) +{ + switch (type) { + case 0: + return nr; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) + return nr * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return nr >= 4 ? ((nr - 2) / 2) * 6 : 0; + default: + assert(0); + return 0; + } +} + +static void +draw_arrays_fallback( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned nr_indices; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + nr_indices = draw_arrays_calc_nr_indices( nr, i915_render->fallback ); + if (!nr_indices) + return; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2,
1 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + goto out; + } + } + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + + draw_arrays_generate_indices( render, start, nr, i915_render->fallback ); + +out: + return; +} + +static void +i915_vbuf_render_draw_arrays( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + if (i915_render->fallback) { + draw_arrays_fallback( render, start, nr ); + return; + } + + /* JB: TODO submit direct cmds */ + draw_arrays_fallback( render, start, nr ); +} + +/** + * Used for normal and fallback emitting of indices + * If type is zero normal operation assumed. + */ +static void +draw_generate_indices( struct vbuf_render *render, + const ushort *indices, + uint nr_indices, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + + switch(type) { + case 0: + for (i = 0; i + 1 < nr_indices; i += 2) { + OUT_BATCH( indices[i] | indices[i+1] << 16 ); + } + if (i < nr_indices) { + OUT_BATCH( indices[i] ); + } + break; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) { + for (i = 1; i < nr_indices; i++) + OUT_BATCH( indices[i-1] | indices[i] << 16 ); + OUT_BATCH( indices[i-1] | indices[0] << 16 ); + } + break; + case PIPE_PRIM_QUADS: + for (i = 0; i + 3 < nr_indices; i += 4) { + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+2] | indices[i+3] << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i + 3 < nr_indices; i += 2) { + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+2] << 16 ); + OUT_BATCH( 
indices[i+0] | indices[i+3] << 16 ); + } + break; + default: + assert(0); + break; + } +} + +static unsigned +draw_calc_nr_indices( uint nr_indices, unsigned type ) +{ + switch (type) { + case 0: + return nr_indices; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) + return nr_indices * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr_indices / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return nr_indices >= 4 ? ((nr_indices - 2) / 2) * 6 : 0; + default: + assert(0); + return 0; + } +} + static void i915_vbuf_render_draw( struct vbuf_render *render, const ushort *indices, @@ -147,13 +410,14 @@ i915_vbuf_render_draw( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - unsigned i; + unsigned save_nr_indices; - assert(nr_indices); + save_nr_indices = nr_indices; + + nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); + if (!nr_indices) + return; - /* this seems to be bogus, since we validate state right after this */ - /*assert((i915->dirty & ~I915_NEW_VBO) == 0);*/ - if (i915->dirty) i915_update_derived( i915 ); @@ -167,25 +431,26 @@ i915_vbuf_render_draw( struct vbuf_render *render, */ i915_update_derived( i915 ); i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { assert(0); - return; + goto out; } } OUT_BATCH( _3DPRIMITIVE | - PRIM_INDIRECT | - i915_render->hwprim | - PRIM_INDIRECT_ELTS | - nr_indices ); - for (i = 0; i + 1 < nr_indices; i += 2) { - OUT_BATCH( indices[i] | - (indices[i + 1] << 16) ); - } - if (i < nr_indices) { - OUT_BATCH( indices[i] ); - } + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + draw_generate_indices( render, + indices, + save_nr_indices, + i915_render->fallback ); + +out: + return; } @@ -197,11 +462,13 @@ i915_vbuf_render_release_vertices( struct vbuf_render *render, { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context
*i915 = i915_render->i915; - struct pipe_winsys *winsys = i915->pipe.winsys; + size_t size = (size_t)vertex_size * (size_t)vertices_used; assert(i915->vbo); - winsys->buffer_unmap(winsys, i915->vbo); - pipe_buffer_reference(winsys, &i915->vbo, NULL); + + i915_render->vbo_offset += size; + i915->vbo = NULL; + i915->dirty |= I915_NEW_VBO; } @@ -220,6 +487,7 @@ static struct vbuf_render * i915_vbuf_render_create( struct i915_context *i915 ) { struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); + struct pipe_winsys *winsys = i915->pipe.winsys; i915_render->i915 = i915; @@ -229,14 +497,27 @@ i915_vbuf_render_create( struct i915_context *i915 ) * batch buffer. */ i915_render->base.max_indices = 16*1024; - + i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; i915_render->base.set_primitive = i915_vbuf_render_set_primitive; i915_render->base.draw = i915_vbuf_render_draw; + i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; i915_render->base.release_vertices = i915_vbuf_render_release_vertices; i915_render->base.destroy = i915_vbuf_render_destroy; - + + i915_render->vbo_alloc_size = 128 * 4096; + i915_render->vbo_size = i915_render->vbo_alloc_size; + i915_render->vbo_offset = 0; + i915_render->vbo = winsys->buffer_create(winsys, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = winsys->buffer_map(winsys, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + winsys->buffer_unmap(winsys, i915_render->vbo); + return &i915_render->base; } @@ -258,6 +539,8 @@ struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) render->destroy(render); return NULL; } - + /** TODO JB: this shouldn't be here */ + draw_set_render(i915->draw, render); + return stage; } diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e6c4671700..dbb33f2695 100644 --- 
a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -41,7 +41,6 @@ #include "i915_state_inlines.h" #include "i915_fpc.h" - /* The i915 (and related graphics cores) do not support GL_CLAMP. The * Intel drivers for "other operating systems" implement GL_CLAMP as * GL_CLAMP_TO_EDGE, so the same is done here. @@ -178,6 +177,7 @@ static void i915_bind_blend_state(struct pipe_context *pipe, void *blend) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->blend = (struct i915_blend_state*)blend; @@ -194,6 +194,7 @@ static void i915_set_blend_color( struct pipe_context *pipe, const struct pipe_blend_color *blend_color ) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->blend_color = *blend_color; @@ -250,11 +251,17 @@ i915_create_sampler_state(struct pipe_context *pipe, if (sampler->normalized_coords) cso->state[1] |= SS3_NORMALIZED_COORDS; - if (0) /* XXX not tested yet */ { int minlod = (int) (16.0 * sampler->min_lod); + int maxlod = (int) (16.0 * sampler->max_lod); minlod = CLAMP(minlod, 0, 16 * 11); - cso->state[1] |= (minlod << SS3_MIN_LOD_SHIFT); + maxlod = CLAMP(maxlod, 0, 16 * 11); + + if (minlod > maxlod) + maxlod = minlod; + + cso->minlod = minlod; + cso->maxlod = maxlod; } { @@ -280,6 +287,8 @@ static void i915_bind_sampler_states(struct pipe_context *pipe, !memcmp(i915->sampler, sampler, num * sizeof(void *))) return; + draw_flush(i915->draw); + for (i = 0; i < num; ++i) i915->sampler[i] = sampler[i]; for (i = num; i < PIPE_MAX_SAMPLERS; ++i) @@ -398,6 +407,7 @@ static void i915_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; @@ -415,6 +425,7 @@ static void i915_set_scissor_state( struct pipe_context *pipe, const struct pipe_scissor_state *scissor ) { struct i915_context *i915 = 
i915_context(pipe); + draw_flush(i915->draw); memcpy( &i915->scissor, scissor, sizeof(*scissor) ); i915->dirty |= I915_NEW_SCISSOR; @@ -451,6 +462,7 @@ static void i915_bind_fs_state(struct pipe_context *pipe, void *shader) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->fs = (struct i915_fragment_shader*) shader; @@ -506,6 +518,7 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, { struct i915_context *i915 = i915_context(pipe); struct pipe_winsys *ws = pipe->winsys; + draw_flush(i915->draw); assert(shader < PIPE_SHADER_TYPES); assert(index == 0); @@ -574,6 +587,7 @@ static void i915_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); i915->framebuffer = *fb; /* struct copy */ @@ -586,6 +600,7 @@ static void i915_set_clip_state( struct pipe_context *pipe, const struct pipe_clip_state *clip ) { struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); draw_set_clip_state(i915->draw, clip); @@ -698,6 +713,10 @@ static void i915_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. + */ + draw_flush(i915->draw); memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); i915->num_vertex_buffers = count; @@ -711,6 +730,11 @@ static void i915_set_vertex_elements(struct pipe_context *pipe, const struct pipe_vertex_element *elements) { struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. 
+ */ + draw_flush(i915->draw); + i915->num_vertex_elements = count; /* pass-through to draw module */ draw_set_vertex_elements(i915->draw, count, elements); diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 6f947d4346..bc801a82f0 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -211,33 +211,43 @@ i915_emit_hardware_state(struct i915_context *i915 ) struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; if (cbuf_surface) { - unsigned pitch = (cbuf_surface->pitch * cbuf_surface->cpp); + unsigned cpitch = (cbuf_surface->pitch * cbuf_surface->cpp); + unsigned ctile = BUF_3D_USE_FENCE; +#if 0 + if (!((cpitch - 1) & cpitch) && cpitch >= 512) + ctile = BUF_3D_TILED_SURFACE; +#endif OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(pitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); + BUF_3D_PITCH(cpitch) | /* pitch in bytes */ + ctile); OUT_RELOC(cbuf_surface->buffer, I915_BUFFER_ACCESS_WRITE, - 0); + cbuf_surface->offset); } /* What happens if no zbuf?? 
*/ if (depth_surface) { unsigned zpitch = (depth_surface->pitch * depth_surface->cpp); - + unsigned ztile = BUF_3D_USE_FENCE; +#if 0 + if (!((zpitch - 1) & zpitch) && zpitch >= 512) + ztile = BUF_3D_TILED_SURFACE; +#endif + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_DEPTH | BUF_3D_PITCH(zpitch) | /* pitch in bytes */ - BUF_3D_USE_FENCE); + ztile); OUT_RELOC(depth_surface->buffer, I915_BUFFER_ACCESS_WRITE, - 0); + depth_surface->offset); } { @@ -376,6 +386,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) { uint w, h; boolean k = framebuffer_size(&i915->framebuffer, &w, &h); + (void)k; assert(k); OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index dfbbcab624..2501f2d7cb 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -54,7 +54,7 @@ static void upload_S0S1(struct i915_context *i915) /* INTEL_NEW_VBO */ /* TODO: re-use vertex buffers here? */ - LIS0 = 0; + LIS0 = i915->vbo_offset; /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! 
*/ @@ -156,8 +156,12 @@ const struct i915_tracked_state i915_upload_S5 = { */ static void upload_S6( struct i915_context *i915 ) { - unsigned LIS6 = (S6_COLOR_WRITE_ENABLE | - (2 << S6_TRISTRIP_PV_SHIFT)); + unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); + + /* I915_NEW_FRAMEBUFFER + */ + if (i915->framebuffer.cbufs[0]) + LIS6 |= S6_COLOR_WRITE_ENABLE; /* I915_NEW_BLEND */ @@ -174,7 +178,7 @@ static void upload_S6( struct i915_context *i915 ) } const struct i915_tracked_state i915_upload_S6 = { - I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, upload_S6 }; diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 982eec4a1b..24440843f3 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -35,6 +35,35 @@ #include "i915_state.h" +/* + * A note about min_lod & max_lod. + * + * There is a circular dependency between the sampler state + * and the map state to be submitted to hw. + * + * Two conditions must be met: + * min_lod <= max_lod + * max_lod <= last_level + * + * + * This would all be fine and dandy were it not for the fact that max_lod + * is set on the map state instead of the sampler state. That is, + * the max_lod we submit on map is: + * max_lod = MIN2(last_level, max_lod); + * + * So we need to update the map state when we change samplers and + * we need to change the sampler state when the map state is changed. + * The first part is done by calling i915_update_texture in + * i915_update_samplers and the second part is done elsewhere in + * code tracking the state changes. + */ + +static void +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]); /** * Compute i915 texture sampling state.
* @@ -50,6 +79,7 @@ static void update_sampler(struct i915_context *i915, unsigned state[3] ) { const struct pipe_texture *pt = &tex->base; + unsigned minlod, lastlod; /* Need to do this after updating the maps, which call the * intel_finalize_mipmap_tree and hence can update firstLevel: @@ -95,6 +125,15 @@ static void update_sampler(struct i915_context *i915, } #endif + /* See note at the top of file */ + minlod = sampler->minlod; + lastlod = pt->last_level << 4; + + if (lastlod < minlod) { + minlod = lastlod; + } + + state[1] |= (minlod << SS3_MIN_LOD_SHIFT); state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); } @@ -112,18 +151,23 @@ void i915_update_samplers( struct i915_context *i915 ) /* could also examine the fragment program? */ if (i915->texture[unit]) { update_sampler( i915, - unit, - i915->sampler[unit], /* sampler state */ - i915->texture[unit], /* texture */ - i915->current.sampler[unit] /* the result */ - ); - - i915->current.sampler_enable_nr++; - i915->current.sampler_enable_flags |= (1 << unit); + unit, + i915->sampler[unit], /* sampler state */ + i915->texture[unit], /* texture */ + i915->current.sampler[unit] /* the result */ + ); + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); } } - i915->hardware_dirty |= I915_HW_SAMPLER; + i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; } @@ -168,7 +212,7 @@ translate_texture_format(enum pipe_format pipeFormat) return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); #endif case PIPE_FORMAT_S8Z24_UNORM: - return (MAPSURF_32BIT | MT_32BIT_xL824); + return (MAPSURF_32BIT | MT_32BIT_xI824); default: debug_printf("i915: translate_texture_format() bad image format %x\n", pipeFormat); @@ -179,14 +223,17 @@ translate_texture_format(enum pipe_format pipeFormat) static void -i915_update_texture(struct i915_context *i915,
uint unit, +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, uint state[6]) { - const struct i915_texture *tex = i915->texture[unit]; const struct pipe_texture *pt = &tex->base; uint format, pitch; const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; const uint num_levels = pt->last_level; + unsigned max_lod = num_levels * 4; assert(tex); assert(width); @@ -207,16 +254,19 @@ i915_update_texture(struct i915_context *i915, uint unit, | MS3_USE_FENCE_REGS); /* - * XXX sampler->max_lod should be used to program the MAX_LOD field below. - * Also, when min_filter != mag_filter and there's just one mipmap level, + * XXX When min_filter != mag_filter and there's just one mipmap level, * set max_lod = 1 to make sure i915 chooses between min/mag filtering. */ + /* See note at the top of file */ + if (max_lod > (sampler->maxlod >> 2)) + max_lod = sampler->maxlod >> 2; + /* MS4 state */ state[1] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK - | ((num_levels * 4) << MS4_MAX_LOD_SHIFT) + | ((max_lod) << MS4_MAX_LOD_SHIFT) | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); } @@ -231,7 +281,11 @@ i915_update_textures(struct i915_context *i915) /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? 
*/ if (i915->texture[unit]) { - i915_update_texture(i915, unit, i915->current.texbuffer[unit]); + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); } } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index df11ba0544..9cd32e3919 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -42,12 +42,58 @@ #include "i915_debug.h" #include "i915_screen.h" +/* + * Helper function and arrays + */ + +/** + * Initial offset for Cube map. + */ +static const int initial_offsets[6][2] = { + {0, 0}, + {0, 2}, + {1, 0}, + {1, 2}, + {1, 1}, + {1, 3} +}; + +/** + * Step offsets for Cube map. + */ +static const int step_offsets[6][2] = { + {0, 2}, + {0, 2}, + {-1, 2}, + {-1, 2}, + {-1, 1}, + {-1, 1} +}; static unsigned minify( unsigned d ) { return MAX2(1, d>>1); } +static unsigned +power_of_two(unsigned x) +{ + unsigned value = 1; + while (value <= x) + value = value << 1; + return value; +} + +static unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + + +/* + * More advanced helper funcs + */ static void @@ -86,7 +132,6 @@ i915_miptree_set_level_info(struct i915_texture *tex, tex->image_offset[level][0] = 0; } - static void i915_miptree_set_image_offset(struct i915_texture *tex, unsigned level, unsigned img, unsigned x, unsigned y) @@ -99,57 +144,51 @@ i915_miptree_set_image_offset(struct i915_texture *tex, tex->image_offset[level][img] = (x + y * tex->pitch); /* - DBG("%s level %d img %d pos %d,%d image_offset %x\n", + printf("%s level %d img %d pos %d,%d image_offset %x\n", __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); */ } -/* Hack it up to use the old winsys->surface_alloc_storage() - * method for now: +/* + * Layout functions + */ + + +/** + * Special case to deal with display targets. 
*/ static boolean -i915_displaytarget_layout(struct pipe_screen *screen, - struct i915_texture *tex) +i915_displaytarget_layout(struct i915_texture *tex) { - struct pipe_winsys *ws = screen->winsys; - struct pipe_surface surf; - unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE); - - - memset(&surf, 0, sizeof(surf)); - - ws->surface_alloc_storage( ws, - &surf, - tex->base.width[0], - tex->base.height[0], - tex->base.format, - flags, - tex->base.tex_usage); - - /* Now extract the goodies: - */ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 0 || pt->cpp != 4) + return 0; + i915_miptree_set_level_info( tex, 0, 1, 0, 0, tex->base.width[0], tex->base.height[0], 1 ); i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); - tex->buffer = surf.buffer; - tex->pitch = surf.pitch; - tex->total_height = 0; + if (tex->base.width[0] >= 128) { + tex->pitch = power_of_two(tex->base.width[0] * pt->cpp) / pt->cpp; + tex->total_height = round_up(tex->base.height[0], 8); + } else { + tex->pitch = round_up(tex->base.width[0], 64 / pt->cpp); + tex->total_height = tex->base.height[0]; + } + /* + printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->cpp, + tex->pitch, tex->total_height, tex->pitch * tex->total_height * 4); + */ - return tex->buffer != NULL; + return 1; } - - - - static void i945_miptree_layout_2d( struct i915_texture *tex ) { @@ -161,6 +200,12 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned width = pt->width[0]; unsigned height = pt->height[0]; +#if 0 /* used for tiled display targets */ + if (pt->last_level == 0 && pt->cpp == 4) + if (i915_displaytarget_layout(tex)) + return; +#endif + tex->pitch = pt->width[0]; /* May need to adjust pitch to accomodate the placement of @@ -179,7 +224,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) /* Pitch must be a whole number of dwords, even though we * 
express it in texels. */ - tex->pitch = align_int(tex->pitch * pt->cpp, 4) / pt->cpp; + tex->pitch = align_int(tex->pitch * pt->cpp, 64) / pt->cpp; tex->total_height = 0; for (level = 0; level <= pt->last_level; level++) { @@ -212,25 +257,108 @@ i945_miptree_layout_2d( struct i915_texture *tex ) } } +static void +i945_miptree_layout_cube(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; -static const int initial_offsets[6][2] = { - {0, 0}, - {0, 2}, - {1, 0}, - {1, 2}, - {1, 1}, - {1, 3} -}; + const unsigned dim = pt->width[0]; + unsigned face; + unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; -static const int step_offsets[6][2] = { - {0, 2}, - {0, 2}, - {-1, 2}, - {-1, 2}, - {-1, 1}, - {-1, 1} -}; + /* + printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + */ + + assert(lvlWidth == lvlHeight); /* cubemap images are square */ + + /* + * XXX Should only be used for compressed formats. But lets + * keep this code active just in case. + * + * Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. + */ + if (dim > 32) + tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + else + tex->pitch = 14 * 8; + + /* + * XXX The 4 is only needed for compressed formats. See above. + */ + tex->total_height = dim * 4 + 4; + + /* Set all the levels to effectively occupy the whole rectangular region. 
+ */ + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, 0, 0, lvlWidth, lvlHeight, 1); + lvlWidth /= 2; + lvlHeight /= 2; + } + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * dim; + unsigned y = initial_offsets[face][1] * dim; + unsigned d = dim; + +#if 0 /* Fix and enable this code for compressed formats */ + if (dim == 4 && face >= 4) { + y = tex->total_height - 4; + x = (face - 4) * 8; + } + else if (dim < 4 && (face > 0)) { + y = tex->total_height - 4; + x = face * 8; + } +#endif + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + + d >>= 1; + +#if 0 /* Fix and enable this code for compressed formats */ + switch (d) { + case 4: + switch (face) { + case PIPE_TEX_FACE_POS_X: + case PIPE_TEX_FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case PIPE_TEX_FACE_POS_Y: + case PIPE_TEX_FACE_NEG_Y: + y += 12; + x -= 8; + break; + case PIPE_TEX_FACE_POS_Z: + case PIPE_TEX_FACE_NEG_Z: + y = tex->total_height - 4; + x = (face - 4) * 8; + break; + } + case 2: + y = tex->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + default: +#endif + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; +#if 0 + break; + } +#endif + } + } +} static boolean i915_miptree_layout(struct i915_texture * tex) @@ -363,99 +491,15 @@ i945_miptree_layout(struct i915_texture * tex) unsigned level; switch (pt->target) { - case PIPE_TEXTURE_CUBE:{ - const unsigned dim = pt->width[0]; - unsigned face; - unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; - - assert(lvlWidth == lvlHeight); /* cubemap images are square */ - - /* Depending on the size of the largest images, pitch can be - * determined either by the old-style packing of cubemap faces, - * or the final row of 4x4, 2x2 and 1x1 faces below this. 
- */ - if (dim > 32) - tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; - else - tex->pitch = 14 * 8; - - tex->total_height = dim * 4 + 4; - - /* Set all the levels to effectively occupy the whole rectangular region. - */ - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 6, - 0, 0, - lvlWidth, lvlHeight, 1); - lvlWidth /= 2; - lvlHeight /= 2; - } - - - for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * dim; - unsigned y = initial_offsets[face][1] * dim; - unsigned d = dim; - - if (dim == 4 && face >= 4) { - y = tex->total_height - 4; - x = (face - 4) * 8; - } - else if (dim < 4 && (face > 0)) { - y = tex->total_height - 4; - x = face * 8; - } - - for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_image_offset(tex, level, face, x, y); - - d >>= 1; - - switch (d) { - case 4: - switch (face) { - case PIPE_TEX_FACE_POS_X: - case PIPE_TEX_FACE_NEG_X: - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - break; - case PIPE_TEX_FACE_POS_Y: - case PIPE_TEX_FACE_NEG_Y: - y += 12; - x -= 8; - break; - case PIPE_TEX_FACE_POS_Z: - case PIPE_TEX_FACE_NEG_Z: - y = tex->total_height - 4; - x = (face - 4) * 8; - break; - } - - case 2: - y = tex->total_height - 4; - x = 16 + face * 8; - break; - - case 1: - x += 48; - break; - - default: - x += step_offsets[face][0] * d; - y += step_offsets[face][1] * d; - break; - } - } - } - break; - } + case PIPE_TEXTURE_CUBE: + i945_miptree_layout_cube(tex); + break; case PIPE_TEXTURE_3D:{ unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; - unsigned level; tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; tex->total_height = 0; @@ -532,39 +576,32 @@ i915_texture_create(struct pipe_screen *screen, struct pipe_winsys *ws = screen->winsys; struct i915_texture *tex = CALLOC_STRUCT(i915_texture); - if (!tex) + if (!tex) return 
NULL; tex->base = *templat; tex->base.refcount = 1; tex->base.screen = screen; - if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { - if (!i915_displaytarget_layout(screen, tex)) - goto fail; - } - else { - if (i915screen->is_i945) { - if (!i945_miptree_layout(tex)) - goto fail; - } - else { - if (!i915_miptree_layout(tex)) - goto fail; - } - - tex->buffer = ws->buffer_create(ws, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); - - if (!tex->buffer) - goto fail; + if (i915screen->is_i945) { + if (!i945_miptree_layout(tex)) + goto fail; + } else { + if (!i915_miptree_layout(tex)) + goto fail; } + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->pitch * tex->base.cpp * + tex->total_height); + + if (!tex->buffer) + goto fail; + return &tex->base; - fail: +fail: FREE(tex); return NULL; } @@ -600,11 +637,6 @@ i915_texture_release(struct pipe_screen *screen, *pt = NULL; } - - -/* - * XXX note: same as code in sp_surface.c - */ static struct pipe_surface * i915_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, @@ -629,10 +661,10 @@ i915_get_tex_surface(struct pipe_screen *screen, assert(zslice == 0); } - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (ps) { - assert(ps->refcount); - assert(ps->winsys); + ps->refcount = 1; + ps->winsys = ws; pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; @@ -642,11 +674,11 @@ i915_get_tex_surface(struct pipe_screen *screen, ps->pitch = tex->pitch; ps->offset = offset; ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; } return ps; } - void i915_init_texture_functions(struct i915_context *i915) { diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index 5e16543f4e..9afaa16a62 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -55,6 
+55,7 @@ extern "C" { * etc. */ +struct i915_batchbuffer; struct pipe_buffer; struct pipe_fence_handle; struct pipe_winsys; @@ -75,20 +76,10 @@ struct pipe_screen; struct i915_winsys { /** - * Reserve space on batch buffer. - * - * Returns a null pointer if there is insufficient space in the batch buffer - * to hold the requested number of dwords and relocations. - * - * The number of dwords should also include the number of relocations. + * Get the current batch buffer from the winsys. */ - unsigned *(*batch_start)( struct i915_winsys *sws, - unsigned dwords, - unsigned relocs ); - - void (*batch_dword)( struct i915_winsys *sws, - unsigned dword ); - + struct i915_batchbuffer *(*batch_get)( struct i915_winsys *sws ); + /** * Emit a relocation to a buffer. * @@ -103,7 +94,10 @@ struct i915_winsys { struct pipe_buffer *buf, unsigned access_flags, unsigned delta ); - + + /** + * Flush the batch. + */ void (*batch_flush)( struct i915_winsys *sws, struct pipe_fence_handle **fence ); }; diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c index c3b815a82b..96f8fb87a3 100644 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -169,9 +169,9 @@ static void upload_sf_prog( struct brw_context *brw ) case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) { - int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; - int interp_mode = parse.FullToken.FullDeclaration.Interpolation.Interpolate; + int first = parse.FullToken.FullDeclaration.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; + int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate; //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; @@ -291,8 
+291,8 @@ static void update_sf_linkage( struct brw_context *brw ) case TGSI_TOKEN_TYPE_DECLARATION: if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) { - int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First; - int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last; + int first = parse.FullToken.FullDeclaration.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; for (i = first; i < last; i++) { vp_semantic[i].semantic = diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index f4694a4433..fb3da92421 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -26,9 +26,7 @@ void brw_shader_info(const struct tgsi_token *tokens, case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned last = decl->u.DeclarationRange.Last; - - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + unsigned last = decl->DeclarationRange.Last; // Broken by crazy wpos init: //assert( info->nr_regs[decl->Declaration.File] <= last); diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 9020fcc001..81423e2d7d 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -988,10 +988,8 @@ post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) static void process_declaration(const struct tgsi_full_declaration *decl, struct brw_prog_info *info) { - int first = decl->u.DeclarationRange.First; - int last = decl->u.DeclarationRange.Last; - - assert (decl->Declaration.Declare != TGSI_DECLARE_MASK); + int first = decl->DeclarationRange.First; + int last = decl->DeclarationRange.Last; switch(decl->Declaration.File) { case TGSI_FILE_CONSTANT: @@ -1137,8 +1135,8 @@ static void 
process_instruction(struct brw_vs_compile *c, emit_min(p, dst, args[0], args[1]); break; case TGSI_OPCODE_MOV: -#if 0 case TGSI_OPCODE_SWZ: +#if 0 /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index 74ccfd494a..bf1b4d961a 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -351,18 +351,16 @@ void brw_wm_emit_decls(struct brw_wm_compile *c) case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; - unsigned first = decl->u.DeclarationRange.First; - unsigned last = decl->u.DeclarationRange.Last; + unsigned first = decl->DeclarationRange.First; + unsigned last = decl->DeclarationRange.Last; unsigned mask = decl->Declaration.UsageMask; /* ? */ unsigned i; if (decl->Declaration.File != TGSI_FILE_INPUT) break; - assert(decl->Declaration.Interpolate); - for( i = first; i <= last; i++ ) { - switch (decl->Interpolation.Interpolate) { + switch (decl->Declaration.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: emit_cinterp(c, i, mask); break; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2af0db3714..626c3a9d4e 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -88,7 +88,8 @@ static void softpipe_destroy( struct pipe_context *pipe ) struct pipe_winsys *ws = pipe->winsys; uint i; - draw_destroy( softpipe->draw ); + if (softpipe->draw) + draw_destroy( softpipe->draw ); softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); @@ -128,12 +129,12 @@ softpipe_create( struct pipe_screen *screen, uint i; #ifdef PIPE_ARCH_X86 - softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; + softpipe->use_sse = !debug_get_bool_option( 
"GALLIUM_NOSSE", FALSE ); #else softpipe->use_sse = FALSE; #endif - softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); softpipe->pipe.winsys = pipe_winsys; softpipe->pipe.screen = screen; @@ -178,6 +179,7 @@ softpipe_create( struct pipe_screen *screen, softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; + softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; @@ -216,17 +218,23 @@ softpipe_create( struct pipe_screen *screen, * Create drawing context and plug our rendering stage into it. */ softpipe->draw = draw_create(); - assert(softpipe->draw); + if (!softpipe->draw) + goto fail; + softpipe->setup = sp_draw_render_stage(softpipe); + if (!softpipe->setup) + goto fail; - if (GETENV( "SP_NO_RAST" ) != NULL) + if (debug_get_bool_option( "SP_NO_RAST", FALSE )) softpipe->no_rast = TRUE; - if (GETENV( "SP_VBUF" ) != NULL) { - sp_init_vbuf(softpipe); + if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) { + /* Deprecated path -- vbuf is the intended interface to the draw module: + */ + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); } else { - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + sp_init_vbuf(softpipe); } /* plug in AA line/point stages */ @@ -241,4 +249,8 @@ softpipe_create( struct pipe_screen *screen, sp_init_surface_functions(softpipe); return &softpipe->pipe; + + fail: + softpipe_destroy(&softpipe->pipe); + return NULL; } diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 62eabfb30e..078886f93c 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -69,7 +69,7 @@ struct softpipe_context { struct pipe_blend_color blend_color; struct pipe_clip_state clip; - struct pipe_constant_buffer constants[2]; + struct pipe_constant_buffer 
constants[PIPE_SHADER_TYPES]; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 6c58f9909d..12b44a8211 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -47,14 +47,15 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; uint i; - for (i = 0; i < 2; i++) { + for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].size) sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); } draw_set_mapped_constant_buffer(sp->draw, - sp->mapped_constants[PIPE_SHADER_VERTEX]); + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[PIPE_SHADER_VERTEX].size); } static void @@ -68,7 +69,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL); + draw_set_mapped_constant_buffer(sp->draw, NULL, 0); for (i = 0; i < 2; i++) { if (sp->constants[i].size) @@ -108,11 +109,14 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * * XXX should the element buffer be specified/bound with a separate function? 
*/ + boolean -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; @@ -141,11 +145,14 @@ softpipe_draw_elements(struct pipe_context *pipe, void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); } else { /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, NULL); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } @@ -171,6 +178,19 @@ softpipe_draw_elements(struct pipe_context *pipe, return TRUE; } +boolean +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return softpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + void softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 55741cc1df..69f7f960aa 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -46,7 +46,7 @@ /* Surely this should be defined somewhere in a tgsi header: */ -typedef void (XSTDCALL *codegen_function)( +typedef void (PIPE_CDECL *codegen_function)( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, const float (*constant)[4], diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c 
b/src/gallium/drivers/softpipe/sp_prim_setup.c index 1cf9ffa632..941ab62e00 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -64,16 +64,17 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) } +typedef const float (*cptrf4)[4]; static void do_tri(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - + setup_tri( setup->setup, - prim->v[0]->data, - prim->v[1]->data, - prim->v[2]->data ); + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data, + (cptrf4)prim->v[2]->data ); } static void @@ -82,8 +83,8 @@ do_line(struct draw_stage *stage, struct prim_header *prim) struct setup_stage *setup = setup_stage( stage ); setup_line( setup->setup, - prim->v[0]->data, - prim->v[1]->data ); + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data ); } static void @@ -92,7 +93,7 @@ do_point(struct draw_stage *stage, struct prim_header *prim) struct setup_stage *setup = setup_stage( stage ); setup_point( setup->setup, - prim->v[0]->data ); + (cptrf4)prim->v[0]->data ); } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e063fe82ef..e9fae951e0 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -116,30 +116,28 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) - if (prim == PIPE_PRIM_TRIANGLES || - prim == PIPE_PRIM_LINES || - prim == PIPE_PRIM_POINTS) { - cvbr->prim = prim; - return TRUE; - } - else { - return FALSE; - } + cvbr->prim = prim; + return TRUE; } +static INLINE cptrf4 get_vert( const void *vertex_buffer, + int index, + int stride ) +{ + return (cptrf4)((char *)vertex_buffer + index * stride); +} static void -sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) +sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) { struct softpipe_vbuf_render *cvbr = 
softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; - unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); - unsigned i, j; - void *vertex_buffer = cvbr->vertex_buffer; - cptrf4 v[3]; + unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); + unsigned i; + const void *vertex_buffer = cvbr->vertex_buffer; /* XXX: break this dependency - make setup_context live under * softpipe, rename the old "setup" draw stage to something else. @@ -149,40 +147,98 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) switch (cvbr->prim) { - case PIPE_PRIM_TRIANGLES: - for (i = 0; i < nr_indices; i += 3) { - for (j = 0; j < 3; j++) - v[j] = (cptrf4)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - setup_tri( setup_ctx, - v[0], - v[1], - v[2]); + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup_point( setup_ctx, + get_vert(vertex_buffer, indices[i-0], stride) ); } break; case PIPE_PRIM_LINES: - for (i = 0; i < nr_indices; i += 2) { - for (j = 0; j < 2; j++) - v[j] = (cptrf4)((char *)vertex_buffer + - indices[i+j] * vertex_size); + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { setup_line( setup_ctx, - v[0], - v[1] ); + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); } break; - case PIPE_PRIM_POINTS: - for (i = 0; i < nr_indices; i++) { - v[0] = (cptrf4)((char *)vertex_buffer + - indices[i] * vertex_size); + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[nr-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; - setup_point( 
setup_ctx, - v[0] ); + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i+(i&1)-2], stride), + get_vert(vertex_buffer, indices[i-(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); } break; + case PIPE_PRIM_QUADS: + for (i = 3; i < nr; i += 4) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 3; i < nr; i += 2) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + default: + assert(0); } /* XXX: why are we calling this??? 
If we had to call something, it @@ -202,131 +258,107 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - const void *vertex_buffer = cvbr->vertex_buffer; - const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); + const void *vertex_buffer = NULL; + const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i; struct setup_context *setup_ctx = sp_draw_setup_context(setup); - cptrf4 v[3]; -#define VERTEX(I) \ - (cptrf4) ((char *) vertex_buffer + (I) * vertex_size) + vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride); switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - v[0] = VERTEX(i); - setup_point( setup_ctx, v[0] ); + setup_point( setup_ctx, + get_vert(vertex_buffer, i-0, stride) ); } break; + case PIPE_PRIM_LINES: - assert(nr % 2 == 0); - for (i = 0; i < nr; i += 2) { - v[0] = VERTEX(i); - v[1] = VERTEX(i + 1); - setup_line( setup_ctx, v[0], v[1] ); + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; + case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < nr; i++) { - v[0] = VERTEX(i - 1); - v[1] = VERTEX(i); - setup_line( setup_ctx, v[0], v[1] ); + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); } break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + case PIPE_PRIM_TRIANGLES: - assert(nr % 3 == 0); - for (i = 0; i < nr; i += 3) { - v[0] = VERTEX(i + 0); - v[1] = VERTEX(i + 1); - 
v[2] = VERTEX(i + 2); + for (i = 2; i < nr; i += 3) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; + case PIPE_PRIM_TRIANGLE_STRIP: - assert(nr >= 3); - for (i = 2; i < nr; i++) { - v[0] = VERTEX(i - 2); - v[1] = VERTEX(i - 1); - v[2] = VERTEX(i); + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i+(i&1)-2, stride), + get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; + case PIPE_PRIM_TRIANGLE_FAN: - assert(nr >= 3); - for (i = 2; i < nr; i++) { - v[0] = VERTEX(0); - v[1] = VERTEX(i - 1); - v[2] = VERTEX(i); + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 1) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; case PIPE_PRIM_QUADS: - assert(nr % 4 == 0); - for (i = 0; i < nr; i += 4) { - v[0] = VERTEX(i + 0); - v[1] = VERTEX(i + 1); - v[2] = VERTEX(i + 2); + for (i = 3; i < nr; i += 4) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride)); - v[0] = VERTEX(i + 0); - v[1] = VERTEX(i + 2); - v[2] = VERTEX(i + 3); setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); } break; case PIPE_PRIM_QUAD_STRIP: - assert(nr >= 4); - for (i = 2; i < nr; i += 2) { - v[0] = VERTEX(i - 2); - v[1] = VERTEX(i); - v[2] = VERTEX(i + 1); + for (i = 3; i < nr; i += 2) { setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride)); - v[0] = VERTEX(i - 2); - v[1] = VERTEX(i + 1); - v[2] = VERTEX(i - 1); 
setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); - } - break; - case PIPE_PRIM_POLYGON: - /* draw as tri fan */ - for (i = 2; i < nr; i++) { - v[0] = VERTEX(0); - v[1] = VERTEX(i - 1); - v[2] = VERTEX(i); - setup_tri( setup_ctx, - v[0], - v[1], - v[2] ); + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride)); } break; default: - /* XXX finish remaining prim types */ assert(0); } - -#undef VERTEX } diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index a1859f9883..33888abcc5 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -230,7 +230,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) int x = quad->x0 % TILE_SIZE + (j & 1); int y = quad->y0 % TILE_SIZE + (j >> 1); uint z24s8 = tile->data.depth32[y][x]; - z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 24); + z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8); tile->data.depth32[y][x] = z24s8; } break; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 452e51fa79..701e02b295 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -171,6 +171,13 @@ boolean softpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count); void softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 1d7a1fffe4..ef8c5bd6b0 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ 
b/src/gallium/drivers/softpipe/sp_texture.c @@ -207,12 +207,19 @@ softpipe_get_tex_surface(struct pipe_screen *screen, * done with the CPU. Let's adjust the flags to take that into * account. */ - if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) - ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE; + if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) { + /* GPU_WRITE means "render" and that can involve reads (blending) */ + ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_CPU_READ; + } if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; + if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_WRITE)) { + /* Mark the surface as dirty. The tile cache will look for this. */ + spt->modified = TRUE; + } pipe_texture_reference(&ps->texture, pt); ps->face = face; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 779a9d8fc9..0e1017632c 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -47,6 +47,8 @@ struct softpipe_texture /* The data is held here: */ struct pipe_buffer *buffer; + + boolean modified; }; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 28c29da87c..2d5d2b50f5 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -37,6 +37,7 @@ #include "util/p_tile.h" #include "sp_context.h" #include "sp_surface.h" +#include "sp_texture.h" #include "sp_tile_cache.h" #define NUM_ENTRIES 32 @@ -169,6 +170,7 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, PIPE_BUFFER_USAGE_CPU_WRITE); tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_Z24S8_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || ps->format == PIPE_FORMAT_Z32_UNORM || ps->format == PIPE_FORMAT_S8_UNORM); @@ -505,6 +507,15 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, face, level); struct 
softpipe_cached_tile *tile = tc->entries + pos; + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->modified) { + /* texture was modified, force a cache reload */ + tile->x = -1; + spt->modified = FALSE; + } + } + if (tile_x != tile->x || tile_y != tile->y || z != tile->z || |