From 4da82fd5c5e0a7535e30aa81f08dcbe1a26358b7 Mon Sep 17 00:00:00 2001
From: Brian
Date: Tue, 5 Feb 2008 14:23:34 -0700
Subject: Cell: re-enable inlined vertex buffers

Vertex data must be on a 16-byte address/offset so SIMD operations
will work properly in the SPU code.
---
 src/mesa/pipe/cell/ppu/cell_vbuf.c  | 12 +++++-------
 src/mesa/pipe/cell/spu/spu_main.c   |  3 ++-
 src/mesa/pipe/cell/spu/spu_render.c | 12 ++++++++----
 3 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'src/mesa/pipe')

diff --git a/src/mesa/pipe/cell/ppu/cell_vbuf.c b/src/mesa/pipe/cell/ppu/cell_vbuf.c
index 0fee61821a..e9fafe492e 100644
--- a/src/mesa/pipe/cell/ppu/cell_vbuf.c
+++ b/src/mesa/pipe/cell/ppu/cell_vbuf.c
@@ -40,7 +40,7 @@
 
 
 /** Allow vertex data to be inlined after RENDER command */
-#define ALLOW_INLINE_VERTS 0
+#define ALLOW_INLINE_VERTS 1
 
 
 /**
@@ -199,9 +199,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
 {
    const uint index_bytes = ROUNDUP8(nr_indices * 2);
    const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size;
-
-   const uint batch_size = sizeof(struct cell_command_render)
-      + index_bytes;
+   const uint batch_size = sizeof(struct cell_command_render) + index_bytes;
 
    struct cell_command_render *render
       = (struct cell_command_render *)
@@ -223,9 +221,9 @@ cell_vbuf_draw(struct vbuf_render *vbr,
    render->num_verts = nr_vertices;
    if (ALLOW_INLINE_VERTS &&
        min_index == 0 &&
-       vertex_bytes <= cell_batch_free_space(cell)) {
-      /* vertex data inlined, after indices */
-      void *dst = cell_batch_alloc(cell, vertex_bytes);
+       vertex_bytes + 16 <= cell_batch_free_space(cell)) {
+      /* vertex data inlined, after indices, at 16-byte boundary */
+      void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16);
       memcpy(dst, vertices, vertex_bytes);
       render->inline_verts = TRUE;
       render->vertex_buf = ~0;
diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c
index 4f126d5e5b..e375197fe6 100644
--- a/src/mesa/pipe/cell/spu/spu_main.c
+++ b/src/mesa/pipe/cell/spu/spu_main.c
@@ -387,7 +387,7 @@ cmd_batch(uint opcode)
               = (struct cell_command_render *) &buffer[pos];
            uint pos_incr;
            cmd_render(render, &pos_incr);
-           pos += sizeof(*render) / 8 + ((pos_incr + 1) / 2);
+           pos += pos_incr;
         }
         break;
      case CELL_CMD_RELEASE_VERTS:
@@ -541,6 +541,7 @@ main(main_param_t speid, main_param_t argp)
    (void) speid;
 
    ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
+   ASSERT(sizeof(struct cell_command_render) % 8 == 0);
 
    one_time_init();
 
diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c
index e8705eeeba..932fb500b3 100644
--- a/src/mesa/pipe/cell/spu/spu_render.c
+++ b/src/mesa/pipe/cell/spu/spu_render.c
@@ -171,6 +171,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
    ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
    const uint vertex_size = render->vertex_size; /* in bytes */
    /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
+   uint index_bytes;
    const ubyte *vertices;
    const ushort *indexes;
    uint i, j;
@@ -199,13 +200,16 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr)
 
    /* indexes are right after the render command in the batch buffer */
    indexes = (const ushort *) (render + 1);
-   *pos_incr = (render->num_indexes * 2 + 3) / 4;
+   index_bytes = ROUNDUP8(render->num_indexes * 2);
+   *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
 
 
    if (render->inline_verts) {
-      /* Vertices are right after indexes in batch buffer */
-      vertices = (const ubyte *) (render + 1) + *pos_incr * 4;
-      *pos_incr = *pos_incr + total_vertex_bytes / 4;
+      /* Vertices are after indexes in batch buffer at next 16-byte addr */
+      vertices = (const ubyte *) render + (*pos_incr * 8);
+      vertices = (const ubyte *) align_pointer((void *) vertices, 16);
+      ASSERT_ALIGN16(vertices);
+      *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
   }
   else {
      /* Begin DMA fetch of vertex buffer */
-- 
cgit v1.2.3
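
Note on the 16-byte requirement: SPU quadword loads and stores operate on 16-byte
aligned data, which is why the inlined vertices are placed on the next 16-byte
boundary above. The stand-alone C sketch below is not Mesa code and does not show
the actual align_pointer() implementation; round_up_16() and the batch[] layout
are made up purely to illustrate the round-up-to-16 computation under those
assumptions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Round an address up to the next multiple of 16 (a no-op if the
 * address is already 16-byte aligned).
 */
static void *
round_up_16(void *ptr)
{
   uintptr_t addr = (uintptr_t) ptr;
   return (void *) ((addr + 15) & ~(uintptr_t) 15);
}

int
main(void)
{
   /* Stand-in for a batch buffer; _Alignas keeps the offsets deterministic. */
   _Alignas(16) unsigned char batch[64];

   /* Suppose the render command plus indexes end 10 bytes into the batch;
    * inlined vertex data would then start at the next 16-byte boundary.
    */
   unsigned char *end_of_indexes = batch + 10;
   unsigned char *vertices = round_up_16(end_of_indexes);

   assert(((uintptr_t) vertices & 15) == 0);
   printf("indexes end at offset 10, vertices start at offset %td\n",
          vertices - batch);   /* prints 16 */
   return 0;
}

The "+ 15, then mask off the low bits" form is the usual branch-free way to round
up to a power-of-two boundary; the vertex_bytes + 16 free-space check in the PPU
change above appears to reserve room for this worst-case alignment padding.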