/**************************************************************************
 * 
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 **************************************************************************/


#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_exec.h"
#include "draw_vs.h"
#include "draw_vs_aos.h"
#include "draw_vertex.h"

#include "rtasm/rtasm_x86sse.h"

#ifdef PIPE_ARCH_X86

/* Note - don't yet have to worry about interacting with the code in
 * draw_vs_aos.c as there is no intermingling of generated code...
 * That may have to change, we'll see.
 *
 * NOTE(review): the aos_get_x86() / aos_get_internal*() helpers used
 * below are defined elsewhere and may emit instructions themselves, so
 * the order of calls in every function here should be treated as
 * significant -- confirm before reordering anything.
 */


/**
 * Emit code loading a four-float attribute from src_ptr into the XMM
 * register 'data' with a single movups.
 */
static void emit_load_R32G32B32A32( struct aos_compilation *cp,
                                    struct x86_reg data,
                                    struct x86_reg src_ptr )
{
   sse_movups(cp->func, data, src_ptr);
}


/**
 * Emit code loading a three-float attribute from src_ptr into 'data',
 * filling the fourth component from IMM_IDENTITY.
 *
 * Only 12 bytes are read from src_ptr in the enabled variant (one movss
 * at offset 8 plus one movlps at offset 0).  The lane comments show the
 * register contents after each emitted instruction.
 */
static void emit_load_R32G32B32( struct aos_compilation *cp,
                                 struct x86_reg data,
                                 struct x86_reg src_ptr )
{
#if 1
   sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
   /* data = z ? ? ? */
   sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
   /* data = z ? 0 1 */
   sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
   /* data = ? 0 z 1 */
   sse_movlps(cp->func, data, src_ptr);
   /* data = x y z 1 */
#else
   /* Disabled alternative: one 16-byte load followed by shuffles. */
   sse_movups(cp->func, data, src_ptr);
   /* data = x y z ? */
   sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
   /* data = ? x y z */
   sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
   /* data = 1 x y z */
   sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
   /* data = x y z 1 */
#endif
}


/**
 * Emit code loading a two-float attribute: 'data' is first filled from
 * IMM_IDENTITY, then its low two lanes are overwritten from src_ptr.
 */
static void emit_load_R32G32( struct aos_compilation *cp,
                              struct x86_reg data,
                              struct x86_reg src_ptr )
{
   sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
   sse_movlps(cp->func, data, src_ptr);
}


/**
 * Emit code loading a single-float attribute: a movss from src_ptr
 * followed by an OR with IMM_IDENTITY to supply the remaining lanes.
 *
 * NOTE(review): this relies on the low lane of IMM_IDENTITY being
 * all-zero bits so the OR leaves x untouched -- confirm against the
 * definition of the immediate.
 */
static void emit_load_R32( struct aos_compilation *cp,
                           struct x86_reg data,
                           struct x86_reg src_ptr )
{
   sse_movss(cp->func, data, src_ptr);
   sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
}


/**
 * Emit code loading four packed bytes from src_ptr and converting them
 * to four floats scaled by IMM_INV_255.
 *
 * The two punpcklbw steps widen bytes -> words -> dwords by interleaving
 * with IMM_IDENTITY.
 *
 * NOTE(review): the widening is only a zero-extension if the low lanes
 * of IMM_IDENTITY are all-zero bits -- confirm.
 */
static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
                                      struct x86_reg data,
                                      struct x86_reg src_ptr )
{
   sse_movss(cp->func, data, src_ptr);
   sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
   sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
   sse2_cvtdq2ps(cp->func, data, data);
   sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
}


/**
 * Emit a shufps of 'src' into 'dest' using the given shuffle immediate.
 *
 * Extended swizzles?  Maybe later.
 */
static void emit_swizzle( struct aos_compilation *cp,
                          struct x86_reg dest,
                          struct x86_reg src,
                          ubyte shuffle )
{
   sse_shufps(cp->func, dest, src, shuffle);
}


/**
 * Emit code leaving, in register 'ptr', the address of the current
 * vertex within vertex buffer 'buf_idx'.
 *
 * \param linear   TRUE: read the buffer's running 'ptr' field and store
 *                 it back advanced by the buffer stride.  'elt' is used
 *                 as a scratch register and is overwritten.
 *                 FALSE: compute base_ptr + stride * elt.
 * \param buf_idx  index into the aos_buffer array
 * \param elt      scratch register (linear) or element-index operand
 * \param ptr      register receiving the vertex address
 * \return always TRUE
 */
static boolean get_buffer_ptr( struct aos_compilation *cp,
                               boolean linear,
                               unsigned buf_idx,
                               struct x86_reg elt,
                               struct x86_reg ptr)
{
   struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                      buf_idx * sizeof(struct aos_buffer));

   struct x86_reg buf_stride = x86_make_disp(buf,
                                             Offset(struct aos_buffer, stride));

   if (linear) {
      struct x86_reg buf_ptr = x86_make_disp(buf,
                                             Offset(struct aos_buffer, ptr));

      /* Calculate pointer to current attrib, and write the pointer for
       * the next vertex back to the buffer struct:
       */
      x86_mov(cp->func, ptr, buf_ptr);
      x86_mov(cp->func, elt, buf_stride);
      x86_add(cp->func, elt, ptr);

      /* Prefetch ahead of the next vertex, first buffer only. */
      if (buf_idx == 0) {
         sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
      }

      x86_mov(cp->func, buf_ptr, elt);
   }
   else {
      struct x86_reg buf_base_ptr = x86_make_disp(buf,
                                                  Offset(struct aos_buffer, base_ptr));

      /* Calculate pointer to current attrib:
       */
      x86_mov(cp->func, ptr, buf_stride);
      x86_imul(cp->func, ptr, elt);
      x86_add(cp->func, ptr, buf_base_ptr);
   }

   cp->insn_counter++;

   return TRUE;
}


/**
 * Emit code loading vertex element 'idx' from bufptr + element offset
 * into a freshly allocated XMM register, which is adopted as
 * TGSI_FILE_INPUT[idx].
 *
 * \return FALSE (after AOS_ERROR) if the element's input format is not
 *         one of the handled formats, TRUE otherwise.
 */
static boolean load_input( struct aos_compilation *cp,
                           unsigned idx,
                           struct x86_reg bufptr )
{
   unsigned format = cp->vaos->base.key.element[idx].in.format;
   unsigned offset = cp->vaos->base.key.element[idx].in.offset;
   struct x86_reg dataXMM = aos_get_xmm_reg(cp);

   /* Figure out source pointer address:
    */
   struct x86_reg src = x86_make_disp(bufptr, offset);

   aos_adopt_xmm_reg( cp,
                      dataXMM,
                      TGSI_FILE_INPUT,
                      idx,
                      TRUE );

   switch (format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(cp, dataXMM, src);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(cp, dataXMM, src);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(cp, dataXMM, src);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(cp, dataXMM, src);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Same byte unpack as RGBA, then exchange the X and Z lanes. */
      emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
      break;
   default:
      AOS_ERROR(cp, "unhandled input format");
      return FALSE;
   }

   return TRUE;
}


/**
 * Emit loads for every vertex element whose source is vertex buffer
 * 'buffer', reading relative to 'ptr'.
 *
 * \return FALSE as soon as one element fails to load, TRUE otherwise.
 */
static boolean load_inputs( struct aos_compilation *cp,
                            unsigned buffer,
                            struct x86_reg ptr )
{
   unsigned i;

   for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
      if (cp->vaos->base.key.element[i].in.buffer == buffer) {

         if (!load_input( cp, i, ptr )) {
            return FALSE;
         }

         cp->insn_counter++;
      }
   }

   return TRUE;
}


/**
 * Emit the per-run input setup code, once for each vertex buffer.
 *
 * - Constant vertex buffers (bit set in key.const_vbuffers): their
 *   inputs are loaded once from base_ptr and spilled with
 *   aos_spill_all().
 * - Other buffers, linear case: the address of the first vertex
 *   (base_ptr + stride * index in idx_EBX) is computed and kept either
 *   in idx_EBX itself (single vertex buffer) or in the buffer's 'ptr'
 *   field (several vertex buffers).
 * - Other buffers, non-linear case: nothing is emitted.
 *
 * \return FALSE if loading a constant buffer's inputs failed (the result
 *         of load_inputs() used to be ignored here), TRUE otherwise.
 */
boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned i;

   for (i = 0; i < cp->vaos->nr_vb; i++) {
      struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                         i * sizeof(struct aos_buffer));

      struct x86_reg buf_base_ptr = x86_make_disp(buf,
                                                  Offset(struct aos_buffer, base_ptr));

      /* 1u: keep the shift unsigned so a high buffer index cannot
       * overflow a signed int.
       */
      if (cp->vaos->base.key.const_vbuffers & (1u << i)) {
         struct x86_reg ptr = cp->tmp_EAX;

         x86_mov(cp->func, ptr, buf_base_ptr);

         /* Load all inputs for this constant vertex buffer
          */
         if (!load_inputs( cp, i, x86_deref(ptr) )) {
            return FALSE;
         }

         /* Then just force them out to aos_machine.input[]
          */
         aos_spill_all( cp );
      }
      else if (linear) {
         struct x86_reg elt = cp->idx_EBX;
         struct x86_reg ptr = cp->tmp_EAX;

         struct x86_reg buf_stride = x86_make_disp(buf,
                                                   Offset(struct aos_buffer, stride));
         struct x86_reg buf_ptr = x86_make_disp(buf,
                                                Offset(struct aos_buffer, ptr));

         /* Calculate pointer to current attrib:
          */
         x86_mov(cp->func, ptr, buf_stride);
         x86_imul(cp->func, ptr, elt);
         x86_add(cp->func, ptr, buf_base_ptr);

         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (cp->vaos->nr_vb == 1) {
            x86_mov( cp->func, elt, ptr );
         }
         else {
            x86_mov( cp->func, buf_ptr, ptr );
         }

         cp->insn_counter++;
      }
   }

   return TRUE;
}


/**
 * Emit the per-vertex input fetch code for every vertex buffer.
 *
 * Constant vertex buffers emit nothing here.  With a single linear
 * buffer, idx_EBX is used directly as the vertex pointer.  Otherwise the
 * vertex address is first computed into tmp_EAX by get_buffer_ptr().
 *
 * \return FALSE if any input could not be loaded (this now also covers
 *         the single linear buffer path, which used to ignore the
 *         result of load_inputs()), TRUE otherwise.
 */
boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
{
   unsigned j;

   for (j = 0; j < cp->vaos->nr_vb; j++) {
      /* 1u: unsigned shift, see aos_init_inputs(). */
      if (cp->vaos->base.key.const_vbuffers & (1u << j)) {
         /* just retrieve pre-transformed input */
      }
      else if (linear && cp->vaos->nr_vb == 1) {
         if (!load_inputs( cp, 0, cp->idx_EBX )) {
            return FALSE;
         }
      }
      else {
         struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
         struct x86_reg ptr = cp->tmp_EAX;

         if (!get_buffer_ptr( cp, linear, j, elt, ptr )) {
            return FALSE;
         }

         if (!load_inputs( cp, j, ptr )) {
            return FALSE;
         }
      }
   }

   return TRUE;
}


/**
 * Emit the code that advances the input cursor to the next vertex.
 *
 * - Single linear buffer: idx_EBX is a vertex pointer; add the stride of
 *   buffer 0 and prefetch 192 bytes ahead.
 * - Several linear buffers: nothing to emit, get_buffer_ptr() already
 *   advances each buffer's stored pointer.
 * - Non-linear: step idx_EBX forward by 4 bytes.
 *
 * \return always TRUE
 */
boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
{
   if (linear && cp->vaos->nr_vb == 1) {
      struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
                                            (0 * sizeof(struct aos_buffer) +
                                             Offset(struct aos_buffer, stride)));

      x86_add(cp->func, cp->idx_EBX, stride);
      sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
   }
   else if (linear) {
      /* Nothing to do */
   }
   else {
      x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
   }

   return TRUE;
}


/**
 * Emit a movups storing all four lanes of dataXMM to dst_ptr.
 */
static void emit_store_R32G32B32A32( struct aos_compilation *cp,
                                     struct x86_reg dst_ptr,
                                     struct x86_reg dataXMM )
{
   sse_movups(cp->func, dst_ptr, dataXMM);
}


/**
 * Emit code storing the first three lanes of dataXMM to dst_ptr
 * (12 bytes written).
 *
 * NOTE! destructive: dataXMM is overwritten with its Z lane broadcast to
 * all lanes.
 */
static void emit_store_R32G32B32( struct aos_compilation *cp,
                                  struct x86_reg dst_ptr,
                                  struct x86_reg dataXMM )
{
   sse_movlps(cp->func, dst_ptr, dataXMM);
   sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
   sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
}


/**
 * Emit a movlps storing the low two lanes of dataXMM to dst_ptr.
 */
static void emit_store_R32G32( struct aos_compilation *cp,
                               struct x86_reg dst_ptr,
                               struct x86_reg dataXMM )
{
   sse_movlps(cp->func, dst_ptr, dataXMM);
}


/**
 * Emit a movss storing the low lane of dataXMM to dst_ptr.
 */
static void emit_store_R32( struct aos_compilation *cp,
                            struct x86_reg dst_ptr,
                            struct x86_reg dataXMM )
{
   sse_movss(cp->func, dst_ptr, dataXMM);
}


/**
 * Emit code scaling dataXMM by IMM_255, converting to integers, packing
 * dwords -> words -> bytes, and storing the low four bytes to dst_ptr.
 *
 * Destructive: dataXMM is modified in place.
 */
static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
                                       struct x86_reg dst_ptr,
                                       struct x86_reg dataXMM )
{
   sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
   sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
   sse2_packssdw(cp->func, dataXMM, dataXMM);
   sse2_packuswb(cp->func, dataXMM, dataXMM);
   sse_movss(cp->func, dst_ptr, dataXMM);
}


/**
 * Emit the store of one output attribute held in dataXMM to 'ptr',
 * according to the EMIT_x 'format'.
 *
 * EMIT_3F and EMIT_4UB modify dataXMM while storing.
 *
 * \return FALSE (after AOS_ERROR) for an unhandled format, TRUE
 *         otherwise.
 */
static boolean emit_output( struct aos_compilation *cp,
                            struct x86_reg ptr,
                            struct x86_reg dataXMM,
                            unsigned format )
{
   switch (format) {
   case EMIT_1F:
   case EMIT_1F_PSIZE:
      emit_store_R32(cp, ptr, dataXMM);
      break;
   case EMIT_2F:
      emit_store_R32G32(cp, ptr, dataXMM);
      break;
   case EMIT_3F:
      emit_store_R32G32B32(cp, ptr, dataXMM);
      break;
   case EMIT_4F:
      emit_store_R32G32B32A32(cp, ptr, dataXMM);
      break;
   case EMIT_4UB:
      /* Exchange the X and Z lanes before packing to bytes.  (This used
       * to sit under an "if (1)" whose else branch, storing without the
       * swizzle, could never run.)
       */
      emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
      break;
   default:
      AOS_ERROR(cp, "unhandled output format");
      return FALSE;
   }

   return TRUE;
}


/**
 * Emit the stores writing every output attribute of the current vertex
 * to outbuf_ECX + the attribute's offset.
 *
 * The value comes from the IMM_PSIZE internal for EMIT_1F_PSIZE and from
 * the shader's TGSI_FILE_OUTPUT register otherwise.  A value that is not
 * already in an XMM register is first copied into a temporary one.
 *
 * \return FALSE if an output format is unhandled, TRUE otherwise.
 */
boolean aos_emit_outputs( struct aos_compilation *cp )
{
   unsigned i;

   for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
      unsigned format = cp->vaos->base.key.element[i].out.format;
      unsigned offset = cp->vaos->base.key.element[i].out.offset;
      unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;

      struct x86_reg data;

      if (format == EMIT_1F_PSIZE) {
         data = aos_get_internal_xmm( cp, IMM_PSIZE );
      }
      else {
         data = aos_get_shader_reg( cp,
                                    TGSI_FILE_OUTPUT,
                                    vs_output );
      }

      if (data.file != file_XMM) {
         struct x86_reg tmp = aos_get_xmm_reg( cp );
         sse_movaps(cp->func, tmp, data);
         data = tmp;
      }

      if (!emit_output( cp,
                        x86_make_disp( cp->outbuf_ECX, offset ),
                        data,
                        format )) {
         return FALSE;
      }

      aos_release_xmm_reg( cp, data.idx );

      cp->insn_counter++;
   }

   return TRUE;
}

#endif