summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c109
2 files changed, 92 insertions, 25 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index f1866b7658..2c46e88399 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -94,7 +94,6 @@ struct tgsi_exec_labels
#define TGSI_EXEC_NUM_TEMPS 128
-#define TGSI_EXEC_NUM_TEMP_EXTRAS 6
#define TGSI_EXEC_NUM_IMMEDIATES 256
/*
@@ -162,9 +161,14 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_MASK_C 2
+/* 4 register buffer for various purposes */
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
+#define TGSI_EXEC_NUM_TEMP_R 4
+
+#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
+#define TGSI_EXEC_NUM_ADDRS 1
+#define TGSI_EXEC_NUM_TEMP_EXTRAS 9
-#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5)
#define TGSI_EXEC_MAX_COND_NESTING 20
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index ba2bfdef06..5084befc4e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -520,7 +520,7 @@ emit_coef_dady(
* that the stack pointer is 16 byte aligned, as expected.
*/
static void
-emit_func_call_dst(
+emit_func_call(
struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst,
@@ -534,11 +534,6 @@ emit_func_call_dst(
xmm_mask = (1 << xmm_save) - 1;
xmm_mask &= ~(1 << xmm_dst);
- sse_movaps(
- func,
- get_temp( TEMP_R0, 0 ),
- make_xmm( xmm_dst ) );
-
x86_push(
func,
x86_make_reg( file_REG32, reg_AX) );
@@ -549,6 +544,8 @@ emit_func_call_dst(
func,
x86_make_reg( file_REG32, reg_DX) );
+ /* Store XMM regs to the stack
+ */
for(i = 0, n = 0; i < 8; ++i)
if(xmm_mask & (1 << i))
++n;
@@ -567,16 +564,25 @@ emit_func_call_dst(
++n;
}
+ /* Load the address of the buffer we use for passing arguments and
+ * receiving results:
+ */
x86_lea(
func,
ecx,
get_temp( TEMP_R0, 0 ) );
+ /* Push actual function arguments (currently just the pointer to
+ * the buffer above), and call the function:
+ */
x86_push( func, ecx );
x86_mov_reg_imm( func, ecx, (unsigned long) code );
x86_call( func, ecx );
x86_pop(func, ecx );
+
+ /* Pop the saved XMM regs:
+ */
for(i = 0, n = 0; i < 8; ++i)
if(xmm_mask & (1 << i)) {
sse_movups(
@@ -602,6 +608,31 @@ emit_func_call_dst(
x86_pop(
func,
x86_make_reg( file_REG32, reg_AX) );
+}
+
+
+static void
+emit_func_call_dst_src1(
+ struct x86_function *func,
+ unsigned xmm_save,
+ unsigned xmm_dst,
+ unsigned xmm_src0,
+ void (PIPE_CDECL *code)() )
+{
+ /* Store our input parameters (in xmm regs) to the buffer we use
+ * for passing arguments. We will pass a pointer to this buffer as
+ * the actual function argument.
+ */
+ sse_movaps(
+ func,
+ get_temp( TEMP_R0, 0 ),
+ make_xmm( xmm_src0 ) );
+
+ emit_func_call(
+ func,
+ xmm_save,
+ xmm_dst,
+ code );
sse_movaps(
func,
@@ -609,27 +640,49 @@ emit_func_call_dst(
get_temp( TEMP_R0, 0 ) );
}
+
static void
-emit_func_call_dst_src(
+emit_func_call_dst_src2(
struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst,
- unsigned xmm_src,
+ unsigned xmm_src0,
+ unsigned xmm_src1,
void (PIPE_CDECL *code)() )
{
+ /* Store two inputs to parameter buffer.
+ */
+ sse_movaps(
+ func,
+ get_temp( TEMP_R0, 0 ),
+ make_xmm( xmm_src0 ) );
+
sse_movaps(
func,
get_temp( TEMP_R0, 1 ),
- make_xmm( xmm_src ) );
+ make_xmm( xmm_src1 ) );
- emit_func_call_dst(
+
+ /* Emit the call
+ */
+ emit_func_call(
func,
xmm_save,
xmm_dst,
code );
+
+ /* Retrieve the results:
+ */
+ sse_movaps(
+ func,
+ make_xmm( xmm_dst ),
+ get_temp( TEMP_R0, 0 ) );
}
+
+
+
#if defined(PIPE_ARCH_SSE)
/*
@@ -782,10 +835,11 @@ emit_cos(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
cos4f );
}
@@ -812,10 +866,11 @@ emit_ex2(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
ex24f );
}
@@ -857,10 +912,11 @@ emit_flr(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
flr4f );
}
@@ -880,10 +936,11 @@ emit_frc(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
frc4f );
}
@@ -910,10 +967,11 @@ emit_lg2(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
lg24f );
}
@@ -975,13 +1033,15 @@ emit_pow(
struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst,
- unsigned xmm_src )
+ unsigned xmm_src0,
+ unsigned xmm_src1 )
{
- emit_func_call_dst_src(
+ emit_func_call_dst_src2(
func,
xmm_save,
xmm_dst,
- xmm_src,
+ xmm_src0,
+ xmm_src1,
pow4f );
}
@@ -1017,10 +1077,11 @@ emit_rnd(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
rnd4f );
}
@@ -1099,10 +1160,11 @@ emit_sgn(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
sgn4f );
}
@@ -1121,10 +1183,11 @@ emit_sin (struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst)
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
sin4f );
}
@@ -1573,7 +1636,7 @@ emit_instruction(
get_temp(
TGSI_EXEC_TEMP_MINUS_128_I,
TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( func, 3, 1, 2 );
+ emit_pow( func, 3, 1, 1, 2 );
FETCH( func, *inst, 0, 0, CHAN_X );
sse_xorps(
func,
@@ -1917,7 +1980,7 @@ emit_instruction(
/* TGSI_OPCODE_POW */
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
- emit_pow( func, 0, 0, 1 );
+ emit_pow( func, 0, 0, 0, 1 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}