summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
authorBrian <brian.paul@tungstengraphics.com>2008-11-09 09:36:22 -0700
committerBrian <brian.paul@tungstengraphics.com>2008-11-09 09:36:22 -0700
commit399da3a337932c6074a69ac73e711138271308eb (patch)
tree4d91410e455989954ac4879af3b22414a0dbdf64 /src/gallium/auxiliary
parenta58dbf34ca88656739a8f8e5f4259e760365c9d0 (diff)
gallium: use PIPE_ARCH_SSE to protect use of SSE instrinsics only
This allows us to use SSE codegen with debug builds again. When PIPE_ARCH_SSE is set (w/ gcc -msse -msse2) we will also use the gcc SSE intrinsic functions.
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c42
2 files changed, 34 insertions, 10 deletions
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 0e2036f12a..77ba5152f9 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -37,7 +37,7 @@
#include "draw_vs.h"
-#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_X86)
#include "pipe/p_shader_tokens.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 3ce2c1c27b..f93db18114 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -27,12 +27,14 @@
#include "pipe/p_config.h"
-#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_X86)
#include "pipe/p_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
+#if defined(PIPE_ARCH_SSE)
#include "util/u_sse.h"
+#endif
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
@@ -627,6 +629,9 @@ emit_func_call_dst_src(
code );
}
+
+#if defined(PIPE_ARCH_SSE)
+
/*
* Fast SSE2 implementation of special math functions.
*/
@@ -678,6 +683,7 @@ exp2f4(__m128 x)
return _mm_mul_ps(expipart, expfpart);
}
+
/**
* See http://www.devmaster.net/forums/showthread.php?p=43580
*/
@@ -720,12 +726,16 @@ log2f4(__m128 x)
return _mm_add_ps(logmant, exp);
}
+
static INLINE __m128
powf4(__m128 x, __m128 y)
{
return exp2f4(_mm_mul_ps(log2f4(x), y));
}
+#endif /* PIPE_ARCH_SSE */
+
+
/**
* Low-level instruction translators.
@@ -780,13 +790,20 @@ emit_cos(
}
static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
__attribute__((force_align_arg_pointer))
#endif
ex24f(
float *store )
{
+#if defined(PIPE_ARCH_SSE)
_mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) ));
+#else
+ store[0] = util_fast_exp2( store[0] );
+ store[1] = util_fast_exp2( store[1] );
+ store[2] = util_fast_exp2( store[2] );
+ store[3] = util_fast_exp2( store[3] );
+#endif
}
static void
@@ -871,13 +888,20 @@ emit_frc(
}
static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
__attribute__((force_align_arg_pointer))
#endif
lg24f(
float *store )
{
+#if defined(PIPE_ARCH_SSE)
_mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) ));
+#else
+ store[0] = util_fast_log2( store[0] );
+ store[1] = util_fast_log2( store[1] );
+ store[2] = util_fast_log2( store[2] );
+ store[3] = util_fast_log2( store[3] );
+#endif
}
static void
@@ -930,19 +954,19 @@ emit_neg(
}
static void PIPE_CDECL
-#if defined(PIPE_CC_GCC)
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE)
__attribute__((force_align_arg_pointer))
#endif
pow4f(
float *store )
{
-#if 1
+#if defined(PIPE_ARCH_SSE)
_mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) ));
#else
- store[0] = powf( store[0], store[4] );
- store[1] = powf( store[1], store[5] );
- store[2] = powf( store[2], store[6] );
- store[3] = powf( store[3], store[7] );
+ store[0] = util_fast_pow( store[0], store[4] );
+ store[1] = util_fast_pow( store[1], store[5] );
+ store[2] = util_fast_pow( store[2], store[6] );
+ store[3] = util_fast_pow( store[3], store[7] );
#endif
}