diff options
author | Brian Paul <brian.paul@tungstengraphics.com> | 2008-04-18 11:15:18 -0600 |
---|---|---|
committer | Brian Paul <brian.paul@tungstengraphics.com> | 2008-04-18 11:46:00 -0600 |
commit | 19218e2195f3dffc9403f16a742ba8c63edbf8b4 (patch) | |
tree | 161803e974ee2b7147646575dd6fc578fa439c3b /src | |
parent | 26c27f6636069ca849a740c3969c577d841484e2 (diff) |
gallium: implement recip sqrt() with C code for now.
Some conformance lighting tests fail with the SSE rsqrt instruction.
Diffstat (limited to 'src')
-rwxr-xr-x | src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
index 0a3a7559ca..6f785be3f5 100755
--- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c
@@ -36,6 +36,8 @@
 
 #if defined(__i386__) || defined(__386__)
 
+#define HIGH_PRECISION 1 /* for 1/sqrt() */
+
 #define DUMP_SSE 0
 
 #if DUMP_SSE
@@ -1137,16 +1139,44 @@ emit_rcp (
       make_xmm( xmm_src ) );
 }
 
+#if HIGH_PRECISION
+static void XSTDCALL
+rsqrt4f(
+   float *store )
+{
+#ifdef WIN32
+   store[0] = 1.0F / (float) sqrt( (double) store[0] );
+   store[1] = 1.0F / (float) sqrt( (double) store[1] );
+   store[2] = 1.0F / (float) sqrt( (double) store[2] );
+   store[3] = 1.0F / (float) sqrt( (double) store[3] );
+#else
+   const unsigned X = TEMP_R0 * 16;
+   store[X + 0] = 1.0F / sqrt( store[X + 0] );
+   store[X + 1] = 1.0F / sqrt( store[X + 1] );
+   store[X + 2] = 1.0F / sqrt( store[X + 2] );
+   store[X + 3] = 1.0F / sqrt( store[X + 3] );
+#endif
+}
+#endif
+
 static void
 emit_rsqrt(
    struct x86_function *func,
    unsigned xmm_dst,
    unsigned xmm_src )
 {
+#if HIGH_PRECISION
+   emit_func_call_dst_src(
+      func,
+      xmm_dst,
+      xmm_src,
+      rsqrt4f );
+#else
    emit_rsqrtps(
       func,
       make_xmm( xmm_dst ),
       make_xmm( xmm_src ) );
+#endif
 }
 
 static void