util: Fix util_fast_pow/exp2/log2.

- Use a lookup table for log2. - Compute (float) (1 << ipart) by tweaking with the exponent directly to avoid integer overflow and float conversion. - Also table negative exponents to avoid float division and branching. - Implement util_fast_exp as function of util_fast_exp2.
author: José Fonseca <jrfonseca@tungstengraphics.com> 2008-10-01 01:13:40 +0900
committer: José Fonseca <jrfonseca@tungstengraphics.com> 2008-10-01 01:13:40 +0900
commit: 8415d06d90a197e16554dab98d160334fd9f9f93 (patch)
tree: 25d004929ecac18de0e4b8bf4180bb2b33ba45f7 /src
parent: 5dc8e67078be8b8c42a809311debd275ac7d64a7 (diff)
2 files changed, 64 insertions, 69 deletions
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index 0729114d6a..5b3cab4642 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -30,7 +30,7 @@
 #include "util/u_math.h"
 
 
-
+/** 2^x, for x in [-1.0, 1.0[ */
 float pow2_table[POW2_TABLE_SIZE];
 
 
@@ -38,9 +38,21 @@ static void
 init_pow2_table(void)
 {
    int i;
-   for (i = 0; i < POW2_TABLE_SIZE; i++) {
-      pow2_table[i] = (float) pow(2.0, i / POW2_TABLE_SCALE);
-   }
+   for (i = 0; i < POW2_TABLE_SIZE; i++)
+      pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
+}
+
+
+/** log2(x), for x in [1.0, 2.0[ */
+float log2_table[LOG2_TABLE_SIZE];
+
+
+static void 
+init_log2_table(void)
+{
+   unsigned i;
+   for (i = 0; i < LOG2_TABLE_SIZE; i++)
+      log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SIZE));
 }
 
 
@@ -53,6 +65,7 @@ util_init_math(void)
    static boolean initialized = FALSE;
    if (!initialized) {
       init_pow2_table();
+      init_log2_table();
       initialized = TRUE;
    }
 }
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 084655e6dd..be7303e550 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -174,8 +174,10 @@ static INLINE float logf( float f )
 
 
 
-#define POW2_TABLE_SIZE 256
-#define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1))
+#define POW2_TABLE_SIZE_LOG2 9
+#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2)
+#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2)
+#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2))
 extern float pow2_table[POW2_TABLE_SIZE];
 
 
@@ -186,98 +188,78 @@ util_init_math(void);
 
 union fi {
    float f;
-   int i;
-   unsigned ui;
+   int32_t i;
+   uint32_t ui;
 };
 
 
 /**
- * Fast approximation to exp(x).
- * Compute with base 2 exponents:  exp(x) = exp2(log2(e) * x)
- * Note: log2(e) is a constant, k = 1.44269
- * So, exp(x) = exp2(k * x);
+ * Fast version of 2^x
  * Identity: exp2(a + b) = exp2(a) * exp2(b)
- * Let ipart = int(k*x)
- * Let fpart = k*x - ipart;
- * So, exp2(k*x) = exp2(ipart) * exp2(fpart)
+ * Let ipart = int(x)
+ * Let fpart = x - ipart;
+ * So, exp2(x) = exp2(ipart) * exp2(fpart)
  * Compute exp2(ipart) with i << ipart
  * Compute exp2(fpart) with lookup table.
  */
 static INLINE float
-util_fast_exp(float x)
+util_fast_exp2(float x)
 {
-   if (x >= 0.0f) {
-      float k = 1.44269f; /* = log2(e) */
-      float kx = k * x;
-      int ipart = (int) kx;
-      float fpart = kx - (float) ipart;
-      float y = (float) (1 << ipart)
-         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
-      return y;
-   }
-   else {
-      /* exp(-x) = 1.0 / exp(x) */
-      float k = -1.44269f;
-      float kx = k * x;
-      int ipart = (int) kx;
-      float fpart = kx - (float) ipart;
-      float y = (float) (1 << ipart)
-         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
-      return 1.0f / y;
-   }
+   int32_t ipart;
+   float fpart, mpart;
+   union fi epart;
+   
+   if(x > 129.00000f)
+      return 3.402823466e+38f;
+   
+   if(x < -126.99999f)
+      return 0.0f;
+
+   ipart = (int32_t) x;
+   fpart = x - (float) ipart;
+   
+   /* same as
+    *   epart.f = (float) (1 << ipart)
+    * but faster and without integer overflow for ipart > 31 */
+   epart.i = (ipart + 127 ) << 23;
+   
+   mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)];
+   
+   return epart.f * mpart;
 }
 
 
 /**
- * Fast version of 2^x
- * XXX the above function could be implemented in terms of this one.
+ * Fast approximation to exp(x).
  */
 static INLINE float
-util_fast_exp2(float x)
+util_fast_exp(float x)
 {
-   if (x >= 0.0f) {
-      int ipart = (int) x;
-      float fpart = x - (float) ipart;
-      float y = (float) (1 << ipart)
-         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
-      return y;
-   }
-   else {
-      /* exp(-x) = 1.0 / exp(x) */
-      int ipart = (int) -x;
-      float fpart = -x - (float) ipart;
-      float y = (float) (1 << ipart)
-         * pow2_table[(int) (fpart * POW2_TABLE_SCALE)];
-      return 1.0f / y;
-   }
+   const float k = 1.44269f; /* = log2(e) */
+   return util_fast_exp2(k * x);
 }
 
 
-/**
- * Based on code from http://www.flipcode.com/archives/Fast_log_Function.shtml
- */
+#define LOG2_TABLE_SIZE_LOG2 8
+#define LOG2_TABLE_SIZE (1 << LOG2_TABLE_SIZE_LOG2)
+extern float log2_table[LOG2_TABLE_SIZE];
+
+
 static INLINE float
-util_fast_log2(float val)
+util_fast_log2(float x)
 {
    union fi num;
-   int log_2;
-   num.f = val;
-   log_2 = ((num.i >> 23) & 255) - 128;
-   num.i &= ~(255 << 23);
-   num.i += 127 << 23;
-   num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3;
-   return num.f + log_2;
+   float epart, mpart;
+   num.f = x;
+   epart = (float)(((num.i & 0x7f800000) >> 23) - 127);
+   mpart = log2_table[(num.i & 0x007fffff) >> (23 - LOG2_TABLE_SIZE_LOG2)];
+   return epart + mpart;
 }
 
 
 static INLINE float
 util_fast_pow(float x, float y)
 {
-   /* XXX these tests may need adjustment */
-   if (y >= 3.0f && (-0.02f <= x && x <= 0.02f))
-      return 0.0f;
-   if (y >= 50.0f && (-0.9f <= x && x <= 0.9f))
-      return 0.0f;
    return util_fast_exp2(util_fast_log2(x) * y);
 }
author	José Fonseca <jrfonseca@tungstengraphics.com>	2008-10-01 01:13:40 +0900
committer	José Fonseca <jrfonseca@tungstengraphics.com>	2008-10-01 01:13:40 +0900
commit	8415d06d90a197e16554dab98d160334fd9f9f93 (patch)
tree	25d004929ecac18de0e4b8bf4180bb2b33ba45f7 /src
parent	5dc8e67078be8b8c42a809311debd275ac7d64a7 (diff)