diff options
author | Brian <brian@yutani.localnet.net> | 2006-12-13 14:49:41 -0700 |
---|---|---|
committer | Brian <brian@yutani.localnet.net> | 2006-12-13 14:49:41 -0700 |
commit | 8627bf14524a85cedc3d1794fce9f562fd12bf79 (patch) | |
tree | c7747789bb8dae6a2508a76e2cfd52a7c9703b73 /src | |
parent | aff8e204d205b5d424d2c39a5d9e004caaa1eab1 (diff) |
Rewrite/simplify most built-in functions to use updated set of __asm instructions.
Diffstat (limited to 'src')
-rwxr-xr-x | src/mesa/shader/slang/library/slang_builtin_vec4.gc | 20 | ||||
-rwxr-xr-x | src/mesa/shader/slang/library/slang_common_builtin.gc | 1504 | ||||
-rwxr-xr-x | src/mesa/shader/slang/library/slang_core.gc | 251 |
3 files changed, 1105 insertions, 670 deletions
diff --git a/src/mesa/shader/slang/library/slang_builtin_vec4.gc b/src/mesa/shader/slang/library/slang_builtin_vec4.gc index d549c0133a..f075a886bd 100755 --- a/src/mesa/shader/slang/library/slang_builtin_vec4.gc +++ b/src/mesa/shader/slang/library/slang_builtin_vec4.gc @@ -181,10 +181,10 @@ float dot (vec3 v, vec3 u) { return v4.x; } -float dot (vec4 v, vec4 u) { - __asm vec4_dot v, u; - return v.x; -} +//float dot (vec4 v, vec4 u) { +// __asm vec4_dot v, u; +// return v.x; +//} float length (vec3 v) { @@ -199,14 +199,10 @@ float length (vec4 v) { } -vec3 normalize (vec3 v) { - vec4 u = vec4 (v, 0.0); - vec4 w = u; - __asm vec4_dot u, u; - float l = sqrt (u.x); - __asm float_to_vec4 u, l; - __asm vec4_divide w, u; - return w.xyz; +vec3 normalize (vec3 v) +{ + float s = inversesqrt(dot(v,v)); + __retVal = v * s; } vec4 normalize (vec4 v) { diff --git a/src/mesa/shader/slang/library/slang_common_builtin.gc b/src/mesa/shader/slang/library/slang_common_builtin.gc index 768cef5474..0e94979d92 100755 --- a/src/mesa/shader/slang/library/slang_common_builtin.gc +++ b/src/mesa/shader/slang/library/slang_common_builtin.gc @@ -26,6 +26,8 @@ // From Shader Spec, ver. 1.10, rev. 59 // +//bp: XXX these will probably go away since the value needs to be +//determined at runtime and may vary from one GLcontext to another... const int gl_MaxLights = 8; const int gl_MaxClipPlanes = 6; const int gl_MaxTextureUnits = 8; @@ -155,128 +157,159 @@ struct gl_FogParameters { uniform gl_FogParameters gl_Fog; + + + + // // 8.1 Angle and Trigonometry Functions // -float radians (float deg) { - return 3.141593 * deg / 180.0; +//// radians + +float radians(const float deg) +{ + const float c = 3.1415926 / 180.0; + __asm vec4_multiply __retVal.x, deg, c; } -vec2 radians (vec2 deg) { - return vec2 (3.141593) * deg / vec2 (180.0); +vec2 radians(const vec2 deg) +{ + const float c = 3.1415926 / 180.0; + __asm vec4_multiply __retVal.xy, deg.xy, c.xx; } -vec3 radians (vec3 deg) { - return vec3 (3.141593) * deg / vec3 (180.0); +vec3 radians(const vec3 deg) +{ + const float c = 3.1415926 / 180.0; + __asm vec4_multiply __retVal.xyz, deg.xyz, c.xxx; } -vec4 radians (vec4 deg) { - return vec4 (3.141593) * deg / vec4 (180.0); +vec4 radians(const vec4 deg) +{ + const float c = 3.1415926 / 180.0; + __asm vec4_multiply __retVal, deg, c.xxxx; } -float degrees (float rad) { - return 180.0 * rad / 3.141593; + +//// degrees + +float degrees(const float rad) +{ + const float c = 180.0 / 3.1415926; + __asm vec4_multiply __retVal.x, rad, c; } -vec2 degrees (vec2 rad) { - return vec2 (180.0) * rad / vec2 (3.141593); +vec2 degrees(const vec2 rad) +{ + const float c = 3.1415926 / 180.0; + __asm vec4_multiply __retVal.xy, rad.xy, c.xx; } -vec3 degrees (vec3 rad) { - return vec3 (180.0) * rad / vec3 (3.141593); +vec3 degrees(const vec3 rad) +{ + const float c = 3.1415926 / 180.0; + __asm vec4_multiply __retVal.xyz, rad.xyz, c.xxx; } -vec4 degrees (vec4 rad) { - return vec4 (180.0) * rad / vec4 (3.141593); +vec4 degrees(const vec4 rad) +{ + const float c = 3.1415926 / 180.0; + __asm vec4_multiply __retVal, rad, c.xxxx; } -float sin (float angle) { - float x; - __asm float_sine x, angle; - return x; + +//// sin + +float sin(const float radians) +{ + __asm float_sine __retVal.x, radians; } -vec2 sin (vec2 angle) { - return vec2 ( - sin (angle.x), - sin (angle.y) - ); +vec2 sin(const vec2 radians) +{ + __asm float_sine __retVal.x, radians.x; + __asm float_sine __retVal.y, radians.y; } -vec3 sin (vec3 angle) { - return vec3 ( - sin (angle.x), - sin (angle.y), - sin (angle.z) - ); +vec3 sin(const vec3 radians) +{ + __asm float_sine __retVal.x, radians.x; + __asm float_sine __retVal.y, radians.y; + __asm float_sine __retVal.z, radians.z; } -vec4 sin (vec4 angle) { - return vec4 ( - sin (angle.x), - sin (angle.y), - sin (angle.z), - sin (angle.w) - ); +vec4 sin(const vec4 radians) +{ + __asm float_sine __retVal.x, radians.x; + __asm float_sine __retVal.y, radians.y; + __asm float_sine __retVal.z, radians.z; + __asm float_sine __retVal.w, radians.w; } -float cos (float angle) { - return sin (angle + 1.5708); + +//// cos + +float cos(const float radians) +{ + __asm float_cosine __retVal.x, radians; } -vec2 cos (vec2 angle) { - return vec2 ( - cos (angle.x), - cos (angle.y) - ); +vec2 cos(const vec2 radians) +{ + __asm float_cosine __retVal.x, radians.x; + __asm float_cosine __retVal.y, radians.y; } -vec3 cos (vec3 angle) { - return vec3 ( - cos (angle.x), - cos (angle.y), - cos (angle.z) - ); +vec3 cos(const vec3 radians) +{ + __asm float_cosine __retVal.x, radians.x; + __asm float_cosine __retVal.y, radians.y; + __asm float_cosine __retVal.z, radians.z; } -vec4 cos (vec4 angle) { - return vec4 ( - cos (angle.x), - cos (angle.y), - cos (angle.z), - cos (angle.w) - ); +vec4 cos(const vec4 radians) +{ + __asm float_cosine __retVal.x, radians.x; + __asm float_cosine __retVal.y, radians.y; + __asm float_cosine __retVal.z, radians.z; + __asm float_cosine __retVal.w, radians.w; } -float tan (float angle) { - return sin (angle) / cos (angle); + + +//// tan + +float tan(const float angle) +{ + const float s = sin(angle); + const float c = cos(angle); + return s / c; } -vec2 tan (vec2 angle) { - return vec2 ( - tan (angle.x), - tan (angle.y) - ); +vec2 tan(const vec2 angle) +{ + const vec2 s = sin(angle); + const vec2 c = cos(angle); + return s / c; } -vec3 tan (vec3 angle) { - return vec3 ( - tan (angle.x), - tan (angle.y), - tan (angle.z) - ); +vec3 tan(const vec3 angle) +{ + const vec3 s = sin(angle); + const vec3 c = cos(angle); + return s / c; } -vec4 tan (vec4 angle) { - return vec4 ( - tan (angle.x), - tan (angle.y), - tan (angle.z), - tan (angle.w) - ); +vec4 tan(const vec4 angle) +{ + const vec4 s = sin(angle); + const vec4 c = cos(angle); + return s / c; } + + float asin (float x) { float y; __asm float_arcsine y, x; @@ -404,675 +437,884 @@ vec4 atan (vec4 u, vec4 v) { // 8.2 Exponential Functions // -float pow (float x, float y) { - float p; - __asm float_power p, x, y; - return p; +//// pow + +float pow(const float a, const float b) +{ + __asm float_power __retVal.x, a, b; } -vec2 pow (vec2 v, vec2 u) { - return vec2 ( - pow (v.x, u.x), - pow (v.y, u.y) - ); +vec2 pow(const vec2 a, const vec2 b) +{ + __asm float_power __retVal.x, a.x, b.x; + __asm float_power __retVal.y, a.y, b.y; } -vec3 pow (vec3 v, vec3 u) { - return vec3 ( - pow (v.x, u.x), - pow (v.y, u.y), - pow (v.z, u.z) - ); +vec3 pow(const vec3 a, const vec3 b) +{ + __asm float_power __retVal.x, a.x, b.x; + __asm float_power __retVal.y, a.y, b.y; + __asm float_power __retVal.z, a.z, b.z; } -vec4 pow (vec4 v, vec4 u) { - return vec4 ( - pow (v.x, u.x), - pow (v.y, u.y), - pow (v.z, u.z), - pow (v.w, u.w) - ); +vec4 pow(const vec4 a, const vec4 b) +{ + __asm float_power __retVal.x, a.x, b.x; + __asm float_power __retVal.y, a.y, b.y; + __asm float_power __retVal.z, a.z, b.z; + __asm float_power __retVal.w, a.w, b.w; } -float exp (float x) { - return pow (2.71828183, x); + +//// exp + +float exp(const float a) +{ + __asm float_exp __retVal.x, a; } -vec2 exp (vec2 v) { - return pow (vec2 (2.71828183), v); +vec2 exp(const vec2 a) +{ + __asm float_exp __retVal.x, a.x; + __asm float_exp __retVal.y, a.y; } -vec3 exp (vec3 v) { - return pow (vec3 (2.71828183), v); +vec3 exp(const vec3 a) +{ + __asm float_exp __retVal.x, a.x; + __asm float_exp __retVal.y, a.y; + __asm float_exp __retVal.z, a.z; } -vec4 exp (vec4 v) { - return pow (vec4 (2.71828183), v); +vec4 exp(const vec4 a) +{ + __asm float_exp __retVal.x, a.x; + __asm float_exp __retVal.y, a.y; + __asm float_exp __retVal.z, a.z; + __asm float_exp __retVal.w, a.w; } -float log2 (float x) { - float y; - __asm float_log2 y, x; - return y; + + +//// log2 + +float log2(const float x) +{ + __asm float_log2 __retVal.x, x; } -vec2 log2 (vec2 v) { - return vec2 ( - log2 (v.x), - log2 (v.y) - ); +vec2 log2(const vec2 v) +{ + __asm float_log2 __retVal.x, v.x; + __asm float_log2 __retVal.y, v.y; } -vec3 log2 (vec3 v) { - return vec3 ( - log2 (v.x), - log2 (v.y), - log2 (v.z) - ); +vec3 log2(const vec3 v) +{ + __asm float_log2 __retVal.x, v.x; + __asm float_log2 __retVal.y, v.y; + __asm float_log2 __retVal.z, v.z; } -vec4 log2 (vec4 v) { - return vec4 ( - log2 (v.x), - log2 (v.y), - log2 (v.z), - log2 (v.w) - ); +vec4 log2(const vec4 v) +{ + __asm float_log2 __retVal.x, v.x; + __asm float_log2 __retVal.y, v.y; + __asm float_log2 __retVal.z, v.z; + __asm float_log2 __retVal.w, v.w; } -float log (float x) { - return log2 (x) / log2 (2.71828183); + +//// log (natural log) + +float log(const float x) +{ + // note: logBaseB(x) = logBaseN(x) / logBaseN(B) + // compute log(x) = log2(x) / log2(e) + // c = 1.0 / log2(e) = 0.693147181 + const float c = 0.693147181; + return log2(x) * c; } -vec2 log (vec2 v) { - return log2 (v) / log2 (vec2 (2.71828183)); +vec2 log(const vec2 v) +{ + const float c = 0.693147181; + return log2(v) * c; } -vec3 log (vec3 v) { - return log2 (v) / log2 (vec3 (2.71828183)); +vec3 log(const vec3 v) +{ + const float c = 0.693147181; + return log2(v) * c; } -vec4 log (vec4 v) { - return log2 (v) / log2 (vec4 (2.71828183)); +vec4 log(const vec4 v) +{ + const float c = 0.693147181; + return log2(v) * c; } -float exp2 (float x) { - return pow (2.0, x); + +//// exp2 + +float exp2(const float a) +{ + __asm float_exp2 __retVal.x, a; } -vec2 exp2 (vec2 v) { - return pow (vec2 (2.0), v); +vec2 exp2(const vec2 a) +{ + __asm float_exp2 __retVal.x, a.x; + __asm float_exp2 __retVal.y, a.y; } -vec3 exp2 (vec3 v) { - return pow (vec3 (2.0), v); +vec3 exp2(const vec3 a) +{ + __asm float_exp2 __retVal.x, a.x; + __asm float_exp2 __retVal.y, a.y; + __asm float_exp2 __retVal.z, a.z; } -vec4 exp2 (vec4 v) { - return pow (vec4 (2.0), v); +vec4 exp2(const vec4 a) +{ + __asm float_exp2 __retVal.x, a.x; + __asm float_exp2 __retVal.y, a.y; + __asm float_exp2 __retVal.z, a.z; + __asm float_exp2 __retVal.w, a.w; } -float sqrt (float x) { - return pow (x, 0.5); + +//// sqrt + +float sqrt(const float x) +{ + float r; + __asm float_rsq r, x; + __asm float_rcp __retVal.x, r; } -vec2 sqrt (vec2 v) { - return pow (v, vec2 (0.5)); +vec2 sqrt(const vec2 v) +{ + float r; + __asm float_rsq r, v.x; + __asm float_rcp __retVal.x, r; + __asm float_rsq r, v.y; + __asm float_rcp __retVal.y, r; } -vec3 sqrt (vec3 v) { - return pow (v, vec3 (0.5)); +vec3 sqrt(const vec3 v) +{ + float r; + __asm float_rsq r, v.x; + __asm float_rcp __retVal.x, r; + __asm float_rsq r, v.y; + __asm float_rcp __retVal.y, r; + __asm float_rsq r, v.z; + __asm float_rcp __retVal.z, r; } -vec4 sqrt (vec4 v) { - return pow (v, vec4 (0.5)); +vec4 sqrt(const vec4 v) +{ + float r; + __asm float_rsq r, v.x; + __asm float_rcp __retVal.x, r; + __asm float_rsq r, v.y; + __asm float_rcp __retVal.y, r; + __asm float_rsq r, v.z; + __asm float_rcp __retVal.z, r; + __asm float_rsq r, v.w; + __asm float_rcp __retVal.w, r; } -float inversesqrt (float x) { - return 1.0 / sqrt (x); + +//// inversesqrt + +float inversesqrt(const float x) +{ + __asm float_rsq __retVal.x, x; } -vec2 inversesqrt (vec2 v) { - return vec2 (1.0) / sqrt (v); +vec2 inversesqrt(const vec2 v) +{ + __asm float_rsq __retVal.x, v.x; + __asm float_rsq __retVal.y, v.y; } -vec3 inversesqrt (vec3 v) { - return vec3 (1.0) / sqrt (v); +vec3 inversesqrt(const vec3 v) +{ + __asm float_rsq __retVal.x, v.x; + __asm float_rsq __retVal.y, v.y; + __asm float_rsq __retVal.z, v.z; } -vec4 inversesqrt (vec4 v) { - return vec4 (1.0) / sqrt (v); +vec4 inversesqrt(const vec4 v) +{ + __asm float_rsq __retVal.x, v.x; + __asm float_rsq __retVal.y, v.y; + __asm float_rsq __retVal.z, v.z; + __asm float_rsq __retVal.w, v.w; } + +//// normalize + +float normalize(const float x) +{ + __retVal.x = 1.0; +} + +vec2 normalize(const vec2 v) +{ + const float s = inversesqrt(dot(v, v)); + __asm vec4_multiply __retVal.xy, v, s.xx; +} + +vec3 normalize(const vec3 v) +{ + const float s = inversesqrt(dot(v, v)); + __asm vec4_multiply __retVal.xyz, v, s.xxx; +} + +vec4 normalize(const vec4 v) +{ + const float s = inversesqrt(dot(v, v)); + __asm vec4_multiply __retVal, v, s.xxxx; +} + + + // // 8.3 Common Functions // -float abs (float x) { - return x >= 0.0 ? x : -x; + +//// abs + +float abs(const float a) +{ + __asm vec4_abs __retVal.x, a; } -vec2 abs (vec2 v) { - return vec2 ( - abs (v.x), - abs (v.y) - ); +vec2 abs(const vec2 a) +{ + __asm vec4_abs __retVal.xy, a; } -vec3 abs (vec3 v) { - return vec3 ( - abs (v.x), - abs (v.y), - abs (v.z) - ); +vec3 abs(const vec3 a) +{ + __asm vec4_abs __retVal.xyz, a; } -vec4 abs (vec4 v) { - return vec4 ( - abs (v.x), - abs (v.y), - abs (v.z), - abs (v.w) - ); +vec4 abs(const vec4 a) +{ + __asm vec4_abs __retVal, a; } -float sign (float x) { - return x > 0.0 ? 1.0 : x < 0.0 ? -1.0 : 0.0; + +//// sign + +float sign(const float x) +{ + float p, n; + __asm vec4_sgt p.x, x, 0.0; // p = (x > 0) + __asm vec4_sgt n.x, 0.0, x; // n = (x < 0) + __asm vec4_subtract __retVal.x, p, n; // sign = p - n } -vec2 sign (vec2 v) { - return vec2 ( - sign (v.x), - sign (v.y) - ); +vec2 sign(const vec2 v) +{ + vec2 p, n; + __asm vec4_sgt p.xy, v, 0.0; + __asm vec4_sgt n.xy, 0.0, v; + __asm vec4_subtract __retVal.xy, p, n; } -vec3 sign (vec3 v) { - return vec3 ( - sign (v.x), - sign (v.y), - sign (v.z) - ); +vec3 sign(const vec3 v) +{ + vec3 p, n; + __asm vec4_sgt p.xyz, v, 0.0; + __asm vec4_sgt n.xyz, 0.0, v; + __asm vec4_subtract __retVal.xyz, p, n; } -vec4 sign (vec4 v) { - return vec4 ( - sign (v.x), - sign (v.y), - sign (v.z), - sign (v.w) - ); +vec4 sign(const vec4 v) +{ + vec4 p, n; + __asm vec4_sgt p, v, 0.0; + __asm vec4_sgt n, 0.0, v; + __asm vec4_subtract __retVal, p, n; } -float floor (float x) { - float y; - __asm float_floor y, x; - return y; + +//// floor + +float floor(const float a) +{ + __asm vec4_floor __retVal.x, a; } -vec2 floor (vec2 v) { - return vec2 ( - floor (v.x), - floor (v.y) - ); +vec2 floor(const vec2 a) +{ + __asm vec4_floor __retVal.xy, a; } -vec3 floor (vec3 v) { - return vec3 ( - floor (v.x), - floor (v.y), - floor (v.z) - ); +vec3 floor(const vec3 a) +{ + __asm vec4_floor __retVal.xyz, a; } -vec4 floor (vec4 v) { - return vec4 ( - floor (v.x), - floor (v.y), - floor (v.z), - floor (v.w) - ); +vec4 floor(const vec4 a) +{ + __asm vec4_floor __retVal, a; } -float ceil (float x) { - float y; - __asm float_ceil y, x; - return y; + +//// ceil + +float ceil(const float a) +{ + // XXX this could be improved + float b = -a; + __asm vec4_floor b, b; + __retVal.x = -b; } -vec2 ceil (vec2 v) { - return vec2 ( - ceil (v.x), - ceil (v.y) - ); +vec2 ceil(const vec2 a) +{ + vec2 b = -a; + __asm vec4_floor b, b; + __retVal.xy = -b; } -vec3 ceil (vec3 v) { - return vec3 ( - ceil (v.x), - ceil (v.y), - ceil (v.z) - ); +vec3 ceil(const vec3 a) +{ + vec3 b = -a; + __asm vec4_floor b, b; + __retVal.xyz = -b; } -vec4 ceil (vec4 v) { - return vec4 ( - ceil (v.x), - ceil (v.y), - ceil (v.z), - ceil (v.w) - ); +vec4 ceil(const vec4 a) +{ + vec4 b = -a; + __asm vec4_floor b, b; + __retVal = -b; } -float fract (float x) { - return x - floor (x); + +//// fract + +float fract(const float a) +{ + __asm vec4_frac __retVal.x, a; } -vec2 fract (vec2 v) { - return v - floor (v); +vec2 fract(const vec2 a) +{ + __asm vec4_frac __retVal.xy, a; } -vec3 fract (vec3 v) { - return v - floor (v); +vec3 fract(const vec3 a) +{ + __asm vec4_frac __retVal.xyz, a; } -vec4 fract (vec4 v) { - return v - floor (v); +vec4 fract(const vec4 a) +{ + __asm vec4_frac __retVal, a; } -float mod (float x, float y) { - return x - y * floor (x / y); + +//// mod (very untested!) + +float mod(const float a, const float b) +{ + float oneOverB; + __asm float_rcp oneOverB, b; + __retVal.x = a - b * floor(a * oneOverB); } -vec2 mod (vec2 v, float u) { - return v - u * floor (v / u); +vec2 mod(const vec2 a, const float b) +{ + float oneOverB; + __asm float_rcp oneOverB, b; + __retVal.xy = a - b * floor(a * oneOverB); } -vec3 mod (vec3 v, float u) { - return v - u * floor (v / u); +vec3 mod(const vec3 a, const float b) +{ + float oneOverB; + __asm float_rcp oneOverB, b; + __retVal.xyz = a - b * floor(a * oneOverB); } -vec4 mod (vec4 v, float u) { - return v - u * floor (v / u); +vec4 mod(const vec4 a, const float b) +{ + float oneOverB; + __asm float_rcp oneOverB, b; + __retVal = a - b * floor(a * oneOverB); } -vec2 mod (vec2 v, vec2 u) { - return v - u * floor (v / u); +vec2 mod(const vec2 a, const vec2 b) +{ + float oneOverBx, oneOverBy; + __asm float_rcp oneOverBx, b.x; + __asm float_rcp oneOverBy, b.y; + __retVal.x = a.x - b.x * floor(a.x * oneOverBx); + __retVal.y = a.y - b.y * floor(a.y * oneOverBy); } -vec3 mod (vec3 v, vec3 u) { - return v - u * floor (v / u); +vec3 mod(const vec3 a, const vec3 b) +{ + float oneOverBx, oneOverBy, oneOverBz; + __asm float_rcp oneOverBx, b.x; + __asm float_rcp oneOverBy, b.y; + __asm float_rcp oneOverBz, b.z; + __retVal.x = a.x - b.x * floor(a.x * oneOverBx); + __retVal.y = a.y - b.y * floor(a.y * oneOverBy); + __retVal.z = a.z - b.z * floor(a.z * oneOverBz); } -vec4 mod (vec4 v, vec4 u) { - return v - u * floor (v / u); +vec4 mod(const vec4 a, const vec4 b) +{ + float oneOverBx, oneOverBy, oneOverBz, oneOverBw; + __asm float_rcp oneOverBx, b.x; + __asm float_rcp oneOverBy, b.y; + __asm float_rcp oneOverBz, b.z; + __asm float_rcp oneOverBw, b.w; + __retVal.x = a.x - b.x * floor(a.x * oneOverBx); + __retVal.y = a.y - b.y * floor(a.y * oneOverBy); + __retVal.z = a.z - b.z * floor(a.z * oneOverBz); + __retVal.w = a.w - b.w * floor(a.w * oneOverBz); } -float min (float x, float y) { - return x < y ? x : y; + +//// min + +float min(const float a, const float b) +{ + __asm vec4_min __retVal.x, a.x, b.x; } -vec2 min (vec2 v, vec2 u) { - return vec2 ( - min (v.x, u.x), - min (v.y, u.y) - ); +vec2 min(const vec2 a, const vec2 b) +{ + __asm vec4_min __retVal.xy, a.xy, b.xy; } -vec3 min (vec3 v, vec3 u) { - return vec3 ( - min (v.x, u.x), - min (v.y, u.y), - min (v.z, u.z) - ); +vec3 min(const vec3 a, const vec3 b) +{ + __asm vec4_min __retVal.xyz, a.xyz, b.xyz; } -vec4 min (vec4 v, vec4 u) { - return vec4 ( - min (v.x, u.x), - min (v.y, u.y), - min (v.z, u.z), - min (v.w, u.w) - ); +vec4 min(const vec4 a, const vec4 b) +{ + __asm vec4_min __retVal, a, b; } -vec2 min (vec2 v, float y) { - return min (v, vec2 (y)); +vec2 min(const vec2 a, const float b) +{ + __asm vec4_min __retVal, a.xy, b.xx; } -vec3 min (vec3 v, float y) { - return min (v, vec3 (y)); +vec3 min(const vec3 a, const float b) +{ + __asm vec4_min __retVal, a.xyz, b.xxx; } -vec4 min (vec4 v, float y) { - return min (v, vec4 (y)); +vec4 min(const vec4 a, const float b) +{ + __asm vec4_min __retVal, a, b.xxxx; } -float max (float x, float y) { - return x < y ? y : x; + +//// max + +float max(const float a, const float b) +{ + __asm vec4_max __retVal.x, a.x, b.x; } -vec2 max (vec2 v, vec2 u) { - return vec2 ( - max (v.x, u.x), - max (v.y, u.y) - ); +vec2 max(const vec2 a, const vec2 b) +{ + __asm vec4_max __retVal.xy, a.xy, b.xy; } -vec3 max (vec3 v, vec3 u) { - return vec3 ( - max (v.x, u.x), - max (v.y, u.y), - max (v.z, u.z) - ); +vec3 max(const vec3 a, const vec3 b) +{ + __asm vec4_max __retVal.xyz, a.xyz, b.xyz; } -vec4 max (vec4 v, vec4 u) { - return vec4 ( - max (v.x, u.x), - max (v.y, u.y), - max (v.z, u.z), - max (v.w, u.w) - ); +vec4 max(const vec4 a, const vec4 b) +{ + __asm vec4_max __retVal, a, b; } -vec2 max (vec2 v, float y) { - return max (v, vec2 (y)); +vec2 max(const vec2 a, const float b) +{ + __asm vec4_max __retVal, a.xy, b.xx; } -vec3 max (vec3 v, float y) { - return max (v, vec3 (y)); +vec3 max(const vec3 a, const float b) +{ + __asm vec4_max __retVal, a.xyz, b.xxx; } -vec4 max (vec4 v, float y) { - return max (v, vec4 (y)); +vec4 max(const vec4 a, const float b) +{ + __asm vec4_max __retVal, a, b.xxxx; } -float clamp (float x, float minVal, float maxVal) { - return min (max (x, minVal), maxVal); + +//// clamp + +float clamp(const float val, const float minVal, const float maxVal) +{ + float t; + __asm vec4_max t, val, minVal; + __asm vec4_min __retVal.x, t, maxVal; } -vec2 clamp (vec2 x, float minVal, float maxVal) { - return min (max (x, minVal), maxVal); +vec2 clamp(const vec2 val, const float minVal, const float maxVal) +{ + vec2 t; + __asm vec4_max t.xy, val.xy, minVal.xx; + __asm vec4_min __retVal.xy, t.xy, maxVal.xx; } -vec3 clamp (vec3 x, float minVal, float maxVal) { - return min (max (x, minVal), maxVal); +vec3 clamp(const vec3 val, const float minVal, const float maxVal) +{ + vec3 t; + __asm vec4_max t.xyz, val.xyz, minVal.xxx; + __asm vec4_min __retVal.xyz, t.xyz, maxVal.xxx; } -vec4 clamp (vec4 x, float minVal, float maxVal) { - return min (max (x, minVal), maxVal); +vec4 clamp(const vec4 val, const float minVal, const float maxVal) +{ + vec4 t; + __asm vec4_max t, val, minVal.xxxx; + __asm vec4_min __retVal, t, maxVal.xxxx; } -vec2 clamp (vec2 x, vec2 minVal, vec2 maxVal) { - return min (max (x, minVal), maxVal); +vec2 clamp(const vec2 val, const vec2 minVal, const vec2 maxVal) +{ + vec2 t; + __asm vec4_max t.xy, val.xy, minVal.xy; + __asm vec4_min __retVal.xy, t.xy, maxVal.xxxx; } -vec3 clamp (vec3 x, vec3 minVal, vec3 maxVal) { - return min (max (x, minVal), maxVal); +vec3 clamp(const vec3 val, const vec3 minVal, const vec3 maxVal) +{ + vec3 t; + __asm vec4_max t.xyz, val.xyz, minVal.xyz; + __asm vec4_min __retVal.xyz, t.xyz, maxVal.xxxx; } -vec4 clamp (vec4 x, vec4 minVal, vec4 maxVal) { - return min (max (x, minVal), maxVal); +vec4 clamp(const vec4 val, const vec4 minVal, const vec4 maxVal) +{ + vec4 t; + __asm vec4_max t, val, minVal; + __asm vec4_min __retVal, t, maxVal; } -float mix (float x, float y, float a) { - return x * (1.0 - a) + y * a; + +//// mix + +float mix(const float x, const float y, const float a) +{ + const float d = y - x; + return x + d * a; // MAD } -vec2 mix (vec2 x, vec2 y, float a) { - return x * (1.0 - a) + y * a; +vec2 mix(const vec2 x, const vec2 y, const float a) +{ + const vec2 d = y - x; + return x + d * a; // MAD } -vec3 mix (vec3 x, vec3 y, float a) { - return x * (1.0 - a) + y * a; +vec3 mix(const vec3 x, const vec3 y, const float a) +{ + const vec3 d = y - x; + return x + d * a; // MAD } -vec4 mix (vec4 x, vec4 y, float a) { - return x * (1.0 - a) + y * a; +vec4 mix(const vec4 x, const vec4 y, const float a) +{ + const vec4 d = y - x; + return x + d * a; // MAD } -vec2 mix (vec2 x, vec2 y, vec2 a) { - return x * (1.0 - a) + y * a; +vec2 mix(const vec2 x, const vec2 y, const vec2 a) +{ + const vec2 d = y - x; + return x + d * a; // MAD } -vec3 mix (vec3 x, vec3 y, vec3 a) { - return x * (1.0 - a) + y * a; +vec3 mix(const vec3 x, const vec3 y, const vec3 a) +{ + const vec3 d = y - x; + return x + d * a; // MAD } -vec4 mix (vec4 x, vec4 y, vec4 a) { - return x * (1.0 - a) + y * a; +vec4 mix(const vec4 x, const vec4 y, const vec4 a) +{ + const vec4 d = y - x; + return x + d * a; // MAD } -float step (float edge, float x) { - return x < edge ? 0.0 : 1.0; + +//// step (untested) + +float step(const float edge, const float x) +{ + __asm vec4_sgt __retVal.x, x, edge; } -vec2 step (vec2 edge, vec2 v) { - return vec2 ( - step (edge.x, v.x), - step (edge.y, v.y) - ); +vec2 step(const vec2 edge, const vec2 x) +{ + __asm vec4_sgt __retVal.xy, x, edge; } -vec3 step (vec3 edge, vec3 v) { - return vec3 ( - step (edge.x, v.x), - step (edge.y, v.y), - step (edge.z, v.z) - ); +vec3 step(const vec3 edge, const vec3 x) +{ + __asm vec4_sgt __retVal.xyz, x, edge; } -vec4 step (vec4 edge, vec4 v) { - return vec4 ( - step (edge.x, v.x), - step (edge.y, v.y), - step (edge.z, v.z), - step (edge.w, v.w) - ); +vec4 step(const vec4 edge, const vec4 x) +{ + __asm vec4_sgt __retVal, x, edge; } -vec2 step (float edge, vec2 v) { - return step (vec2 (edge), v); +vec2 step(const float edge, const vec2 v) +{ + __asm vec4_sgt __retVal.xy, v, edge.xx; } -vec3 step (float edge, vec3 v) { - return step (vec3 (edge), v); +vec3 step(const float edge, const vec3 v) +{ + __asm vec4_sgt __retVal.xyz, v, edge.xxx; } -vec4 step (float edge, vec4 v) { - return step (vec4 (edge), v); +vec4 step(const float edge, const vec4 v) +{ + __asm vec4_sgt __retVal, v, edge.xxxx; } -float smoothstep (float edge0, float edge1, float x) { - float t = clamp ((x - edge0) / (edge1 - edge0), 0.0, 1.0); + +//// smoothstep (untested) + +float smoothstep(const float edge0, const float edge1, const float x) +{ + float t = clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0); return t * t * (3.0 - 2.0 * t); } -vec2 smoothstep (vec2 edge0, vec2 edge1, vec2 v) { - return vec2 ( - smoothstep (edge0.x, edge1.x, v.x), - smoothstep (edge0.y, edge1.y, v.y) - ); +vec2 smoothstep(const vec2 edge0, const vec2 edge1, const vec2 v) +{ + vec2 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * (3.0 - 2.0 * t); } -vec3 smoothstep (vec3 edge0, vec3 edge1, vec3 v) { - return vec3 ( - smoothstep (edge0.x, edge1.x, v.x), - smoothstep (edge0.y, edge1.y, v.y), - smoothstep (edge0.z, edge1.z, v.z) - ); +vec3 smoothstep(const vec3 edge0, const vec3 edge1, const vec3 v) +{ + vec3 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * (3.0 - 2.0 * t); } -vec4 smoothstep (vec4 edge0, vec4 edge1, vec4 v) { - return vec4 ( - smoothstep (edge0.x, edge1.x, v.x), - smoothstep (edge0.y, edge1.y, v.y), - smoothstep (edge0.z, edge1.z, v.z), - smoothstep (edge0.w, edge1.w, v.w) - ); +vec4 smoothstep(const vec4 edge0, const vec4 edge1, const vec4 v) +{ + vec4 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * (3.0 - 2.0 * t); } -vec2 smoothstep (float edge0, float edge1, vec2 v) { - return vec2 ( - smoothstep (edge0, edge1, v.x), - smoothstep (edge0, edge1, v.y) - ); +vec2 smoothstep(const float edge0, const float edge1, const vec2 v) +{ + vec2 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * (3.0 - 2.0 * t); } -vec3 smoothstep (float edge0, float edge1, vec3 v) { - return vec3 ( - smoothstep (edge0, edge1, v.x), - smoothstep (edge0, edge1, v.y), - smoothstep (edge0, edge1, v.z) - ); +vec3 smoothstep(const float edge0, const float edge1, const vec3 v) +{ + vec3 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * (3.0 - 2.0 * t); } -vec4 smoothstep (float edge0, float edge1, vec4 v) { - return vec4 ( - smoothstep (edge0, edge1, v.x), - smoothstep (edge0, edge1, v.y), - smoothstep (edge0, edge1, v.z), - smoothstep (edge0, edge1, v.w) - ); +vec4 smoothstep(const float edge0, const float edge1, const vec4 v) +{ + vec4 t = clamp((v - edge0) / (edge1 - edge0), 0.0, 1.0); + return t * t * (3.0 - 2.0 * t); } + + // // 8.4 Geometric Functions // -float dot (float x, float y) { - return x * y; -} -float dot (vec2 v, vec2 u) { - return v.x * u.x + v.y * u.y; -} +//// length -float dot (vec3 v, vec3 u) { - return v.x * u.x + v.y * u.y + v.z * u.z; +float length(const float x) +{ + return abs(x); } -float dot (vec4 v, vec4 u) { - return v.x * u.x + v.y * u.y + v.z * u.z + v.w * u.w; +float length(const vec2 v) +{ + float r; + const float p = dot(v, v); // p = v.x * v.x + v.y * v.y + __asm float_rsq r, p; // r = 1 / sqrt(p) + __asm float_rcp __retVal.x, r; // retVal = 1 / r } -float length (float x) { - return sqrt (dot (x, x)); +float length(const vec3 v) +{ + float r; + const float p = dot(v, v); // p = v.x * v.x + v.y * v.y + v.z * v.z + __asm float_rsq r, p; // r = 1 / sqrt(p) + __asm float_rcp __retVal.x, r; // retVal = 1 / r } -float length (vec2 v) { - return sqrt (dot (v, v)); +float length(const vec4 v) +{ + float r; + const float p = dot(v, v); // p = v.x * v.x + v.y * v.y + ... + __asm float_rsq r, p; // r = 1 / sqrt(p) + __asm float_rcp __retVal.x, r; // retVal = 1 / r } -float length (vec3 v) { - return sqrt (dot (v, v)); -} -float length (vec4 v) { - return sqrt (dot (v, v)); -} +//// distance -float distance (float x, float y) { - return length (x - y); +float distance(const float x, const float y) +{ + const float d = x - y; + return length(d); } -float distance (vec2 v, vec2 u) { - return length (v - u); +float distance(const vec2 v, const vec2 u) +{ + const vec2 d = v - u; + return length(d); } -float distance (vec3 v, vec3 u) { - return length (v - u); +float distance(const vec3 v, const vec3 u) +{ + const vec3 d = v - u; + return length(d); } -float distance (vec4 v, vec4 u) { - return length (v - u); +float distance(const vec4 v, const vec4 u) +{ + const vec4 d = v - u; + return length(d); } -vec3 cross (vec3 v, vec3 u) { - return vec3 ( - v.y * u.z - u.y * v.z, - v.z * u.x - u.z * v.x, - v.x * u.y - u.x * v.y - ); -} -float normalize (float x) { - return 1.0; -} +//// cross -vec2 normalize (vec2 v) { - return v / length (v); +vec3 cross(const vec3 v, const vec3 u) +{ + __asm vec3_cross __retVal.xyz, v, u; } -vec3 normalize (vec3 v) { - return v / length (v); -} -vec4 normalize (vec4 v) { - return v / length (v); -} +//// faceforward -float faceforward (float N, float I, float Nref) { - return dot (Nref, I) < 0.0 ? N : -N; +float faceforward(const float N, const float I, const float Nref) +{ + // this could probably be done better + const float d = dot(Nref, I); + float s; + __asm vec4_sgt s.x, 0.0, d; // s = (0.0 > d) ? 1 : 0 + return mix(-N, N, s); } -vec2 faceforward (vec2 N, vec2 I, vec2 Nref) { - return dot (Nref, I) < 0.0 ? N : -N; +vec2 faceforward(const vec2 N, const vec2 I, const vec2 Nref) +{ + // this could probably be done better + const float d = dot(Nref, I); + float s; + __asm vec4_sgt s.x, 0.0, d; // s = (0.0 > d) ? 1 : 0 + return mix(-N, N, s); } -vec3 faceforward (vec3 N, vec3 I, vec3 Nref) { - return dot (Nref, I) < 0.0 ? N : -N; +vec3 faceforward(const vec3 N, const vec3 I, const vec3 Nref) +{ + // this could probably be done better + const float d = dot(Nref, I); + float s; + __asm vec4_sgt s.x, 0.0, d; // s = (0.0 > d) ? 1 : 0 + return mix(-N, N, s); } -vec4 faceforward (vec4 N, vec4 I, vec4 Nref) { - return dot (Nref, I) < 0.0 ? N : -N; +vec4 faceforward(const vec4 N, const vec4 I, const vec4 Nref) +{ + // this could probably be done better + const float d = dot(Nref, I); + float s; + __asm vec4_sgt s.x, 0.0, d; // s = (0.0 > d) ? 1 : 0 + return mix(-N, N, s); } -float reflect (float I, float N) { - return I - 2.0 * dot (N, I) * N; + +//// reflect + +float reflect(const float I, const float N) +{ + return I - 2.0 * dot(N, I) * N; } -vec2 reflect (vec2 I, vec2 N) { - return I - 2.0 * dot (N, I) * N; +vec2 reflect(const vec2 I, const vec2 N) +{ + return I - 2.0 * dot(N, I) * N; } -vec3 reflect (vec3 I, vec3 N) { - return I - 2.0 * dot (N, I) * N; +vec3 reflect(const vec3 I, const vec3 N) +{ + return I - 2.0 * dot(N, I) * N; } -vec4 reflect (vec4 I, vec4 N) { - return I - 2.0 * dot (N, I) * N; +vec4 reflect(const vec4 I, const vec4 N) +{ + return I - 2.0 * dot(N, I) * N; } -float refract (float I, float N, float eta) { - float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I)); - if (k < 0.0) - return 0.0; - return eta * I - (eta * dot (N, I) + sqrt (k)) * N; +//// refract + +float refract(const float I, const float N, const float eta) +{ + float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)); + if (k < 0.0) + return 0.0; + return eta * I - (eta * dot(N, I) + sqrt(k)) * N; } -vec2 refract (vec2 I, vec2 N, float eta) { - float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I)); - if (k < 0.0) - return 0.0; - return eta * I - (eta * dot (N, I) + sqrt (k)) * N; +vec2 refract(const vec2 I, const vec2 N, const float eta) +{ + float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)); + if (k < 0.0) + return 0.0; + return eta * I - (eta * dot(N, I) + sqrt(k)) * N; } -vec3 refract (vec3 I, vec3 N, float eta) { - float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I)); - if (k < 0.0) - return 0.0; - return eta * I - (eta * dot (N, I) + sqrt (k)) * N; +vec3 refract(const vec3 I, const vec3 N, const float eta) +{ + float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)); + if (k < 0.0) + return 0.0; + return eta * I - (eta * dot(N, I) + sqrt(k)) * N; } -vec4 refract (vec4 I, vec4 N, float eta) { - float k = 1.0 - eta * eta * (1.0 - dot (N, I) * dot (N, I)); - if (k < 0.0) - return 0.0; - return eta * I - (eta * dot (N, I) + sqrt (k)) * N; +vec4 refract(const vec4 I, const vec4 N, const float eta) +{ + float k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)); + if (k < 0.0) + return 0.0; + return eta * I - (eta * dot(N, I) + sqrt(k)) * N; } + + + // // 8.5 Matrix Functions // @@ -1089,190 +1331,286 @@ mat4 matrixCompMult (mat4 m, mat4 n) { return mat4 (m[0] * n[0], m[1] * n[1], m[2] * n[2], m[3] * n[3]); } + + + // // 8.6 Vector Relational Functions // -bvec2 lessThan (vec2 v, vec2 u) { - return bvec2 (v.x < u.x, v.y < u.y); +//// lessThan + +bvec2 lessThan(const vec2 v, const vec2 u) +{ + __asm vec4_sgt __retVal.xy, u, v; } -bvec3 lessThan (vec3 v, vec3 u) { - return bvec3 (v.x < u.x, v.y < u.y, v.z < u.z); +bvec3 lessThan(const vec3 v, const vec3 u) +{ + __asm vec4_sgt __retVal.xyz, u, v; } -bvec4 lessThan (vec4 v, vec4 u) { - return bvec4 (v.x < u.x, v.y < u.y, v.z < u.z, v.w < u.w); +bvec4 lessThan(const vec4 v, const vec4 u) +{ + __asm vec4_sgt __retVal, u, v; } -bvec2 lessThan (ivec2 v, ivec2 u) { - return bvec2 (v.x < u.x, v.y < u.y); +bvec2 lessThan(const ivec2 v, const ivec2 u) +{ + __asm vec4_sgt __retVal.xy, u, v; } -bvec3 lessThan (ivec3 v, ivec3 u) { - return bvec3 (v.x < u.x, v.y < u.y, v.z < u.z); +bvec3 lessThan(const ivec3 v, const ivec3 u) +{ + __asm vec4_sgt __retVal.xyz, u, v; } -bvec4 lessThan (ivec4 v, ivec4 u) { - return bvec4 (v.x < u.x, v.y < u.y, v.z < u.z, v.w < u.w); +bvec4 lessThan(const ivec4 v, const ivec4 u) +{ + __asm vec4_sgt __retVal, u, v; } -bvec2 lessThanEqual (vec2 v, vec2 u) { - return bvec2 (v.x <= u.x, v.y <= u.y); + +//// lessThanEqual + +bvec2 lessThanEqual(const vec2 v, const vec2 u) +{ + __asm vec4_sge __retVal.xy, u, v; } -bvec3 lessThanEqual (vec3 v, vec3 u) { - return bvec3 (v.x <= u.x, v.y <= u.y, v.z <= u.z); +bvec3 lessThanEqual(const vec3 v, const vec3 u) +{ + __asm vec4_sge __retVal.xyz, u, v; } -bvec4 lessThanEqual (vec4 v, vec4 u) { - return bvec4 (v.x <= u.x, v.y <= u.y, v.z <= u.z, v.w <= u.w); +bvec4 lessThanEqual(const vec4 v, const vec4 u) +{ + __asm vec4_sge __retVal, u, v; } -bvec2 lessThanEqual (ivec2 v, ivec2 u) { - return bvec2 (v.x <= u.x, v.y <= u.y); +bvec2 lessThanEqual(const ivec2 v, const ivec2 u) +{ + __asm vec4_sge __retVal.xy, u, v; } -bvec3 lessThanEqual (ivec3 v, ivec3 u) { - return bvec3 (v.x <= u.x, v.y <= u.y, v.z <= u.z); +bvec3 lessThanEqual(const ivec3 v, const ivec3 u) +{ + __asm vec4_sge __retVal.xyz, u, v; } -bvec4 lessThanEqual (ivec4 v, ivec4 u) { - return bvec4 (v.x <= u.x, v.y <= u.y, v.z <= u.z, v.w <= u.w); +bvec4 lessThanEqual(const ivec4 v, const ivec4 u) +{ + __asm vec4_sge __retVal, u, v; } -bvec2 greaterThan (vec2 v, vec2 u) { - return bvec2 (v.x > u.x, v.y > u.y); + +//// greaterThan + +bvec2 greaterThan(const vec2 v, const vec2 u) +{ + __asm vec4_sgt __retVal.xy, v, u; } -bvec3 greaterThan (vec3 v, vec3 u) { - return bvec3 (v.x > u.x, v.y > u.y, v.z > u.z); +bvec3 greaterThan(const vec3 v, const vec3 u) +{ + __asm vec4_sgt __retVal.xyz, v, u; } -bvec4 greaterThan (vec4 v, vec4 u) { - return bvec4 (v.x > u.x, v.y > u.y, v.z > u.z, v.w > u.w); +bvec4 greaterThan(const vec4 v, const vec4 u) +{ + __asm vec4_sgt __retVal, v, u; } -bvec2 greaterThan (ivec2 v, ivec2 u) { - return bvec2 (v.x > u.x, v.y > u.y); +bvec2 greaterThan(const ivec2 v, const ivec2 u) +{ + __asm vec4_sgt __retVal.xy, v, u; } -bvec3 greaterThan (ivec3 v, ivec3 u) { - return bvec3 (v.x > u.x, v.y > u.y, v.z > u.z); +bvec3 greaterThan(const ivec3 v, const ivec3 u) +{ + __asm vec4_sgt __retVal.xyz, v, u; } -bvec4 greaterThan (ivec4 v, ivec4 u) { - return bvec4 (v.x > u.x, v.y > u.y, v.z > u.z, v.w > u.w); +bvec4 greaterThan(const ivec4 v, const ivec4 u) +{ + __asm vec4_sgt __retVal, v, u; } -bvec2 greaterThanEqual (vec2 v, vec2 u) { - return bvec2 (v.x >= u.x, v.y >= u.y); + +//// greaterThanEqual + +bvec2 greaterThanEqual(const vec2 v, const vec2 u) +{ + __asm vec4_sge __retVal.xy, v, u; } -bvec3 greaterThanEqual (vec3 v, vec3 u) { - return bvec3 (v.x >= u.x, v.y >= u.y, v.z >= u.z); +bvec3 greaterThanEqual(const vec3 v, const vec3 u) +{ + __asm vec4_sge __retVal.xyz, v, u; } -bvec4 greaterThanEqual (vec4 v, vec4 u) { - return bvec4 (v.x >= u.x, v.y >= u.y, v.z >= u.z, v.w >= u.w); +bvec4 greaterThanEqual(const vec4 v, const vec4 u) +{ + __asm vec4_sge __retVal, v, u; } -bvec2 greaterThanEqual (ivec2 v, ivec2 u) { - return bvec2 (v.x >= u.x, v.y >= u.y); +bvec2 greaterThanEqual(const ivec2 v, const ivec2 u) +{ + __asm vec4_sge __retVal.xy, v, u; } -bvec3 greaterThanEqual (ivec3 v, ivec3 u) { - return bvec3 (v.x >= u.x, v.y >= u.y, v.z >= u.z); +bvec3 greaterThanEqual(const ivec3 v, const ivec3 u) +{ + __asm vec4_sge __retVal.xyz, v, u; } -bvec4 greaterThanEqual (ivec4 v, ivec4 u) { - return bvec4 (v.x >= u.x, v.y >= u.y, v.z >= u.z, v.w >= u.w); +bvec4 greaterThanEqual(const ivec4 v, const ivec4 u) +{ + __asm vec4_sge __retVal, v, u; } -bvec2 equal (vec2 v, vec2 u) { - return bvec2 (v.x == u.x, v.y == u.y); + +//// equal + +bvec2 equal(const vec2 v, const vec2 u) +{ + __asm vec4_seq __retVal.xy, v, u; } -bvec3 equal (vec3 v, vec3 u) { - return bvec3 (v.x == u.x, v.y == u.y, v.z == u.z); +bvec3 equal(const vec3 v, const vec3 u) +{ + __asm vec4_seq __retVal.xyz, v, u; } -bvec4 equal (vec4 v, vec4 u) { - return bvec4 (v.x == u.x, v.y == u.y, v.z == u.z, v.w == u.w); +bvec4 equal(const vec4 v, const vec4 u) +{ + __asm vec4_seq __retVal, v, u; } -bvec2 equal (ivec2 v, ivec2 u) { - return bvec2 (v.x == u.x, v.y == u.y); +bvec2 equal(const ivec2 v, const ivec2 u) +{ + __asm vec4_seq __retVal.xy, v, u; } -bvec3 equal (ivec3 v, ivec3 u) { - return bvec3 (v.x == u.x, v.y == u.y, v.z == u.z); +bvec3 equal(const ivec3 v, const ivec3 u) +{ + __asm vec4_seq __retVal.xyz, v, u; } -bvec4 equal (ivec4 v, ivec4 u) { - return bvec4 (v.x == u.x, v.y == u.y, v.z == u.z, v.w == u.w); +bvec4 equal(const ivec4 v, const ivec4 u) +{ + __asm vec4_seq __retVal, v, u; } -bvec2 notEqual (vec2 v, vec2 u) { - return bvec2 (v.x != u.x, v.y != u.y); + +//// notEqual + +bvec2 notEqual(const vec2 v, const vec2 u) +{ + __asm vec4_sne __retVal.xy, v, u; } -bvec3 notEqual (vec3 v, vec3 u) { - return bvec3 (v.x != u.x, v.y != u.y, v.z != u.z); +bvec3 notEqual(const vec3 v, const vec3 u) +{ + __asm vec4_sne __retVal.xyz, v, u; } -bvec4 notEqual (vec4 v, vec4 u) { - return bvec4 (v.x != u.x, v.y != u.y, v.z != u.z, v.w != u.w); +bvec4 notEqual(const vec4 v, const vec4 u) +{ + __asm vec4_sne __retVal, v, u; } -bvec2 notEqual (ivec2 v, ivec2 u) { - return bvec2 (v.x != u.x, v.y != u.y); +bvec2 notEqual(const ivec2 v, const ivec2 u) +{ + __asm vec4_sne __retVal.xy, v, u; } -bvec3 notEqual (ivec3 v, ivec3 u) { - return bvec3 (v.x != u.x, v.y != u.y, v.z != u.z); +bvec3 notEqual(const ivec3 v, const ivec3 u) +{ + __asm vec4_sne __retVal.xyz, v, u; } -bvec4 notEqual (ivec4 v, ivec4 u) { - return bvec4 (v.x != u.x, v.y != u.y, v.z != u.z, v.w != u.w); +bvec4 notEqual(const ivec4 v, const ivec4 u) +{ + __asm vec4_sne __retVal, v, u; } -bool any (bvec2 v) { - return v.x || v.y; + +//// any + +bool any(const bvec2 v) +{ + float sum; + __asm vec4_add sum.x, v.x, v.y; + __asm vec4_sne __retVal.x, sum.x, 0.0; } -bool any (bvec3 v) { - return v.x || v.y || v.z; +bool any(const bvec3 v) +{ + float sum; + __asm vec4_add sum.x, v.x, v.y; + __asm vec4_add sum.x, sum.x, v.z; + __asm vec4_sne __retVal.x, sum.x, 0.0; } -bool any (bvec4 v) { - return v.x || v.y || v.z || v.w; +bool any(const bvec4 v) +{ + float sum; + __asm vec4_add sum.x, v.x, v.y; + __asm vec4_add sum.x, sum.x, v.z; + __asm vec4_add sum.x, sum.x, v.w; + __asm vec4_sne __retVal.x, sum.x, 0.0; } -bool all (bvec2 v) { + +//// all + +bool all (const vec2 v) +{ + float prod; + __asm vec4_multiply prod.x, v.x, v.y; + __asm vec4_sne __retVal.x, prod.x, 0.0; return v.x && v.y; } -bool all (bvec3 v) { - return v.x && v.y && v.z; +bool all (const bvec3 v) +{ + float prod; + __asm vec4_multiply prod.x, v.x, v.y; + __asm vec4_multiply prod.x, prod.x, v.z; + __asm vec4_sne __retVal.x, prod.x, 0.0; } -bool all (bvec4 v) { - return v.x && v.y && v.z && v.w; +bool all (const bvec4 v) +{ + float prod; + __asm vec4_multiply prod.x, v.x, v.y; + __asm vec4_multiply prod.x, prod.x, v.z; + __asm vec4_multiply prod.x, prod.x, v.w; + __asm vec4_sne __retVal.x, prod.x, 0.0; } -bvec2 not (bvec2 v) { - return bvec2 (!v.x, !v.y); + + +//// not + +bvec2 not (const bvec2 v) +{ + __asm vec4_seq __retVal.xy, v, 0.0; } -bvec3 not (bvec3 v) { - return bvec3 (!v.x, !v.y, !v.z); +bvec3 not (const bvec3 v) +{ + __asm vec4_seq __retVal.xyz, v, 0.0; } -bvec4 not (bvec4 v) { - return bvec4 (!v.x, !v.y, !v.z, !v.w); +bvec4 not (const bvec4 v) +{ + __asm vec4_seq __retVal, v, 0.0; } + + // // 8.7 Texture Lookup Functions // diff --git a/src/mesa/shader/slang/library/slang_core.gc b/src/mesa/shader/slang/library/slang_core.gc index 7a721a5a1d..8f1b0c2d3c 100755 --- a/src/mesa/shader/slang/library/slang_core.gc +++ b/src/mesa/shader/slang/library/slang_core.gc @@ -23,69 +23,93 @@ */ // -// This file defines nearly all constructors and operators for built-in data types, using -// extended language syntax. In general, compiler treats constructors and operators as -// ordinary functions with some exceptions. For example, the language does not allow -// functions to be called in constant expressions - here the exception is made to allow it. +// This file defines nearly all constructors and operators for built-in data +// types, using extended language syntax. In general, compiler treats +// constructors and operators as ordinary functions with some exceptions. +// For example, the language does not allow functions to be called in +// constant expressions - here the exception is made to allow it. // -// Each implementation provides its own version of this file. Each implementation can define -// the required set of operators and constructors in its own fashion. +// Each implementation provides its own version of this file. Each +// implementation can define the required set of operators and constructors +// in its own fashion. // -// The extended language syntax is only present when compiling this file. It is implicitly -// included at the very beginning of the compiled shader, so no built-in functions can be -// used. +// The extended language syntax is only present when compiling this file. +// It is implicitly included at the very beginning of the compiled shader, +// so no built-in functions can be used. // -// To communicate with the implementation, a special extended "__asm" keyword is used, followed -// by an instruction name (any valid identifier), a destination variable identifier and a -// a list of zero or more source variable identifiers. A variable identifier is a variable name -// declared earlier in the code (as a function parameter, local or global variable). -// An instruction name designates an instruction that must be exported by the implementation. -// Each instruction receives data from source variable identifiers and returns data in the -// destination variable identifier. +// To communicate with the implementation, a special extended "__asm" keyword +// is used, followed by an instruction name (any valid identifier), a +// destination variable identifier and a list of zero or more source +// variable identifiers. // -// It is up to the implementation how to define a particular operator or constructor. If it is -// expected to being used rarely, it can be defined in terms of other operators and constructors, +// A variable identifier is a variable name declared earlier in the code +// (as a function parameter, local or global variable). +// +// An instruction name designates an instruction that must be exported +// by the implementation. Each instruction receives data from source +// variable identifiers and returns data in the destination variable +// identifier. +// +// It is up to the implementation how to define a particular operator +// or constructor. If it is expected to being used rarely, it can be +// defined in terms of other operators and constructors, // for example: // // ivec2 __operator + (const ivec2 x, const ivec2 y) { // return ivec2 (x[0] + y[0], x[1] + y[1]); // } // -// If a particular operator or constructor is expected to be used very often or is an atomic -// operation (that is, an operation that cannot be expressed in terms of other operations or -// would create a dependency cycle) it must be defined using one or more __asm constructs. +// If a particular operator or constructor is expected to be used very +// often or is an atomic operation (that is, an operation that cannot be +// expressed in terms of other operations or would create a dependency +// cycle) it must be defined using one or more __asm constructs. // -// Each implementation must define constructors for all scalar types (bool, float, int). -// There are 9 scalar-to-scalar constructors (including identity constructors). However, -// since the language introduces special constructors (like matrix constructor with a single +// Each implementation must define constructors for all scalar types +// (bool, float, int). There are 9 scalar-to-scalar constructors +// (including identity constructors). However, since the language +// introduces special constructors (like matrix constructor with a single // scalar value), implementations must also implement these cases. // The compiler provides the following algorithm when resolving a constructor: // - try to find a constructor with a prototype matching ours, -// - if no constructor is found and this is a scalar-to-scalar constructor, raise an error, +// - if no constructor is found and this is a scalar-to-scalar constructor, +// raise an error, // - if a constructor is found, execute it and return, -// - count the size of the constructor parameter list - if it is less than the size of -// our constructor's type, raise an error, -// - for each parameter in the list do a recursive constructor matching for appropriate -// scalar fields in the constructed variable, +// - count the size of the constructor parameter list - if it is less than +// the size of our constructor's type, raise an error, +// - for each parameter in the list do a recursive constructor matching for +// appropriate scalar fields in the constructed variable, // -// Each implementation must also define a set of operators that deal with built-in data types. +// Each implementation must also define a set of operators that deal with +// built-in data types. // There are four kinds of operators: -// 1) Operators that are implemented only by the compiler: "()" (function call), "," (sequence) -// and "?:" (selection). -// 2) Operators that are implemented by the compiler by expressing it in terms of other operators: +// 1) Operators that are implemented only by the compiler: "()" (function +// call), "," (sequence) and "?:" (selection). +// 2) Operators that are implemented by the compiler by expressing it in +// terms of other operators: // - "." (field selection) - translated to subscript access, -// - "&&" (logical and) - translated to "<left_expr> ? <right_expr> : false", +// - "&&" (logical and) - translated to "<left_expr> ? <right_expr> : +// false", // - "||" (logical or) - translated to "<left_expr> ? true : <right_expr>", -// 3) Operators that can be defined by the implementation and if the required prototype is not -// found, standard behaviour is used: -// - "==", "!=", "=" (equality, assignment) - compare or assign matching fields one-by-one; -// note that at least operators for scalar data types must be defined by the implementation -// to get it work, -// 4) All other operators not mentioned above. If no required prototype is found, an error is -// raised. An implementation must follow the language specification to provide all valid -// operator prototypes. +// 3) Operators that can be defined by the implementation and if the required +// prototype is not found, standard behaviour is used: +// - "==", "!=", "=" (equality, assignment) - compare or assign +// matching fields one-by-one; +// note that at least operators for scalar data types must be defined +// by the implementation to get it work, +// 4) All other operators not mentioned above. If no required prototype is +// found, an error is raised. An implementation must follow the language +// specification to provide all valid operator prototypes. // +//bp: +vec4 vec4(const float a1, const float b1, const float c1, const float d1) +{ + __retVal.x = a1; + __retVal.y = b1; + __retVal.z = c1; + __retVal.w = d1; +} + int __constructor (const float f) { int i; __asm float_to_int i, f; @@ -154,6 +178,7 @@ vec3 __constructor (const bool b) { return vec3 (b ? 1.0 : 0.0); } +//bp: TODO replace with asm == f.xxxx vec4 __constructor (const float f) { return vec4 (f, f, f, f); } @@ -307,9 +332,11 @@ void __operator /= (inout float a, const float b) { } float __operator + (const float a, const float b) { - float c; - __asm float_add c, a, b; - return c; +// float c; +// __asm float_add c, a, b; +// return c; +//bp: + __asm float_add __retVal, a, b; } void __operator += (inout int a, const int b) { @@ -330,9 +357,11 @@ void __operator -= (inout int a, const int b) { } float __operator * (const float a, const float b) { - float c; - __asm float_multiply c, a, b; - return c; +// float c; +// __asm float_multiply c, a, b; +// return c; +//bp: + __asm float_multiply __retVal, a, b; } void __operator *= (inout int a, const int b) { @@ -340,9 +369,11 @@ void __operator *= (inout int a, const int b) { } float __operator / (const float a, const float b) { - float c; - __asm float_divide c, a, b; - return c; +// float c; +// __asm float_divide c, a, b; +// return c; +//bp: + __asm float_divide __retVal, a, b; } void __operator /= (inout int a, const int b) { @@ -535,12 +566,22 @@ void __operator -= (inout mat3 m, const mat3 n) { m[2] -= n[2]; } -vec3 __operator * (const mat3 m, const vec3 v) { - return vec3 ( - v.x * m[0].x + v.y * m[1].x + v.z * m[2].x, - v.x * m[0].y + v.y * m[1].y + v.z * m[2].y, - v.x * m[0].z + v.y * m[1].z + v.z * m[2].z - ); +//bp: +vec3 __operator * (const mat3 m, const vec3 v) +{ + vec3 r1, r2, r3; + r1.x = m[0].x; + r1.y = m[1].x; + r1.z = m[2].x; + r2.x = m[0].y; + r2.y = m[1].y; + r2.z = m[2].y; + r3.x = m[0].z; + r3.y = m[1].z; + r3.z = m[2].z; + __asm vec3_dot __retVal.x, r1, v; + __asm vec3_dot __retVal.y, r2, v; + __asm vec3_dot __retVal.z, r3, v; } mat3 __operator * (const mat3 m, const mat3 n) { @@ -571,13 +612,57 @@ void __operator -= (inout mat4 m, const mat4 n) { m[3] -= n[3]; } -vec4 __operator * (const mat4 m, const vec4 v) { - return vec4 ( - v.x * m[0].x + v.y * m[1].x + v.z * m[2].x + v.w * m[3].x, - v.x * m[0].y + v.y * m[1].y + v.z * m[2].y + v.w * m[3].y, - v.x * m[0].z + v.y * m[1].z + v.z * m[2].z + v.w * m[3].z, - v.x * m[0].w + v.y * m[1].w + v.z * m[2].w + v.w * m[3].w - ); + + + +//// dot (formerly in slang_common_builtin.gc) + +float dot(const float a, const float b) +{ + return a * b; +} + +float dot(const vec2 a, const vec2 b) +{ + return a.x * b.x + a.y * b.y; +} + +float dot(const vec3 a, const vec3 b) +{ + __asm vec3_dot __retVal, a, b; +} + +float dot(const vec4 a, const vec4 b) +{ + __asm vec4_dot __retVal, a, b; +} + + + + +vec4 __operator * (const mat4 m, const vec4 v) +{ + vec4 r1, r2, r3, r4; + r1.x = m[0].x; + r1.y = m[1].x; + r1.z = m[2].x; + r1.w = m[3].x; + r2.x = m[0].y; + r2.y = m[1].y; + r2.z = m[2].y; + r2.w = m[3].y; + r3.x = m[0].z; + r3.y = m[1].z; + r3.z = m[2].z; + r3.w = m[3].z; + r4.x = m[0].w; + r4.y = m[1].w; + r4.z = m[2].w; + r4.w = m[3].w; + __asm vec4_dot __retVal.x, r1, v; + __asm vec4_dot __retVal.y, r2, v; + __asm vec4_dot __retVal.z, r3, v; + __asm vec4_dot __retVal.w, r4, v; } mat4 __operator * (const mat4 m, const mat4 n) { @@ -768,6 +853,11 @@ vec4 __operator * (const vec4 v, const mat4 m) { v.x * m[1].x + v.y * m[1].y + v.z * m[1].z + v.w * m[1].w, v.x * m[2].x + v.y * m[2].y + v.z * m[2].z + v.w * m[2].w, v.x * m[3].x + v.y * m[3].y + v.z * m[3].z + v.w * m[3].w +//bp: +// dot(v, m[0]), +// dot(v, m[1]), +// dot(v, m[2]), +// dot(v, m[3]) ); } @@ -776,10 +866,12 @@ void __operator *= (inout vec4 v, const mat4 m) { } float __operator - (const float a, const float b) { - float c; - __asm float_negate c, b; - __asm float_add c, a, c; - return c; +// float c; +// __asm float_negate c, b; +// __asm float_add c, a, c; +// return c; +//bp: + __asm float_subtract __retVal, a, b; } int __operator + (const int a, const int b) { @@ -855,8 +947,10 @@ vec3 __operator / (const vec3 v, const vec3 u) { return vec3 (v.x / u.x, v.y / u.y, v.z / u.z); } -vec4 __operator + (const vec4 v, const vec4 u) { - return vec4 (v.x + u.x, v.y + u.y, v.z + u.z, v.w + u.w); +vec4 __operator + (const vec4 vadd, const vec4 uadd) { +// return vec4 (v.x + u.x, v.y + u.y, v.z + u.z, v.w + u.w); +//bp: + __asm vec4_add __retVal, vadd, uadd; } vec4 __operator - (const vec4 v, const vec4 u) { @@ -864,7 +958,10 @@ vec4 __operator - (const vec4 v, const vec4 u) { } vec4 __operator * (const vec4 v, const vec4 u) { - return vec4 (v.x * u.x, v.y * u.y, v.z * u.z, v.w * u.w); +// return vec4 (v.x * u.x, v.y * u.y, v.z * u.z, v.w * u.w); +// return v; +//bp: + __asm vec4_multiply __retVal, v, u; } vec4 __operator / (const vec4 v, const vec4 u) { @@ -1007,8 +1104,10 @@ vec3 __operator * (const float a, const vec3 u) { return vec3 (a * u.x, a * u.y, a * u.z); } -vec3 __operator * (const vec3 v, const float b) { - return vec3 (v.x * b, v.y * b, v.z * b); +//bp: +vec3 __operator * (const vec3 v, const float b) +{ + __retVal.xyz = v.xyz * b.xxx; } vec3 __operator / (const float a, const vec3 u) { @@ -1039,8 +1138,10 @@ vec4 __operator * (const float a, const vec4 u) { return vec4 (a * u.x, a * u.y, a * u.z, a * u.w); } -vec4 __operator * (const vec4 v, const float b) { - return vec4 (v.x * b, v.y * b, v.z * b, v.w * b); +//bp: +vec4 __operator * (const vec4 v, const float b) +{ + __asm vec4_multiply __retVal.xyzw, v.xyzw, b.xxxx; } vec4 __operator / (const float a, const vec4 u) { |