diff options
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r-- | src/gallium/auxiliary/tgsi/Makefile | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 1036 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_build.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump.c | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_exec.c | 326 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_info.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_iterate.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_parse.c | 17 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sanity.c | 32 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_scan.c | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_sse2.c | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_text.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_transform.c | 2 | ||||
-rw-r--r-- | src/gallium/auxiliary/tgsi/tgsi_util.c | 2 |
16 files changed, 1312 insertions, 140 deletions
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index d7df9490cf..b4900e8dba 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -19,6 +19,3 @@ C_SOURCES = \ tgsi_util.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt new file mode 100644 index 0000000000..b83abd4093 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -0,0 +1,1036 @@ +TGSI Instruction Specification +============================== +============================== + + +1 Instruction Set Operations +============================= + + +1.1 GL_NV_vertex_program +------------------------- + + +1.1.1 ARL - Address Register Load + + dst.x = floor(src.x) + dst.y = floor(src.y) + dst.z = floor(src.z) + dst.w = floor(src.w) + + +1.1.2 MOV - Move + + dst.x = src.x + dst.y = src.y + dst.z = src.z + dst.w = src.w + + +1.1.3 LIT - Light Coefficients + + dst.x = 1.0 + dst.y = max(src.x, 0.0) + dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0 + dst.w = 1.0 + + +1.1.4 RCP - Reciprocal + + dst.x = 1.0 / src.x + dst.y = 1.0 / src.x + dst.z = 1.0 / src.x + dst.w = 1.0 / src.x + + +1.1.5 RSQ - Reciprocal Square Root + + dst.x = 1.0 / sqrt(abs(src.x)) + dst.y = 1.0 / sqrt(abs(src.x)) + dst.z = 1.0 / sqrt(abs(src.x)) + dst.w = 1.0 / sqrt(abs(src.x)) + + +1.1.6 EXP - Approximate Exponential Base 2 + + dst.x = pow(2.0, floor(src.x)) + dst.y = src.x - floor(src.x) + dst.z = pow(2.0, src.x) + dst.w = 1.0 + + +1.1.7 LOG - Approximate Logarithm Base 2 + + dst.x = floor(lg2(abs(src.x))) + dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x)))) + dst.z = lg2(abs(src.x)) + dst.w = 1.0 + + +1.1.8 MUL - Multiply + + dst.x = src0.x * src1.x + dst.y = src0.y * src1.y + dst.z = src0.z * src1.z + dst.w = src0.w * src1.w + + +1.1.9 ADD - Add + + dst.x = src0.x + src1.x + dst.y = src0.y + src1.y + dst.z = src0.z + src1.z + dst.w = src0.w + src1.w + + +1.1.10 DP3 - 3-component Dot Product + + dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + + +1.1.11 DP4 - 4-component Dot Product + + dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w + dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w + dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w + dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w + + +1.1.12 DST - Distance Vector + + dst.x = 1.0 + dst.y = src0.y * src1.y + dst.z = src0.z + dst.w = src1.w + + +1.1.13 MIN - Minimum + + dst.x = min(src0.x, src1.x) + dst.y = min(src0.y, src1.y) + dst.z = min(src0.z, src1.z) + dst.w = min(src0.w, src1.w) + + +1.1.14 MAX - Maximum + + dst.x = max(src0.x, src1.x) + dst.y = max(src0.y, src1.y) + dst.z = max(src0.z, src1.z) + dst.w = max(src0.w, src1.w) + + +1.1.15 SLT - Set On Less Than + + dst.x = (src0.x < src1.x) ? 1.0 : 0.0 + dst.y = (src0.y < src1.y) ? 1.0 : 0.0 + dst.z = (src0.z < src1.z) ? 1.0 : 0.0 + dst.w = (src0.w < src1.w) ? 1.0 : 0.0 + + +1.1.16 SGE - Set On Greater Equal Than + + dst.x = (src0.x >= src1.x) ? 1.0 : 0.0 + dst.y = (src0.y >= src1.y) ? 1.0 : 0.0 + dst.z = (src0.z >= src1.z) ? 1.0 : 0.0 + dst.w = (src0.w >= src1.w) ? 1.0 : 0.0 + + +1.1.17 MAD - Multiply And Add + + dst.x = src0.x * src1.x + src2.x + dst.y = src0.y * src1.y + src2.y + dst.z = src0.z * src1.z + src2.z + dst.w = src0.w * src1.w + src2.w + + +1.2 GL_ATI_fragment_shader +--------------------------- + + +1.2.1 SUB - Subtract + + dst.x = src0.x - src1.x + dst.y = src0.y - src1.y + dst.z = src0.z - src1.z + dst.w = src0.w - src1.w + + +1.2.2 DOT3 - 3-component Dot Product + + Alias for DP3. + + +1.2.3 DOT4 - 4-component Dot Product + + Alias for DP4. + + +1.2.4 LERP - Linear Interpolate + + dst.x = src0.x * (src1.x - src2.x) + src2.x + dst.y = src0.y * (src1.y - src2.y) + src2.y + dst.z = src0.z * (src1.z - src2.z) + src2.z + dst.w = src0.w * (src1.w - src2.w) + src2.w + + +1.2.5 CND - Condition + + dst.x = (src2.x > 0.5) ? src0.x : src1.x + dst.y = (src2.y > 0.5) ? src0.y : src1.y + dst.z = (src2.z > 0.5) ? src0.z : src1.z + dst.w = (src2.w > 0.5) ? src0.w : src1.w + + +1.2.6 CND0 - Condition Zero + + dst.x = (src2.x >= 0.0) ? src0.x : src1.x + dst.y = (src2.y >= 0.0) ? src0.y : src1.y + dst.z = (src2.z >= 0.0) ? src0.z : src1.z + dst.w = (src2.w >= 0.0) ? src0.w : src1.w + + +1.2.7 DOT2ADD - 2-component Dot Product And Add + + dst.x = src0.x * src1.x + src0.y * src1.y + src2.x + dst.y = src0.x * src1.x + src0.y * src1.y + src2.x + dst.z = src0.x * src1.x + src0.y * src1.y + src2.x + dst.w = src0.x * src1.x + src0.y * src1.y + src2.x + + +1.3 GL_EXT_vertex_shader +------------------------- + + +1.3.1 INDEX - Array Lookup + + Considered for removal from language. + + +1.3.2 NEGATE - Negate + + Considered for removal from language. + + +1.3.3 MADD - Multiply And Add + + Alias for MAD. + + +1.3.4 FRAC - Fraction + + dst.x = src.x - floor(src.x) + dst.y = src.y - floor(src.y) + dst.z = src.z - floor(src.z) + dst.w = src.w - floor(src.w) + + +1.3.5 SETGE - Set On Greater Equal + + Alias for SGE. + + +1.3.6 SETLT - Set On Less Than + + Alias for SLT. + + +1.3.7 CLAMP - Clamp + + dst.x = clamp(src0.x, src1.x, src2.x) + dst.y = clamp(src0.y, src1.y, src2.y) + dst.z = clamp(src0.z, src1.z, src2.z) + dst.w = clamp(src0.w, src1.w, src2.w) + + +1.3.8 FLOOR - Floor + + dst.x = floor(src.x) + dst.y = floor(src.y) + dst.z = floor(src.z) + dst.w = floor(src.w) + + +1.3.9 ROUND - Round + + dst.x = round(src.x) + dst.y = round(src.y) + dst.z = round(src.z) + dst.w = round(src.w) + + +1.3.10 EXPBASE2 - Exponent Base 2 + + dst.x = pow(2.0, src.x) + dst.y = pow(2.0, src.x) + dst.z = pow(2.0, src.x) + dst.w = pow(2.0, src.x) + + +1.3.11 LOGBASE2 - Logarithm Base 2 + + dst.x = lg2(src.x) + dst.y = lg2(src.x) + dst.z = lg2(src.x) + dst.w = lg2(src.x) + + +1.3.12 POWER - Power + + dst.x = pow(src0.x, src1.x) + dst.y = pow(src0.x, src1.x) + dst.z = pow(src0.x, src1.x) + dst.w = pow(src0.x, src1.x) + + +1.3.13 RECIP - Reciprocal + + Alias for RCP. + + +1.3.14 RECIPSQRT - Reciprocal Square Root + + Alias for RSQ. + + +1.3.15 CROSSPRODUCT - Cross Product + + dst.x = src0.y * src1.z - src1.y * src0.z + dst.y = src0.z * src1.x - src1.z * src0.x + dst.z = src0.x * src1.y - src1.x * src0.y + dst.w = 1.0 + + +1.3.16 MULTIPLYMATRIX - Multiply Matrix + + Considered for removal from language. + + +1.4 GL_NV_vertex_program1_1 +---------------------------- + + +1.4.1 ABS - Absolute + + dst.x = abs(src.x) + dst.y = abs(src.y) + dst.z = abs(src.z) + dst.w = abs(src.w) + + +1.4.2 RCC - Reciprocal Clamped + + dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) + dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) + dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) + dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) + + +1.4.3 DPH - Homogeneous Dot Product + + dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w + dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w + dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w + dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w + + +1.5 GL_NV_fragment_program +--------------------------- + + +1.5.1 COS - Cosine + + dst.x = cos(src.x) + dst.y = cos(src.x) + dst.z = cos(src.x) + dst.w = cos(src.w) + + +1.5.2 DDX - Derivative Relative To X + + dst.x = partialx(src.x) + dst.y = partialx(src.y) + dst.z = partialx(src.z) + dst.w = partialx(src.w) + + +1.5.3 DDY - Derivative Relative To Y + + dst.x = partialy(src.x) + dst.y = partialy(src.y) + dst.z = partialy(src.z) + dst.w = partialy(src.w) + + +1.5.4 EX2 - Exponential Base 2 + + Alias for EXPBASE2. + + +1.5.5 FLR - Floor + + Alias for FLOOR. + + +1.5.6 FRC - Fraction + + Alias for FRAC. + + +1.5.7 KILP - Predicated Discard + + TBD + + +1.5.8 LG2 - Logarithm Base 2 + + Alias for LOGBASE2. + + +1.5.9 LRP - Linear Interpolate + + Alias for LERP. + + +1.5.10 PK2H - Pack Two 16-bit Floats + + TBD + + +1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars + + TBD + + +1.5.12 PK4B - Pack Four Signed 8-bit Scalars + + TBD + + +1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars + + TBD + + +1.5.14 POW - Power + + Alias for POWER. + + +1.5.15 RFL - Reflection Vector + + dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x + dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y + dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z + dst.w = 1.0 + + +1.5.16 SEQ - Set On Equal + + dst.x = (src0.x == src1.x) ? 1.0 : 0.0 + dst.y = (src0.y == src1.y) ? 1.0 : 0.0 + dst.z = (src0.z == src1.z) ? 1.0 : 0.0 + dst.w = (src0.w == src1.w) ? 1.0 : 0.0 + + +1.5.17 SFL - Set On False + + dst.x = 0.0 + dst.y = 0.0 + dst.z = 0.0 + dst.w = 0.0 + + +1.5.18 SGT - Set On Greater Than + + dst.x = (src0.x > src1.x) ? 1.0 : 0.0 + dst.y = (src0.y > src1.y) ? 1.0 : 0.0 + dst.z = (src0.z > src1.z) ? 1.0 : 0.0 + dst.w = (src0.w > src1.w) ? 1.0 : 0.0 + + +1.5.19 SIN - Sine + + dst.x = sin(src.x) + dst.y = sin(src.x) + dst.z = sin(src.x) + dst.w = sin(src.w) + + +1.5.20 SLE - Set On Less Equal Than + + dst.x = (src0.x <= src1.x) ? 1.0 : 0.0 + dst.y = (src0.y <= src1.y) ? 1.0 : 0.0 + dst.z = (src0.z <= src1.z) ? 1.0 : 0.0 + dst.w = (src0.w <= src1.w) ? 1.0 : 0.0 + + +1.5.21 SNE - Set On Not Equal + + dst.x = (src0.x != src1.x) ? 1.0 : 0.0 + dst.y = (src0.y != src1.y) ? 1.0 : 0.0 + dst.z = (src0.z != src1.z) ? 1.0 : 0.0 + dst.w = (src0.w != src1.w) ? 1.0 : 0.0 + + +1.5.22 STR - Set On True + + dst.x = 1.0 + dst.y = 1.0 + dst.z = 1.0 + dst.w = 1.0 + + +1.5.23 TEX - Texture Lookup + + TBD + + +1.5.24 TXD - Texture Lookup with Derivatives + + TBD + + +1.5.25 TXP - Projective Texture Lookup + + TBD + + +1.5.26 UP2H - Unpack Two 16-Bit Floats + + TBD + + +1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars + + TBD + + +1.5.28 UP4B - Unpack Four Signed 8-Bit Values + + TBD + + +1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars + + TBD + + +1.5.30 X2D - 2D Coordinate Transformation + + dst.x = src0.x + src1.x * src2.x + src1.y * src2.y + dst.y = src0.y + src1.x * src2.z + src1.y * src2.w + dst.z = src0.x + src1.x * src2.x + src1.y * src2.y + dst.w = src0.y + src1.x * src2.z + src1.y * src2.w + + +1.6 GL_NV_vertex_program2 +-------------------------- + + +1.6.1 ARA - Address Register Add + + TBD + + +1.6.2 ARR - Address Register Load With Round + + dst.x = round(src.x) + dst.y = round(src.y) + dst.z = round(src.z) + dst.w = round(src.w) + + +1.6.3 BRA - Branch + + TBD + + +1.6.4 CAL - Subroutine Call + + push(pc) + pc = target + + +1.6.5 RET - Subroutine Call Return + + pc = pop() + + +1.6.6 SSG - Set Sign + + dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 + dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 + dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 + dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 + + +1.7 GL_ARB_vertex_program +-------------------------- + + +1.7.1 SWZ - Extended Swizzle + + dst.x = src.x + dst.y = src.y + dst.z = src.z + dst.w = src.w + + +1.7.2 XPD - Cross Product + + Alias for CROSSPRODUCT. + + +1.8 GL_ARB_fragment_program +---------------------------- + + +1.8.1 CMP - Compare + + dst.x = (src0.x < 0.0) ? src1.x : src2.x + dst.y = (src0.y < 0.0) ? src1.y : src2.y + dst.z = (src0.z < 0.0) ? src1.z : src2.z + dst.w = (src0.w < 0.0) ? src1.w : src2.w + + +1.8.2 KIL - Conditional Discard + + TBD + + +1.8.3 SCS - Sine Cosine + + dst.x = cos(src.x) + dst.y = sin(src.x) + dst.z = 0.0 + dst.y = 1.0 + + +1.8.4 TXB - Texture Lookup With Bias + + TBD + + +1.9 GL_NV_fragment_program2 +---------------------------- + + +1.9.1 NRM - 3-component Vector Normalise + + dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z) + dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z) + dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z) + dst.w = 1.0 + + +1.9.2 DIV - Divide + + dst.x = src0.x / src1.x + dst.y = src0.y / src1.y + dst.z = src0.z / src1.z + dst.w = src0.w / src1.w + + +1.9.3 DP2 - 2-component Dot Product + + dst.x = src0.x * src1.x + src0.y * src1.y + dst.y = src0.x * src1.x + src0.y * src1.y + dst.z = src0.x * src1.x + src0.y * src1.y + dst.w = src0.x * src1.x + src0.y * src1.y + + +1.9.4 DP2A - 2-component Dot Product And Add + + Alias for DOT2ADD. + + +1.9.5 TXL - Texture Lookup With LOD + + TBD + + +1.9.6 BRK - Break + + TBD + + +1.9.7 IF - If + + TBD + + +1.9.8 LOOP - Loop + + TBD + + +1.9.9 REP - Repeat + + TBD + + +1.9.10 ELSE - Else + + TBD + + +1.9.11 ENDIF - End If + + TBD + + +1.9.12 ENDLOOP - End Loop + + TBD + + +1.9.13 ENDREP - End Repeat + + TBD + + +1.10 GL_NV_vertex_program3 +--------------------------- + + +1.10.1 PUSHA - Push Address Register On Stack + + push(src.x) + push(src.y) + push(src.z) + push(src.w) + + +1.10.2 POPA - Pop Address Register From Stack + + dst.w = pop() + dst.z = pop() + dst.y = pop() + dst.x = pop() + + +1.11 GL_NV_gpu_program4 +------------------------ + + +1.11.1 CEIL - Ceiling + + dst.x = ceil(src.x) + dst.y = ceil(src.y) + dst.z = ceil(src.z) + dst.w = ceil(src.w) + + +1.11.2 I2F - Integer To Float + + dst.x = (float) src.x + dst.y = (float) src.y + dst.z = (float) src.z + dst.w = (float) src.w + + +1.11.3 NOT - Bitwise Not + + dst.x = ~src.x + dst.y = ~src.y + dst.z = ~src.z + dst.w = ~src.w + + +1.11.4 TRUNC - Truncate + + dst.x = trunc(src.x) + dst.y = trunc(src.y) + dst.z = trunc(src.z) + dst.w = trunc(src.w) + + +1.11.5 SHL - Shift Left + + dst.x = src0.x << src1.x + dst.y = src0.y << src1.x + dst.z = src0.z << src1.x + dst.w = src0.w << src1.x + + +1.11.6 SHR - Shift Right + + dst.x = src0.x >> src1.x + dst.y = src0.y >> src1.x + dst.z = src0.z >> src1.x + dst.w = src0.w >> src1.x + + +1.11.7 AND - Bitwise And + + dst.x = src0.x & src1.x + dst.y = src0.y & src1.y + dst.z = src0.z & src1.z + dst.w = src0.w & src1.w + + +1.11.8 OR - Bitwise Or + + dst.x = src0.x | src1.x + dst.y = src0.y | src1.y + dst.z = src0.z | src1.z + dst.w = src0.w | src1.w + + +1.11.9 MOD - Modulus + + dst.x = src0.x % src1.x + dst.y = src0.y % src1.y + dst.z = src0.z % src1.z + dst.w = src0.w % src1.w + + +1.11.10 XOR - Bitwise Xor + + dst.x = src0.x ^ src1.x + dst.y = src0.y ^ src1.y + dst.z = src0.z ^ src1.z + dst.w = src0.w ^ src1.w + + +1.11.11 SAD - Sum Of Absolute Differences + + dst.x = abs(src0.x - src1.x) + src2.x + dst.y = abs(src0.y - src1.y) + src2.y + dst.z = abs(src0.z - src1.z) + src2.z + dst.w = abs(src0.w - src1.w) + src2.w + + +1.11.12 TXF - Texel Fetch + + TBD + + +1.11.13 TXQ - Texture Size Query + + TBD + + +1.11.14 CONT - Continue + + TBD + + +1.12 GL_NV_geometry_program4 +----------------------------- + + +1.12.1 EMIT - Emit + + TBD + + +1.12.2 ENDPRIM - End Primitive + + TBD + + +1.13 GLSL +---------- + + +1.13.1 BGNLOOP2 - Begin Loop + + TBD + + +1.13.2 BGNSUB - Begin Subroutine + + TBD + + +1.13.3 ENDLOOP2 - End Loop + + TBD + + +1.13.4 ENDSUB - End Subroutine + + TBD + + +1.13.5 INT - Truncate + + Alias for TRUNC. + + +1.13.6 NOISE1 - 1D Noise + + TBD + + +1.13.7 NOISE2 - 2D Noise + + TBD + + +1.13.8 NOISE3 - 3D Noise + + TBD + + +1.13.9 NOISE4 - 4D Noise + + TBD + + +1.13.10 NOP - No Operation + + Do nothing. + + +1.14 ps_1_1 +------------ + + +1.14.1 TEXKILL - Conditional Discard + + Alias for KIL. + + +1.15 ps_1_4 +------------ + + +1.15.1 TEXLD - Texture Lookup + + Alias for TEX. + + +1.16 ps_2_0 +------------ + + +1.16.1 M4X4 - Multiply Matrix + + Alias for MULTIPLYMATRIX. + + +1.16.2 M4X3 - Multiply Matrix + + Considered for removal from language. + + +1.16.3 M3X4 - Multiply Matrix + + Considered for removal from language. + + +1.16.4 M3X3 - Multiply Matrix + + Considered for removal from language. + + +1.16.5 M3X2 - Multiply Matrix + + Considered for removal from language. + + +1.16.6 CRS - Cross Product + + Alias for XPD. + + +1.16.7 NRM4 - 4-component Vector Normalise + + dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w) + dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w) + dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w) + dst.w = src.w / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w) + + +1.16.8 SINCOS - Sine Cosine + + Alias for SCS. + + +1.16.9 TEXLDB - Texture Lookup With Bias + + Alias for TXB. + + +1.16.10 DP2ADD - 2-component Dot Product And Add + + Alias for DP2A. + + +1.17 ps_2_x +------------ + + +1.17.1 CALL - Subroutine Call + + Alias for CAL. + + +1.17.2 CALLNZ - Subroutine Call If Not Zero + + TBD + + +1.17.3 IFC - If + + TBD + + +1.17.4 BREAK - Break + + Alias for BRK. + + +1.17.5 BREAKC - Break Conditional + + TBD + + +1.17.6 DSX - Derivative Relative To X + + Alias for DDX. + + +1.17.7 DSY - Derivative Relative To Y + + Alias for DDY. + + +1.17.8 TEXLDD - Texture Lookup with Derivatives + + Alias for TXD. + + +1.18 vs_1_1 +------------ + + +1.18.1 EXPP - Approximate Exponential Base 2 + + Alias for EXP. + + +1.18.2 LOGP - Logarithm Base 2 + + Alias for LG2. + + +1.19 vs_2_0 +------------ + + +1.19.1 SGN - Set Sign + + Alias for SSG. + + +1.19.2 MOVA - Move Address Register + + Alias for ARR. + diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 17886540cf..a1891a140a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index ab2b1f2c58..a784b7cc3c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_info.h" @@ -245,8 +245,12 @@ iter_declaration( } } - TXT( ", " ); - ENM( decl->Declaration.Interpolate, interpolate_names ); + if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && + decl->Declaration.File == TGSI_FILE_INPUT) + { + TXT( ", " ); + ENM( decl->Declaration.Interpolate, interpolate_names ); + } if (decl->Declaration.Centroid) { TXT( ", CENTROID" ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 2ecf1e2f14..3dc61c48ca 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_string.h" #include "tgsi_dump_c.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ab641efb60..ba807e498f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -486,6 +486,32 @@ micro_f2ut( #endif static void +micro_float_clamp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + uint i; + + for (i = 0; i < 4; i++) { + if (src->f[i] > 0.0f) { + if (src->f[i] > 1.884467e+019f) + dst->f[i] = 1.884467e+019f; + else if (src->f[i] < 5.42101e-020f) + dst->f[i] = 5.42101e-020f; + else + dst->f[i] = src->f[i]; + } + else { + if (src->f[i] < -1.884467e+019f) + dst->f[i] = -1.884467e+019f; + else if (src->f[i] > -5.42101e-020f) + dst->f[i] = -5.42101e-020f; + else + dst->f[i] = src->f[i]; + } + } +} + +static void micro_flr( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) @@ -508,20 +534,6 @@ micro_frc( } static void -micro_ge( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void micro_i2f( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) @@ -1614,6 +1626,7 @@ exec_tex(struct tgsi_exec_machine *mach, switch (inst->InstructionExtTexture.Texture) { case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: FETCH(&r[0], 0, CHAN_X); @@ -1636,6 +1649,8 @@ exec_tex(struct tgsi_exec_machine *mach, case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); @@ -1826,12 +1841,14 @@ exec_instruction( int *pc ) { uint chan_index; - union tgsi_exec_channel r[8]; + union tgsi_exec_channel r[10]; (*pc)++; switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: + /* TGSI_OPCODE_FLOOR */ + /* TGSI_OPCODE_FLR */ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_flr( &r[0], &r[0] ); @@ -1849,31 +1866,31 @@ exec_instruction( case TGSI_OPCODE_LIT: if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); - } + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Y ); + } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - FETCH( &r[2], 0, CHAN_W ); - micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); - micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); - micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); - } + FETCH( &r[2], 0, CHAN_W ); + micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); + micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); + micro_pow( &r[1], &r[1], &r[2] ); + micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Z ); + } } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; @@ -1882,17 +1899,18 @@ exec_instruction( FETCH( &r[0], 0, CHAN_X ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[0], &r[0] ); micro_sqrt( &r[0], &r[0] ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); } break; @@ -2005,30 +2023,30 @@ exec_instruction( micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_DST: if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + micro_mul( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, CHAN_Y ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); } break; @@ -2071,7 +2089,7 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, chan_index ); } break; @@ -2115,15 +2133,27 @@ exec_instruction( break; case TGSI_OPCODE_CND: - assert (0); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); + STORE(&r[0], 0, chan_index); + } break; case TGSI_OPCODE_CND0: - assert (0); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]); + STORE(&r[0], 0, chan_index); + } break; case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ + /* TGSI_OPCODE_DP2A */ FETCH( &r[0], 0, CHAN_X ); FETCH( &r[1], 1, CHAN_X ); micro_mul( &r[0], &r[0], &r[1] ); @@ -2142,10 +2172,12 @@ exec_instruction( break; case TGSI_OPCODE_INDEX: + /* XXX: considered for removal */ assert (0); break; case TGSI_OPCODE_NEGATE: + /* XXX: considered for removal */ assert (0); break; @@ -2159,15 +2191,13 @@ exec_instruction( break; case TGSI_OPCODE_CLAMP: - assert (0); - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + micro_max(&r[0], &r[0], &r[1]); + FETCH(&r[1], 2, chan_index); + micro_min(&r[0], &r[0], &r[1]); + STORE(&r[0], 0, chan_index); } break; @@ -2181,7 +2211,7 @@ exec_instruction( break; case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ + /* TGSI_OPCODE_EX2 */ FETCH(&r[0], 0, CHAN_X); #if FAST_MATH @@ -2191,7 +2221,7 @@ exec_instruction( #endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); } break; @@ -2205,19 +2235,19 @@ exec_instruction( break; case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ + /* TGSI_OPCODE_POW */ FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 1, CHAN_X); micro_pow( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); } break; case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ + /* TGSI_OPCODE_XPD */ FETCH(&r[0], 0, CHAN_Y); FETCH(&r[1], 1, CHAN_Z); @@ -2260,6 +2290,7 @@ exec_instruction( break; case TGSI_OPCODE_MULTIPLYMATRIX: + /* XXX: considered for removal */ assert (0); break; @@ -2274,7 +2305,12 @@ exec_instruction( break; case TGSI_OPCODE_RCC: - assert (0); + FETCH(&r[0], 0, CHAN_X); + micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); + micro_float_clamp(&r[0], &r[0]); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&r[0], 0, chan_index); + } break; case TGSI_OPCODE_DPH: @@ -2300,7 +2336,7 @@ exec_instruction( micro_add( &r[0], &r[0], &r[1] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); } break; @@ -2310,7 +2346,7 @@ exec_instruction( micro_cos( &r[0], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); } break; @@ -2355,7 +2391,52 @@ exec_instruction( break; case TGSI_OPCODE_RFL: - assert (0); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + /* r0 = dp3(src0, src0) */ + FETCH(&r[2], 0, CHAN_X); + micro_mul(&r[0], &r[2], &r[2]); + FETCH(&r[4], 0, CHAN_Y); + micro_mul(&r[8], &r[4], &r[4]); + micro_add(&r[0], &r[0], &r[8]); + FETCH(&r[6], 0, CHAN_Z); + micro_mul(&r[8], &r[6], &r[6]); + micro_add(&r[0], &r[0], &r[8]); + + /* r1 = dp3(src0, src1) */ + FETCH(&r[3], 1, CHAN_X); + micro_mul(&r[1], &r[2], &r[3]); + FETCH(&r[5], 1, CHAN_Y); + micro_mul(&r[8], &r[4], &r[5]); + micro_add(&r[1], &r[1], &r[8]); + FETCH(&r[7], 1, CHAN_Z); + micro_mul(&r[8], &r[6], &r[7]); + micro_add(&r[1], &r[1], &r[8]); + + /* r1 = 2 * r1 / r0 */ + micro_add(&r[1], &r[1], &r[1]); + micro_div(&r[1], &r[1], &r[0]); + + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + micro_mul(&r[2], &r[2], &r[1]); + micro_sub(&r[2], &r[2], &r[3]); + STORE(&r[2], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + micro_mul(&r[4], &r[4], &r[1]); + micro_sub(&r[4], &r[4], &r[5]); + STORE(&r[4], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + micro_mul(&r[6], &r[6], &r[1]); + micro_sub(&r[6], &r[6], &r[7]); + STORE(&r[6], 0, CHAN_Z); + } + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); + } break; case TGSI_OPCODE_SEQ: @@ -2370,7 +2451,9 @@ exec_instruction( break; case TGSI_OPCODE_SFL: - assert (0); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); + } break; case TGSI_OPCODE_SGT: @@ -2409,7 +2492,9 @@ exec_instruction( break; case TGSI_OPCODE_STR: - assert (0); + FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); + } break; case TGSI_OPCODE_TEX: @@ -2466,7 +2551,40 @@ exec_instruction( break; case TGSI_OPCODE_X2D: - assert (0); + FETCH(&r[0], 1, CHAN_X); + FETCH(&r[1], 1, CHAN_Y); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + FETCH(&r[2], 2, CHAN_X); + micro_mul(&r[2], &r[2], &r[0]); + FETCH(&r[3], 2, CHAN_Y); + micro_mul(&r[3], &r[3], &r[1]); + micro_add(&r[2], &r[2], &r[3]); + FETCH(&r[3], 0, CHAN_X); + micro_add(&r[2], &r[2], &r[3]); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(&r[2], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + STORE(&r[2], 0, CHAN_Z); + } + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + FETCH(&r[2], 2, CHAN_Z); + micro_mul(&r[2], &r[2], &r[0]); + FETCH(&r[3], 2, CHAN_W); + micro_mul(&r[3], &r[3], &r[1]); + micro_add(&r[2], &r[2], &r[3]); + FETCH(&r[3], 0, CHAN_Y); + micro_add(&r[2], &r[2], &r[3]); + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(&r[2], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&r[2], 0, CHAN_W); + } + } break; case TGSI_OPCODE_ARA: @@ -2551,14 +2669,14 @@ exec_instruction( case TGSI_OPCODE_SCS: if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { FETCH( &r[0], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - micro_cos( &r[1], &r[0] ); - STORE( &r[1], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - micro_sin( &r[1], &r[0] ); - STORE( &r[1], 0, CHAN_Y ); + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + micro_cos(&r[1], &r[0]); + STORE(&r[1], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + micro_sin(&r[1], &r[0]); + STORE(&r[1], 0, CHAN_Y); + } } if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); @@ -2570,32 +2688,36 @@ exec_instruction( case TGSI_OPCODE_NRM: /* 3-component vector normalize */ - { - union tgsi_exec_channel tmp, dot; - - /* tmp = dp3(src0, src0): */ - FETCH( &r[0], 0, CHAN_X ); - micro_mul( &tmp, &r[0], &r[0] ); - - FETCH( &r[1], 0, CHAN_Y ); - micro_mul( &dot, &r[1], &r[1] ); - micro_add( &tmp, &tmp, &dot ); - - FETCH( &r[2], 0, CHAN_Z ); - micro_mul( &dot, &r[2], &r[2] ); - micro_add( &tmp, &tmp, &dot ); - - /* tmp = 1 / sqrt(tmp) */ - micro_sqrt( &tmp, &tmp ); - micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); - - /* note: w channel is undefined */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - /* chan = chan * tmp */ - micro_mul( &r[chan_index], &tmp, &r[chan_index] ); - STORE( &r[chan_index], 0, chan_index ); + if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + /* r3 = sqrt(dp3(src0, src0)) */ + FETCH(&r[0], 0, CHAN_X); + micro_mul(&r[3], &r[0], &r[0]); + FETCH(&r[1], 0, CHAN_Y); + micro_mul(&r[4], &r[1], &r[1]); + micro_add(&r[3], &r[3], &r[4]); + FETCH(&r[2], 0, CHAN_Z); + micro_mul(&r[4], &r[2], &r[2]); + micro_add(&r[3], &r[3], &r[4]); + micro_sqrt(&r[3], &r[3]); + + if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { + micro_div(&r[0], &r[0], &r[3]); + STORE(&r[0], 0, CHAN_X); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { + micro_div(&r[1], &r[1], &r[3]); + STORE(&r[1], 0, CHAN_Y); + } + if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { + micro_div(&r[2], &r[2], &r[3]); + STORE(&r[2], 0, CHAN_Z); } } + if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); + } break; case TGSI_OPCODE_NRM4: @@ -2956,5 +3078,3 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; } - - diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 68c7a6b7f5..2b8a6f0fb1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_info.h" static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c index 5371a88b96..d88c2558d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_iterate.h" boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index d374b16f9a..0081f74ffc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" @@ -154,10 +154,17 @@ tgsi_parse_token( switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: - imm->u.Pointer = MALLOC( - sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.NrTokens - 1) ); - for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { - next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); + { + uint imm_count = imm->Immediate.NrTokens - 1; + struct tgsi_immediate_float32 *data; + + data = (struct tgsi_immediate_float32 *) MALLOC(sizeof(struct tgsi_immediate_float32) * imm_count); + if (data) { + for (i = 0; i < imm_count; i++) { + next_token(ctx, &data[i]); + } + imm->u.ImmediateFloat32 = data; + } } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index f365030e52..0c64ae5713 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -33,7 +33,7 @@ #if defined(PIPE_ARCH_PPC) -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index bc7b941b78..6f1f5c2b4b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -25,23 +25,24 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_sanity.h" #include "tgsi_info.h" #include "tgsi_iterate.h" -#define MAX_REGISTERS 256 - typedef uint reg_flag; #define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) +#define MAX_REGISTERS 256 +#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG) + struct sanity_check_ctx { struct tgsi_iterate_context iter; - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS]; + reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS]; boolean regs_ind_used[TGSI_FILE_COUNT]; uint num_imms; uint num_instructions; @@ -89,7 +90,7 @@ check_file_name( uint file ) { if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { - report_error( ctx, "Invalid register file name" ); + report_error( ctx, "(%u): Invalid register file name", file ); return FALSE; } return TRUE; @@ -113,7 +114,7 @@ is_any_register_declared( { uint i; - for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++) + for (i = 0; i < MAX_REG_FLAGS; i++) if (ctx->regs_decl[file][i]) return TRUE; return FALSE; @@ -162,9 +163,8 @@ check_register_usage( ctx->regs_ind_used[file] = TRUE; } else { - if (index < 0 || index > MAX_REGISTERS) { - report_error( ctx, "%s[%i]: Invalid index %s", - file_names[file], index, name ); + if (index < 0 || index >= MAX_REGISTERS) { + report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name ); return FALSE; } @@ -193,15 +193,15 @@ iter_instruction( info = tgsi_get_opcode_info( inst->Instruction.Opcode ); if (info == NULL) { - report_error( ctx, "Invalid instruction opcode" ); + report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode ); return TRUE; } if (info->num_dst != inst->Instruction.NumDstRegs) { - report_error( ctx, "Invalid number of destination operands" ); + report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst ); } if (info->num_src != inst->Instruction.NumSrcRegs) { - report_error( ctx, "Invalid number of source operands" ); + report_error( ctx, "Invalid number of source operands, should be %u", info->num_src ); } /* Check destination and source registers' validity. @@ -266,7 +266,7 @@ iter_declaration( return TRUE; for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { if (is_register_declared( ctx, file, i )) - report_error( ctx, "The same register declared twice" ); + report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i ); ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); } @@ -295,7 +295,7 @@ iter_immediate( /* Check data type validity. */ if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { - report_error( ctx, "Invalid immediate data type" ); + report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType ); return TRUE; } @@ -322,7 +322,7 @@ epilog( for (i = 0; i < MAX_REGISTERS; i++) { if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { - report_warning( ctx, "Register never used" ); + report_warning( ctx, "%s[%u]: Register never used", file_names[file], i ); } } } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index d02205a63e..c535788819 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -151,7 +151,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens, break; case TGSI_TOKEN_TYPE_IMMEDIATE: - info->immediate_count++; + { + uint reg = info->immediate_count++; + uint file = TGSI_FILE_IMMEDIATE; + + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); + } break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 481ba89c5e..d70bcd03c5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -29,7 +29,7 @@ #if defined(PIPE_ARCH_X86) -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" #if defined(PIPE_ARCH_SSE) @@ -1575,6 +1575,7 @@ emit_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); emit_rsqrt( func, 1, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 1, 0, chan_index ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 1e822fbbea..58fe07c11d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index ea87da31e5..062c1be938 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -31,7 +31,7 @@ * Authors: Brian Paul */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_transform.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 50101a9bb0..71f8a6ca40 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" |