summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/Makefile3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt1036
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c10
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump_c.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c326
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_iterate.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c17
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_scan.c9
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_transform.c2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_util.c2
16 files changed, 1312 insertions, 140 deletions
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index d7df9490cf..b4900e8dba 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -19,6 +19,3 @@ C_SOURCES = \
tgsi_util.c
include ../../Makefile.template
-
-symlinks:
-
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
new file mode 100644
index 0000000000..b83abd4093
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -0,0 +1,1036 @@
+TGSI Instruction Specification
+==============================
+==============================
+
+
+1 Instruction Set Operations
+=============================
+
+
+1.1 GL_NV_vertex_program
+-------------------------
+
+
+1.1.1 ARL - Address Register Load
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+ dst.w = floor(src.w)
+
+
+1.1.2 MOV - Move
+
+ dst.x = src.x
+ dst.y = src.y
+ dst.z = src.z
+ dst.w = src.w
+
+
+1.1.3 LIT - Light Coefficients
+
+ dst.x = 1.0
+ dst.y = max(src.x, 0.0)
+ dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
+ dst.w = 1.0
+
+
+1.1.4 RCP - Reciprocal
+
+ dst.x = 1.0 / src.x
+ dst.y = 1.0 / src.x
+ dst.z = 1.0 / src.x
+ dst.w = 1.0 / src.x
+
+
+1.1.5 RSQ - Reciprocal Square Root
+
+ dst.x = 1.0 / sqrt(abs(src.x))
+ dst.y = 1.0 / sqrt(abs(src.x))
+ dst.z = 1.0 / sqrt(abs(src.x))
+ dst.w = 1.0 / sqrt(abs(src.x))
+
+
+1.1.6 EXP - Approximate Exponential Base 2
+
+ dst.x = pow(2.0, floor(src.x))
+ dst.y = src.x - floor(src.x)
+ dst.z = pow(2.0, src.x)
+ dst.w = 1.0
+
+
+1.1.7 LOG - Approximate Logarithm Base 2
+
+ dst.x = floor(lg2(abs(src.x)))
+ dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
+ dst.z = lg2(abs(src.x))
+ dst.w = 1.0
+
+
+1.1.8 MUL - Multiply
+
+ dst.x = src0.x * src1.x
+ dst.y = src0.y * src1.y
+ dst.z = src0.z * src1.z
+ dst.w = src0.w * src1.w
+
+
+1.1.9 ADD - Add
+
+ dst.x = src0.x + src1.x
+ dst.y = src0.y + src1.y
+ dst.z = src0.z + src1.z
+ dst.w = src0.w + src1.w
+
+
+1.1.10 DP3 - 3-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+
+
+1.1.11 DP4 - 4-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+
+
+1.1.12 DST - Distance Vector
+
+ dst.x = 1.0
+ dst.y = src0.y * src1.y
+ dst.z = src0.z
+ dst.w = src1.w
+
+
+1.1.13 MIN - Minimum
+
+ dst.x = min(src0.x, src1.x)
+ dst.y = min(src0.y, src1.y)
+ dst.z = min(src0.z, src1.z)
+ dst.w = min(src0.w, src1.w)
+
+
+1.1.14 MAX - Maximum
+
+ dst.x = max(src0.x, src1.x)
+ dst.y = max(src0.y, src1.y)
+ dst.z = max(src0.z, src1.z)
+ dst.w = max(src0.w, src1.w)
+
+
+1.1.15 SLT - Set On Less Than
+
+ dst.x = (src0.x < src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y < src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z < src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w < src1.w) ? 1.0 : 0.0
+
+
+1.1.16 SGE - Set On Greater Equal Than
+
+ dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
+
+
+1.1.17 MAD - Multiply And Add
+
+ dst.x = src0.x * src1.x + src2.x
+ dst.y = src0.y * src1.y + src2.y
+ dst.z = src0.z * src1.z + src2.z
+ dst.w = src0.w * src1.w + src2.w
+
+
+1.2 GL_ATI_fragment_shader
+---------------------------
+
+
+1.2.1 SUB - Subtract
+
+ dst.x = src0.x - src1.x
+ dst.y = src0.y - src1.y
+ dst.z = src0.z - src1.z
+ dst.w = src0.w - src1.w
+
+
+1.2.2 DOT3 - 3-component Dot Product
+
+ Alias for DP3.
+
+
+1.2.3 DOT4 - 4-component Dot Product
+
+ Alias for DP4.
+
+
+1.2.4 LERP - Linear Interpolate
+
+ dst.x = src0.x * (src1.x - src2.x) + src2.x
+ dst.y = src0.y * (src1.y - src2.y) + src2.y
+ dst.z = src0.z * (src1.z - src2.z) + src2.z
+ dst.w = src0.w * (src1.w - src2.w) + src2.w
+
+
+1.2.5 CND - Condition
+
+ dst.x = (src2.x > 0.5) ? src0.x : src1.x
+ dst.y = (src2.y > 0.5) ? src0.y : src1.y
+ dst.z = (src2.z > 0.5) ? src0.z : src1.z
+ dst.w = (src2.w > 0.5) ? src0.w : src1.w
+
+
+1.2.6 CND0 - Condition Zero
+
+ dst.x = (src2.x >= 0.0) ? src0.x : src1.x
+ dst.y = (src2.y >= 0.0) ? src0.y : src1.y
+ dst.z = (src2.z >= 0.0) ? src0.z : src1.z
+ dst.w = (src2.w >= 0.0) ? src0.w : src1.w
+
+
+1.2.7 DOT2ADD - 2-component Dot Product And Add
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
+
+
+1.3 GL_EXT_vertex_shader
+-------------------------
+
+
+1.3.1 INDEX - Array Lookup
+
+ Considered for removal from language.
+
+
+1.3.2 NEGATE - Negate
+
+ Considered for removal from language.
+
+
+1.3.3 MADD - Multiply And Add
+
+ Alias for MAD.
+
+
+1.3.4 FRAC - Fraction
+
+ dst.x = src.x - floor(src.x)
+ dst.y = src.y - floor(src.y)
+ dst.z = src.z - floor(src.z)
+ dst.w = src.w - floor(src.w)
+
+
+1.3.5 SETGE - Set On Greater Equal
+
+ Alias for SGE.
+
+
+1.3.6 SETLT - Set On Less Than
+
+ Alias for SLT.
+
+
+1.3.7 CLAMP - Clamp
+
+ dst.x = clamp(src0.x, src1.x, src2.x)
+ dst.y = clamp(src0.y, src1.y, src2.y)
+ dst.z = clamp(src0.z, src1.z, src2.z)
+ dst.w = clamp(src0.w, src1.w, src2.w)
+
+
+1.3.8 FLOOR - Floor
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+ dst.w = floor(src.w)
+
+
+1.3.9 ROUND - Round
+
+ dst.x = round(src.x)
+ dst.y = round(src.y)
+ dst.z = round(src.z)
+ dst.w = round(src.w)
+
+
+1.3.10 EXPBASE2 - Exponent Base 2
+
+ dst.x = pow(2.0, src.x)
+ dst.y = pow(2.0, src.x)
+ dst.z = pow(2.0, src.x)
+ dst.w = pow(2.0, src.x)
+
+
+1.3.11 LOGBASE2 - Logarithm Base 2
+
+ dst.x = lg2(src.x)
+ dst.y = lg2(src.x)
+ dst.z = lg2(src.x)
+ dst.w = lg2(src.x)
+
+
+1.3.12 POWER - Power
+
+ dst.x = pow(src0.x, src1.x)
+ dst.y = pow(src0.x, src1.x)
+ dst.z = pow(src0.x, src1.x)
+ dst.w = pow(src0.x, src1.x)
+
+
+1.3.13 RECIP - Reciprocal
+
+ Alias for RCP.
+
+
+1.3.14 RECIPSQRT - Reciprocal Square Root
+
+ Alias for RSQ.
+
+
+1.3.15 CROSSPRODUCT - Cross Product
+
+ dst.x = src0.y * src1.z - src1.y * src0.z
+ dst.y = src0.z * src1.x - src1.z * src0.x
+ dst.z = src0.x * src1.y - src1.x * src0.y
+ dst.w = 1.0
+
+
+1.3.16 MULTIPLYMATRIX - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.4 GL_NV_vertex_program1_1
+----------------------------
+
+
+1.4.1 ABS - Absolute
+
+ dst.x = abs(src.x)
+ dst.y = abs(src.y)
+ dst.z = abs(src.z)
+ dst.w = abs(src.w)
+
+
+1.4.2 RCC - Reciprocal Clamped
+
+ dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+
+
+1.4.3 DPH - Homogeneous Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+
+
+1.5 GL_NV_fragment_program
+---------------------------
+
+
+1.5.1 COS - Cosine
+
+ dst.x = cos(src.x)
+ dst.y = cos(src.x)
+ dst.z = cos(src.x)
+ dst.w = cos(src.w)
+
+
+1.5.2 DDX - Derivative Relative To X
+
+ dst.x = partialx(src.x)
+ dst.y = partialx(src.y)
+ dst.z = partialx(src.z)
+ dst.w = partialx(src.w)
+
+
+1.5.3 DDY - Derivative Relative To Y
+
+ dst.x = partialy(src.x)
+ dst.y = partialy(src.y)
+ dst.z = partialy(src.z)
+ dst.w = partialy(src.w)
+
+
+1.5.4 EX2 - Exponential Base 2
+
+ Alias for EXPBASE2.
+
+
+1.5.5 FLR - Floor
+
+ Alias for FLOOR.
+
+
+1.5.6 FRC - Fraction
+
+ Alias for FRAC.
+
+
+1.5.7 KILP - Predicated Discard
+
+ TBD
+
+
+1.5.8 LG2 - Logarithm Base 2
+
+ Alias for LOGBASE2.
+
+
+1.5.9 LRP - Linear Interpolate
+
+ Alias for LERP.
+
+
+1.5.10 PK2H - Pack Two 16-bit Floats
+
+ TBD
+
+
+1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
+
+ TBD
+
+
+1.5.12 PK4B - Pack Four Signed 8-bit Scalars
+
+ TBD
+
+
+1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
+
+ TBD
+
+
+1.5.14 POW - Power
+
+ Alias for POWER.
+
+
+1.5.15 RFL - Reflection Vector
+
+ dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
+ dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
+ dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
+ dst.w = 1.0
+
+
+1.5.16 SEQ - Set On Equal
+
+ dst.x = (src0.x == src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y == src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z == src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w == src1.w) ? 1.0 : 0.0
+
+
+1.5.17 SFL - Set On False
+
+ dst.x = 0.0
+ dst.y = 0.0
+ dst.z = 0.0
+ dst.w = 0.0
+
+
+1.5.18 SGT - Set On Greater Than
+
+ dst.x = (src0.x > src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y > src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z > src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w > src1.w) ? 1.0 : 0.0
+
+
+1.5.19 SIN - Sine
+
+ dst.x = sin(src.x)
+ dst.y = sin(src.x)
+ dst.z = sin(src.x)
+ dst.w = sin(src.w)
+
+
+1.5.20 SLE - Set On Less Equal Than
+
+ dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
+
+
+1.5.21 SNE - Set On Not Equal
+
+ dst.x = (src0.x != src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y != src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z != src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w != src1.w) ? 1.0 : 0.0
+
+
+1.5.22 STR - Set On True
+
+ dst.x = 1.0
+ dst.y = 1.0
+ dst.z = 1.0
+ dst.w = 1.0
+
+
+1.5.23 TEX - Texture Lookup
+
+ TBD
+
+
+1.5.24 TXD - Texture Lookup with Derivatives
+
+ TBD
+
+
+1.5.25 TXP - Projective Texture Lookup
+
+ TBD
+
+
+1.5.26 UP2H - Unpack Two 16-Bit Floats
+
+ TBD
+
+
+1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars
+
+ TBD
+
+
+1.5.28 UP4B - Unpack Four Signed 8-Bit Values
+
+ TBD
+
+
+1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars
+
+ TBD
+
+
+1.5.30 X2D - 2D Coordinate Transformation
+
+ dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
+ dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
+ dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
+ dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
+
+
+1.6 GL_NV_vertex_program2
+--------------------------
+
+
+1.6.1 ARA - Address Register Add
+
+ TBD
+
+
+1.6.2 ARR - Address Register Load With Round
+
+ dst.x = round(src.x)
+ dst.y = round(src.y)
+ dst.z = round(src.z)
+ dst.w = round(src.w)
+
+
+1.6.3 BRA - Branch
+
+ TBD
+
+
+1.6.4 CAL - Subroutine Call
+
+ push(pc)
+ pc = target
+
+
+1.6.5 RET - Subroutine Call Return
+
+ pc = pop()
+
+
+1.6.6 SSG - Set Sign
+
+ dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
+ dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
+ dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
+ dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
+
+
+1.7 GL_ARB_vertex_program
+--------------------------
+
+
+1.7.1 SWZ - Extended Swizzle
+
+ dst.x = src.x
+ dst.y = src.y
+ dst.z = src.z
+ dst.w = src.w
+
+
+1.7.2 XPD - Cross Product
+
+ Alias for CROSSPRODUCT.
+
+
+1.8 GL_ARB_fragment_program
+----------------------------
+
+
+1.8.1 CMP - Compare
+
+ dst.x = (src0.x < 0.0) ? src1.x : src2.x
+ dst.y = (src0.y < 0.0) ? src1.y : src2.y
+ dst.z = (src0.z < 0.0) ? src1.z : src2.z
+ dst.w = (src0.w < 0.0) ? src1.w : src2.w
+
+
+1.8.2 KIL - Conditional Discard
+
+ TBD
+
+
+1.8.3 SCS - Sine Cosine
+
+ dst.x = cos(src.x)
+ dst.y = sin(src.x)
+ dst.z = 0.0
+ dst.y = 1.0
+
+
+1.8.4 TXB - Texture Lookup With Bias
+
+ TBD
+
+
+1.9 GL_NV_fragment_program2
+----------------------------
+
+
+1.9.1 NRM - 3-component Vector Normalise
+
+ dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.w = 1.0
+
+
+1.9.2 DIV - Divide
+
+ dst.x = src0.x / src1.x
+ dst.y = src0.y / src1.y
+ dst.z = src0.z / src1.z
+ dst.w = src0.w / src1.w
+
+
+1.9.3 DP2 - 2-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y
+ dst.y = src0.x * src1.x + src0.y * src1.y
+ dst.z = src0.x * src1.x + src0.y * src1.y
+ dst.w = src0.x * src1.x + src0.y * src1.y
+
+
+1.9.4 DP2A - 2-component Dot Product And Add
+
+ Alias for DOT2ADD.
+
+
+1.9.5 TXL - Texture Lookup With LOD
+
+ TBD
+
+
+1.9.6 BRK - Break
+
+ TBD
+
+
+1.9.7 IF - If
+
+ TBD
+
+
+1.9.8 LOOP - Loop
+
+ TBD
+
+
+1.9.9 REP - Repeat
+
+ TBD
+
+
+1.9.10 ELSE - Else
+
+ TBD
+
+
+1.9.11 ENDIF - End If
+
+ TBD
+
+
+1.9.12 ENDLOOP - End Loop
+
+ TBD
+
+
+1.9.13 ENDREP - End Repeat
+
+ TBD
+
+
+1.10 GL_NV_vertex_program3
+---------------------------
+
+
+1.10.1 PUSHA - Push Address Register On Stack
+
+ push(src.x)
+ push(src.y)
+ push(src.z)
+ push(src.w)
+
+
+1.10.2 POPA - Pop Address Register From Stack
+
+ dst.w = pop()
+ dst.z = pop()
+ dst.y = pop()
+ dst.x = pop()
+
+
+1.11 GL_NV_gpu_program4
+------------------------
+
+
+1.11.1 CEIL - Ceiling
+
+ dst.x = ceil(src.x)
+ dst.y = ceil(src.y)
+ dst.z = ceil(src.z)
+ dst.w = ceil(src.w)
+
+
+1.11.2 I2F - Integer To Float
+
+ dst.x = (float) src.x
+ dst.y = (float) src.y
+ dst.z = (float) src.z
+ dst.w = (float) src.w
+
+
+1.11.3 NOT - Bitwise Not
+
+ dst.x = ~src.x
+ dst.y = ~src.y
+ dst.z = ~src.z
+ dst.w = ~src.w
+
+
+1.11.4 TRUNC - Truncate
+
+ dst.x = trunc(src.x)
+ dst.y = trunc(src.y)
+ dst.z = trunc(src.z)
+ dst.w = trunc(src.w)
+
+
+1.11.5 SHL - Shift Left
+
+ dst.x = src0.x << src1.x
+ dst.y = src0.y << src1.x
+ dst.z = src0.z << src1.x
+ dst.w = src0.w << src1.x
+
+
+1.11.6 SHR - Shift Right
+
+ dst.x = src0.x >> src1.x
+ dst.y = src0.y >> src1.x
+ dst.z = src0.z >> src1.x
+ dst.w = src0.w >> src1.x
+
+
+1.11.7 AND - Bitwise And
+
+ dst.x = src0.x & src1.x
+ dst.y = src0.y & src1.y
+ dst.z = src0.z & src1.z
+ dst.w = src0.w & src1.w
+
+
+1.11.8 OR - Bitwise Or
+
+ dst.x = src0.x | src1.x
+ dst.y = src0.y | src1.y
+ dst.z = src0.z | src1.z
+ dst.w = src0.w | src1.w
+
+
+1.11.9 MOD - Modulus
+
+ dst.x = src0.x % src1.x
+ dst.y = src0.y % src1.y
+ dst.z = src0.z % src1.z
+ dst.w = src0.w % src1.w
+
+
+1.11.10 XOR - Bitwise Xor
+
+ dst.x = src0.x ^ src1.x
+ dst.y = src0.y ^ src1.y
+ dst.z = src0.z ^ src1.z
+ dst.w = src0.w ^ src1.w
+
+
+1.11.11 SAD - Sum Of Absolute Differences
+
+ dst.x = abs(src0.x - src1.x) + src2.x
+ dst.y = abs(src0.y - src1.y) + src2.y
+ dst.z = abs(src0.z - src1.z) + src2.z
+ dst.w = abs(src0.w - src1.w) + src2.w
+
+
+1.11.12 TXF - Texel Fetch
+
+ TBD
+
+
+1.11.13 TXQ - Texture Size Query
+
+ TBD
+
+
+1.11.14 CONT - Continue
+
+ TBD
+
+
+1.12 GL_NV_geometry_program4
+-----------------------------
+
+
+1.12.1 EMIT - Emit
+
+ TBD
+
+
+1.12.2 ENDPRIM - End Primitive
+
+ TBD
+
+
+1.13 GLSL
+----------
+
+
+1.13.1 BGNLOOP2 - Begin Loop
+
+ TBD
+
+
+1.13.2 BGNSUB - Begin Subroutine
+
+ TBD
+
+
+1.13.3 ENDLOOP2 - End Loop
+
+ TBD
+
+
+1.13.4 ENDSUB - End Subroutine
+
+ TBD
+
+
+1.13.5 INT - Truncate
+
+ Alias for TRUNC.
+
+
+1.13.6 NOISE1 - 1D Noise
+
+ TBD
+
+
+1.13.7 NOISE2 - 2D Noise
+
+ TBD
+
+
+1.13.8 NOISE3 - 3D Noise
+
+ TBD
+
+
+1.13.9 NOISE4 - 4D Noise
+
+ TBD
+
+
+1.13.10 NOP - No Operation
+
+ Do nothing.
+
+
+1.14 ps_1_1
+------------
+
+
+1.14.1 TEXKILL - Conditional Discard
+
+ Alias for KIL.
+
+
+1.15 ps_1_4
+------------
+
+
+1.15.1 TEXLD - Texture Lookup
+
+ Alias for TEX.
+
+
+1.16 ps_2_0
+------------
+
+
+1.16.1 M4X4 - Multiply Matrix
+
+ Alias for MULTIPLYMATRIX.
+
+
+1.16.2 M4X3 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.3 M3X4 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.4 M3X3 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.5 M3X2 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.6 CRS - Cross Product
+
+ Alias for XPD.
+
+
+1.16.7 NRM4 - 4-component Vector Normalise
+
+ dst.x = src.x / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.y = src.y / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.z = src.z / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.w = src.w / (src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+
+
+1.16.8 SINCOS - Sine Cosine
+
+ Alias for SCS.
+
+
+1.16.9 TEXLDB - Texture Lookup With Bias
+
+ Alias for TXB.
+
+
+1.16.10 DP2ADD - 2-component Dot Product And Add
+
+ Alias for DP2A.
+
+
+1.17 ps_2_x
+------------
+
+
+1.17.1 CALL - Subroutine Call
+
+ Alias for CAL.
+
+
+1.17.2 CALLNZ - Subroutine Call If Not Zero
+
+ TBD
+
+
+1.17.3 IFC - If
+
+ TBD
+
+
+1.17.4 BREAK - Break
+
+ Alias for BRK.
+
+
+1.17.5 BREAKC - Break Conditional
+
+ TBD
+
+
+1.17.6 DSX - Derivative Relative To X
+
+ Alias for DDX.
+
+
+1.17.7 DSY - Derivative Relative To Y
+
+ Alias for DDY.
+
+
+1.17.8 TEXLDD - Texture Lookup with Derivatives
+
+ Alias for TXD.
+
+
+1.18 vs_1_1
+------------
+
+
+1.18.1 EXPP - Approximate Exponential Base 2
+
+ Alias for EXP.
+
+
+1.18.2 LOGP - Logarithm Base 2
+
+ Alias for LG2.
+
+
+1.19 vs_2_0
+------------
+
+
+1.19.1 SGN - Set Sign
+
+ Alias for SSG.
+
+
+1.19.2 MOVA - Move Address Register
+
+ Alias for ARR.
+
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 17886540cf..a1891a140a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_build.h"
#include "tgsi_parse.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index ab2b1f2c58..a784b7cc3c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_string.h"
#include "tgsi_dump.h"
#include "tgsi_info.h"
@@ -245,8 +245,12 @@ iter_declaration(
}
}
- TXT( ", " );
- ENM( decl->Declaration.Interpolate, interpolate_names );
+ if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT &&
+ decl->Declaration.File == TGSI_FILE_INPUT)
+ {
+ TXT( ", " );
+ ENM( decl->Declaration.Interpolate, interpolate_names );
+ }
if (decl->Declaration.Centroid) {
TXT( ", CENTROID" );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 2ecf1e2f14..3dc61c48ca 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "util/u_string.h"
#include "tgsi_dump_c.h"
#include "tgsi_build.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index ab641efb60..ba807e498f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -486,6 +486,32 @@ micro_f2ut(
#endif
static void
+micro_float_clamp(union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ uint i;
+
+ for (i = 0; i < 4; i++) {
+ if (src->f[i] > 0.0f) {
+ if (src->f[i] > 1.884467e+019f)
+ dst->f[i] = 1.884467e+019f;
+ else if (src->f[i] < 5.42101e-020f)
+ dst->f[i] = 5.42101e-020f;
+ else
+ dst->f[i] = src->f[i];
+ }
+ else {
+ if (src->f[i] < -1.884467e+019f)
+ dst->f[i] = -1.884467e+019f;
+ else if (src->f[i] > -5.42101e-020f)
+ dst->f[i] = -5.42101e-020f;
+ else
+ dst->f[i] = src->f[i];
+ }
+ }
+}
+
+static void
micro_flr(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
@@ -508,20 +534,6 @@ micro_frc(
}
static void
-micro_ge(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
micro_i2f(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src )
@@ -1614,6 +1626,7 @@ exec_tex(struct tgsi_exec_machine *mach,
switch (inst->InstructionExtTexture.Texture) {
case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
FETCH(&r[0], 0, CHAN_X);
@@ -1636,6 +1649,8 @@ exec_tex(struct tgsi_exec_machine *mach,
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 0, CHAN_Y);
@@ -1826,12 +1841,14 @@ exec_instruction(
int *pc )
{
uint chan_index;
- union tgsi_exec_channel r[8];
+ union tgsi_exec_channel r[10];
(*pc)++;
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
+ /* TGSI_OPCODE_FLOOR */
+ /* TGSI_OPCODE_FLR */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_flr( &r[0], &r[0] );
@@ -1849,31 +1866,31 @@ exec_instruction(
case TGSI_OPCODE_LIT:
if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_X );
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Y );
- }
+ FETCH( &r[0], 0, CHAN_X );
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, CHAN_Y );
+ }
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[1], 0, CHAN_Y );
- micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[1], 0, CHAN_Y );
+ micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- FETCH( &r[2], 0, CHAN_W );
- micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
- micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
- micro_pow( &r[1], &r[1], &r[2] );
- micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Z );
- }
+ FETCH( &r[2], 0, CHAN_W );
+ micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
+ micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
+ micro_pow( &r[1], &r[1], &r[2] );
+ micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, CHAN_Z );
+ }
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
break;
@@ -1882,17 +1899,18 @@ exec_instruction(
FETCH( &r[0], 0, CHAN_X );
micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( &r[0], 0, CHAN_X );
+ micro_abs( &r[0], &r[0] );
micro_sqrt( &r[0], &r[0] );
micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
@@ -2005,30 +2023,30 @@ exec_instruction(
micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
case TGSI_OPCODE_DST:
if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- FETCH( &r[0], 0, CHAN_Y );
- FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, CHAN_Y );
+ FETCH( &r[0], 0, CHAN_Y );
+ FETCH( &r[1], 1, CHAN_Y);
+ micro_mul( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, CHAN_Y );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
+ FETCH( &r[0], 0, CHAN_Z );
+ STORE( &r[0], 0, CHAN_Z );
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
+ FETCH( &r[0], 1, CHAN_W );
+ STORE( &r[0], 0, CHAN_W );
}
break;
@@ -2071,7 +2089,7 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
STORE( &r[0], 0, chan_index );
}
break;
@@ -2115,15 +2133,27 @@ exec_instruction(
break;
case TGSI_OPCODE_CND:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+ micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
+ STORE(&r[0], 0, chan_index);
+ }
break;
case TGSI_OPCODE_CND0:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+ micro_le(&r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[2], &r[0], &r[1]);
+ STORE(&r[0], 0, chan_index);
+ }
break;
case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ /* TGSI_OPCODE_DP2A */
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
micro_mul( &r[0], &r[0], &r[1] );
@@ -2142,10 +2172,12 @@ exec_instruction(
break;
case TGSI_OPCODE_INDEX:
+ /* XXX: considered for removal */
assert (0);
break;
case TGSI_OPCODE_NEGATE:
+ /* XXX: considered for removal */
assert (0);
break;
@@ -2159,15 +2191,13 @@ exec_instruction(
break;
case TGSI_OPCODE_CLAMP:
- assert (0);
- break;
-
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_flr( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ micro_max(&r[0], &r[0], &r[1]);
+ FETCH(&r[1], 2, chan_index);
+ micro_min(&r[0], &r[0], &r[1]);
+ STORE(&r[0], 0, chan_index);
}
break;
@@ -2181,7 +2211,7 @@ exec_instruction(
break;
case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ /* TGSI_OPCODE_EX2 */
FETCH(&r[0], 0, CHAN_X);
#if FAST_MATH
@@ -2191,7 +2221,7 @@ exec_instruction(
#endif
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
@@ -2205,19 +2235,19 @@ exec_instruction(
break;
case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ /* TGSI_OPCODE_POW */
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
micro_pow( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ /* TGSI_OPCODE_XPD */
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
@@ -2260,6 +2290,7 @@ exec_instruction(
break;
case TGSI_OPCODE_MULTIPLYMATRIX:
+ /* XXX: considered for removal */
assert (0);
break;
@@ -2274,7 +2305,12 @@ exec_instruction(
break;
case TGSI_OPCODE_RCC:
- assert (0);
+ FETCH(&r[0], 0, CHAN_X);
+ micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]);
+ micro_float_clamp(&r[0], &r[0]);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&r[0], 0, chan_index);
+ }
break;
case TGSI_OPCODE_DPH:
@@ -2300,7 +2336,7 @@ exec_instruction(
micro_add( &r[0], &r[0], &r[1] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
@@ -2310,7 +2346,7 @@ exec_instruction(
micro_cos( &r[0], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
}
break;
@@ -2355,7 +2391,52 @@ exec_instruction(
break;
case TGSI_OPCODE_RFL:
- assert (0);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ /* r0 = dp3(src0, src0) */
+ FETCH(&r[2], 0, CHAN_X);
+ micro_mul(&r[0], &r[2], &r[2]);
+ FETCH(&r[4], 0, CHAN_Y);
+ micro_mul(&r[8], &r[4], &r[4]);
+ micro_add(&r[0], &r[0], &r[8]);
+ FETCH(&r[6], 0, CHAN_Z);
+ micro_mul(&r[8], &r[6], &r[6]);
+ micro_add(&r[0], &r[0], &r[8]);
+
+ /* r1 = dp3(src0, src1) */
+ FETCH(&r[3], 1, CHAN_X);
+ micro_mul(&r[1], &r[2], &r[3]);
+ FETCH(&r[5], 1, CHAN_Y);
+ micro_mul(&r[8], &r[4], &r[5]);
+ micro_add(&r[1], &r[1], &r[8]);
+ FETCH(&r[7], 1, CHAN_Z);
+ micro_mul(&r[8], &r[6], &r[7]);
+ micro_add(&r[1], &r[1], &r[8]);
+
+ /* r1 = 2 * r1 / r0 */
+ micro_add(&r[1], &r[1], &r[1]);
+ micro_div(&r[1], &r[1], &r[0]);
+
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ micro_mul(&r[2], &r[2], &r[1]);
+ micro_sub(&r[2], &r[2], &r[3]);
+ STORE(&r[2], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ micro_mul(&r[4], &r[4], &r[1]);
+ micro_sub(&r[4], &r[4], &r[5]);
+ STORE(&r[4], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ micro_mul(&r[6], &r[6], &r[1]);
+ micro_sub(&r[6], &r[6], &r[7]);
+ STORE(&r[6], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
+ }
break;
case TGSI_OPCODE_SEQ:
@@ -2370,7 +2451,9 @@ exec_instruction(
break;
case TGSI_OPCODE_SFL:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index);
+ }
break;
case TGSI_OPCODE_SGT:
@@ -2409,7 +2492,9 @@ exec_instruction(
break;
case TGSI_OPCODE_STR:
- assert (0);
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index);
+ }
break;
case TGSI_OPCODE_TEX:
@@ -2466,7 +2551,40 @@ exec_instruction(
break;
case TGSI_OPCODE_X2D:
- assert (0);
+ FETCH(&r[0], 1, CHAN_X);
+ FETCH(&r[1], 1, CHAN_Y);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ FETCH(&r[2], 2, CHAN_X);
+ micro_mul(&r[2], &r[2], &r[0]);
+ FETCH(&r[3], 2, CHAN_Y);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ FETCH(&r[3], 0, CHAN_X);
+ micro_add(&r[2], &r[2], &r[3]);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&r[2], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&r[2], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ FETCH(&r[2], 2, CHAN_Z);
+ micro_mul(&r[2], &r[2], &r[0]);
+ FETCH(&r[3], 2, CHAN_W);
+ micro_mul(&r[3], &r[3], &r[1]);
+ micro_add(&r[2], &r[2], &r[3]);
+ FETCH(&r[3], 0, CHAN_Y);
+ micro_add(&r[2], &r[2], &r[3]);
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&r[2], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&r[2], 0, CHAN_W);
+ }
+ }
break;
case TGSI_OPCODE_ARA:
@@ -2551,14 +2669,14 @@ exec_instruction(
case TGSI_OPCODE_SCS:
if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
FETCH( &r[0], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- micro_cos( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- micro_sin( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_Y );
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ micro_cos(&r[1], &r[0]);
+ STORE(&r[1], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ micro_sin(&r[1], &r[0]);
+ STORE(&r[1], 0, CHAN_Y);
+ }
}
if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
@@ -2570,32 +2688,36 @@ exec_instruction(
case TGSI_OPCODE_NRM:
/* 3-component vector normalize */
- {
- union tgsi_exec_channel tmp, dot;
-
- /* tmp = dp3(src0, src0): */
- FETCH( &r[0], 0, CHAN_X );
- micro_mul( &tmp, &r[0], &r[0] );
-
- FETCH( &r[1], 0, CHAN_Y );
- micro_mul( &dot, &r[1], &r[1] );
- micro_add( &tmp, &tmp, &dot );
-
- FETCH( &r[2], 0, CHAN_Z );
- micro_mul( &dot, &r[2], &r[2] );
- micro_add( &tmp, &tmp, &dot );
-
- /* tmp = 1 / sqrt(tmp) */
- micro_sqrt( &tmp, &tmp );
- micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp );
-
- /* note: w channel is undefined */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- /* chan = chan * tmp */
- micro_mul( &r[chan_index], &tmp, &r[chan_index] );
- STORE( &r[chan_index], 0, chan_index );
+ if(IS_CHANNEL_ENABLED(*inst, CHAN_X) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
+ IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ /* r3 = sqrt(dp3(src0, src0)) */
+ FETCH(&r[0], 0, CHAN_X);
+ micro_mul(&r[3], &r[0], &r[0]);
+ FETCH(&r[1], 0, CHAN_Y);
+ micro_mul(&r[4], &r[1], &r[1]);
+ micro_add(&r[3], &r[3], &r[4]);
+ FETCH(&r[2], 0, CHAN_Z);
+ micro_mul(&r[4], &r[2], &r[2]);
+ micro_add(&r[3], &r[3], &r[4]);
+ micro_sqrt(&r[3], &r[3]);
+
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ micro_div(&r[0], &r[0], &r[3]);
+ STORE(&r[0], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ micro_div(&r[1], &r[1], &r[3]);
+ STORE(&r[1], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ micro_div(&r[2], &r[2], &r[3]);
+ STORE(&r[2], 0, CHAN_Z);
}
}
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W);
+ }
break;
case TGSI_OPCODE_NRM4:
@@ -2956,5 +3078,3 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
}
-
-
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 68c7a6b7f5..2b8a6f0fb1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_info.h"
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
index 5371a88b96..d88c2558d8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_iterate.h"
boolean
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index d374b16f9a..0081f74ffc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"
@@ -154,10 +154,17 @@ tgsi_parse_token(
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- imm->u.Pointer = MALLOC(
- sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.NrTokens - 1) );
- for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
- next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] );
+ {
+ uint imm_count = imm->Immediate.NrTokens - 1;
+ struct tgsi_immediate_float32 *data;
+
+ data = (struct tgsi_immediate_float32 *) MALLOC(sizeof(struct tgsi_immediate_float32) * imm_count);
+ if (data) {
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &data[i]);
+ }
+ imm->u.ImmediateFloat32 = data;
+ }
}
break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index f365030e52..0c64ae5713 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -33,7 +33,7 @@
#if defined(PIPE_ARCH_PPC)
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index bc7b941b78..6f1f5c2b4b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -25,23 +25,24 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_sanity.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
-#define MAX_REGISTERS 256
-
typedef uint reg_flag;
#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8)
+#define MAX_REGISTERS 256
+#define MAX_REG_FLAGS ((MAX_REGISTERS + BITS_IN_REG_FLAG - 1) / BITS_IN_REG_FLAG)
+
struct sanity_check_ctx
{
struct tgsi_iterate_context iter;
- reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
- reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG];
+ reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REG_FLAGS];
+ reg_flag regs_used[TGSI_FILE_COUNT][MAX_REG_FLAGS];
boolean regs_ind_used[TGSI_FILE_COUNT];
uint num_imms;
uint num_instructions;
@@ -89,7 +90,7 @@ check_file_name(
uint file )
{
if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) {
- report_error( ctx, "Invalid register file name" );
+ report_error( ctx, "(%u): Invalid register file name", file );
return FALSE;
}
return TRUE;
@@ -113,7 +114,7 @@ is_any_register_declared(
{
uint i;
- for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++)
+ for (i = 0; i < MAX_REG_FLAGS; i++)
if (ctx->regs_decl[file][i])
return TRUE;
return FALSE;
@@ -162,9 +163,8 @@ check_register_usage(
ctx->regs_ind_used[file] = TRUE;
}
else {
- if (index < 0 || index > MAX_REGISTERS) {
- report_error( ctx, "%s[%i]: Invalid index %s",
- file_names[file], index, name );
+ if (index < 0 || index >= MAX_REGISTERS) {
+ report_error( ctx, "%s[%d]: Invalid %s index", file_names[file], index, name );
return FALSE;
}
@@ -193,15 +193,15 @@ iter_instruction(
info = tgsi_get_opcode_info( inst->Instruction.Opcode );
if (info == NULL) {
- report_error( ctx, "Invalid instruction opcode" );
+ report_error( ctx, "(%u): Invalid instruction opcode", inst->Instruction.Opcode );
return TRUE;
}
if (info->num_dst != inst->Instruction.NumDstRegs) {
- report_error( ctx, "Invalid number of destination operands" );
+ report_error( ctx, "Invalid number of destination operands, should be %u", info->num_dst );
}
if (info->num_src != inst->Instruction.NumSrcRegs) {
- report_error( ctx, "Invalid number of source operands" );
+ report_error( ctx, "Invalid number of source operands, should be %u", info->num_src );
}
/* Check destination and source registers' validity.
@@ -266,7 +266,7 @@ iter_declaration(
return TRUE;
for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) {
if (is_register_declared( ctx, file, i ))
- report_error( ctx, "The same register declared twice" );
+ report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i );
ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
}
@@ -295,7 +295,7 @@ iter_immediate(
/* Check data type validity.
*/
if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) {
- report_error( ctx, "Invalid immediate data type" );
+ report_error( ctx, "(%u): Invalid immediate data type", imm->Immediate.DataType );
return TRUE;
}
@@ -322,7 +322,7 @@ epilog(
for (i = 0; i < MAX_REGISTERS; i++) {
if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) {
- report_warning( ctx, "Register never used" );
+ report_warning( ctx, "%s[%u]: Register never used", file_names[file], i );
}
}
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index d02205a63e..c535788819 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -151,7 +151,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
break;
case TGSI_TOKEN_TYPE_IMMEDIATE:
- info->immediate_count++;
+ {
+ uint reg = info->immediate_count++;
+ uint file = TGSI_FILE_IMMEDIATE;
+
+ info->file_mask[file] |= (1 << reg);
+ info->file_count[file]++;
+ info->file_max[file] = MAX2(info->file_max[file], (int)reg);
+ }
break;
default:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 481ba89c5e..d70bcd03c5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -29,7 +29,7 @@
#if defined(PIPE_ARCH_X86)
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#if defined(PIPE_ARCH_SSE)
@@ -1575,6 +1575,7 @@ emit_instruction(
case TGSI_OPCODE_RSQ:
/* TGSI_OPCODE_RECIPSQRT */
FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_abs( func, 0 );
emit_rsqrt( func, 1, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 1, 0, chan_index );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 1e822fbbea..58fe07c11d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
#include "tgsi_info.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index ea87da31e5..062c1be938 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -31,7 +31,7 @@
* Authors: Brian Paul
*/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "tgsi_transform.h"
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 50101a9bb0..71f8a6ca40 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -25,7 +25,7 @@
*
**************************************************************************/
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
#include "tgsi_build.h"