summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/llvmpipe
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/llvmpipe')
-rw-r--r--src/gallium/drivers/llvmpipe/.gitignore1
-rw-r--r--src/gallium/drivers/llvmpipe/Makefile10
-rw-r--r--src/gallium/drivers/llvmpipe/README36
-rw-r--r--src/gallium/drivers/llvmpipe/SConscript14
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.c478
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_arit.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_const.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.c243
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_conv.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_debug.c13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_depth.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format.h80
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_aos.c205
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_query.c72
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_format_soa.c119
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.c34
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.c6
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_logic.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_misc.cpp61
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_misc.h56
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_pack.c418
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_pack.h95
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.c190
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c375
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_swizzle.h2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c81
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.c29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_type.h98
-rw-r--r--src/gallium/drivers/llvmpipe/lp_clear.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.c59
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h13
-rw-r--r--src/gallium/drivers/llvmpipe/lp_draw_arrays.c50
-rw-r--r--src/gallium/drivers/llvmpipe/lp_flush.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_jit.c25
-rw-r--r--src/gallium/drivers/llvmpipe/lp_prim_setup.c190
-rw-r--r--src/gallium/drivers/llvmpipe/lp_prim_setup.h85
-rw-r--r--src/gallium/drivers/llvmpipe/lp_prim_vbuf.c107
-rw-r--r--src/gallium/drivers/llvmpipe/lp_prim_vbuf.h4
-rw-r--r--src/gallium/drivers/llvmpipe/lp_screen.c64
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c68
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state.h9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_blend.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c88
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c87
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_sampler.c63
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_surface.c5
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_vs.c1
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test.h20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_blend.c25
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_conv.c9
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_format.c94
-rw-r--r--src/gallium/drivers/llvmpipe/lp_test_main.c20
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_cache.c8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tex_sample_c.c30
-rw-r--r--src/gallium/drivers/llvmpipe/lp_texture.c91
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_cache.c129
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_cache.h37
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.c931
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.h16
-rw-r--r--src/gallium/drivers/llvmpipe/lp_tile_soa.py278
61 files changed, 2898 insertions, 2490 deletions
diff --git a/src/gallium/drivers/llvmpipe/.gitignore b/src/gallium/drivers/llvmpipe/.gitignore
new file mode 100644
index 0000000000..257b72d7b2
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/.gitignore
@@ -0,0 +1 @@
+lp_tile_soa.c
diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index cd7b6356d2..e038a5229e 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -17,10 +17,13 @@ C_SOURCES = \
lp_bld_depth.c \
lp_bld_flow.c \
lp_bld_format_aos.c \
+ lp_bld_format_query.c \
lp_bld_format_soa.c \
lp_bld_interp.c \
lp_bld_intr.c \
lp_bld_logic.c \
+ lp_bld_pack.c \
+ lp_bld_sample.c \
lp_bld_sample_soa.c \
lp_bld_swizzle.c \
lp_bld_struct.c \
@@ -32,7 +35,6 @@ C_SOURCES = \
lp_draw_arrays.c \
lp_flush.c \
lp_jit.c \
- lp_prim_setup.c \
lp_prim_vbuf.c \
lp_setup.c \
lp_query.c \
@@ -54,4 +56,10 @@ C_SOURCES = \
lp_tile_cache.c \
lp_tile_soa.c
+CPP_SOURCES = \
+ lp_bld_misc.cpp
+
include ../../Makefile.template
+
+lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv
+ python lp_tile_soa.py ../../auxiliary/util/u_format.csv > $@
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 89d08834a3..0c3f00fd58 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -51,21 +51,22 @@ Requirements
- Linux
- - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes
- opcodes not yet supported by upstream.
+ - A x86 or amd64 processor. 64bit mode is preferred.
- git clone git://people.freedesktop.org/~jrfonseca/udis86
- cd udis86
- ./configure --with-pic
- make
- sudo make install
+ Support for sse2 is strongly encouraged. Support for ssse3, and sse4.1 will
+ yield the most efficient code. The less features the CPU has the more
+ likely is that you ran into underperforming, buggy, or incomplete code.
+
+ See /proc/cpuinfo to know what your CPU supports.
+
+ - LLVM 2.5 or greater. LLVM 2.6 is preferred.
- - LLVM 2.5. On Debian based distributions do:
+ On Debian based distributions do:
aptitude install llvm-dev
- There is a typo in one of the llvm-dev 2.5 headers, that causes compilation
- errors in the debug build:
+ There is a typo in one of the llvm 2.5 headers, that may cause compilation
+ errors. To fix it apply the change:
--- /usr/include/llvm-c/Core.h.orig 2009-08-10 15:38:54.000000000 +0100
+++ /usr/include/llvm-c/Core.h 2009-08-10 15:38:25.000000000 +0100
@@ -79,12 +80,17 @@ Requirements
#endif
return reinterpret_cast<T**>(Vals);
- - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD
- instructions. This is necessary because we emit several SSE intrinsics for
- convenience. See /proc/cpuinfo to know what your CPU supports.
-
- - scons
+ - scons (optional)
+ - udis86, http://udis86.sourceforge.net/ (optional):
+
+ git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86
+ cd udis86
+ ./autogen.sh
+ ./configure --with-pic
+ make
+ sudo make install
+
Building
========
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index f4a9a3b22e..3bd2e70013 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -9,6 +9,13 @@ if not env.has_key('LLVM_VERSION'):
env.Tool('udis86')
+env.CodeGenerate(
+ target = 'lp_tile_soa.c',
+ script = 'lp_tile_soa.py',
+ source = ['#src/gallium/auxiliary/util/u_format.csv'],
+ command = 'python $SCRIPT $SOURCE > $TARGET'
+)
+
llvmpipe = env.ConvenienceLibrary(
target = 'llvmpipe',
source = [
@@ -23,9 +30,13 @@ llvmpipe = env.ConvenienceLibrary(
'lp_bld_depth.c',
'lp_bld_flow.c',
'lp_bld_format_aos.c',
+ 'lp_bld_format_query.c',
'lp_bld_format_soa.c',
'lp_bld_interp.c',
'lp_bld_intr.c',
+ 'lp_bld_misc.cpp',
+ 'lp_bld_pack.c',
+ 'lp_bld_sample.c',
'lp_bld_sample_soa.c',
'lp_bld_struct.c',
'lp_bld_logic.c',
@@ -38,7 +49,6 @@ llvmpipe = env.ConvenienceLibrary(
'lp_draw_arrays.c',
'lp_flush.c',
'lp_jit.c',
- 'lp_prim_setup.c',
'lp_prim_vbuf.c',
'lp_setup.c',
'lp_query.c',
@@ -68,7 +78,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries)
env.Program(
target = 'lp_test_format',
- source = ['lp_test_format.c'],
+ source = ['lp_test_format.c', 'lp_test_main.c'],
)
env.Program(
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 0b115fc9b0..9c59677a74 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -47,12 +47,16 @@
#include "util/u_memory.h"
#include "util/u_debug.h"
+#include "util/u_math.h"
#include "util/u_string.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
+#include "lp_bld_debug.h"
#include "lp_bld_arit.h"
@@ -71,30 +75,28 @@ lp_build_min_simple(struct lp_build_context *bld,
/* TODO: optimize the constant case */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
if(type.floating) {
- if(type.width == 32)
+ if(type.width == 32 && util_cpu_caps.has_sse)
intrinsic = "llvm.x86.sse.min.ps";
- if(type.width == 64)
+ if(type.width == 64 && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.min.pd";
}
else {
- if(type.width == 8 && !type.sign)
+ if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pminu.b";
- if(type.width == 8 && type.sign)
+ if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminsb";
- if(type.width == 16 && !type.sign)
+ if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminuw";
- if(type.width == 16 && type.sign)
+ if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pmins.w";
- if(type.width == 32 && !type.sign)
+ if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminud";
- if(type.width == 32 && type.sign)
+ if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pminsd";
}
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -119,30 +121,28 @@ lp_build_max_simple(struct lp_build_context *bld,
/* TODO: optimize the constant case */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
if(type.floating) {
- if(type.width == 32)
+ if(type.width == 32 && util_cpu_caps.has_sse)
intrinsic = "llvm.x86.sse.max.ps";
- if(type.width == 64)
+ if(type.width == 64 && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.max.pd";
}
else {
- if(type.width == 8 && !type.sign)
+ if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pmaxu.b";
- if(type.width == 8 && type.sign)
+ if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxsb";
- if(type.width == 16 && !type.sign)
+ if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxuw";
- if(type.width == 16 && type.sign)
+ if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
intrinsic = "llvm.x86.sse2.pmaxs.w";
- if(type.width == 32 && !type.sign)
+ if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxud";
- if(type.width == 32 && type.sign)
+ if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
intrinsic = "llvm.x86.sse41.pmaxsd";
}
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -204,15 +204,14 @@ lp_build_add(struct lp_build_context *bld,
if(a == bld->one || b == bld->one)
return bld->one;
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width * type.length == 128 &&
+ if(util_cpu_caps.has_sse2 &&
+ type.width * type.length == 128 &&
!type.floating && !type.fixed) {
if(type.width == 8)
intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
if(type.width == 16)
intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -257,15 +256,14 @@ lp_build_sub(struct lp_build_context *bld,
if(b == bld->one)
return bld->zero;
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width * type.length == 128 &&
+ if(util_cpu_caps.has_sse2 &&
+ type.width * type.length == 128 &&
!type.floating && !type.fixed) {
if(type.width == 8)
intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
if(type.width == 16)
intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
}
-#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
@@ -284,45 +282,6 @@ lp_build_sub(struct lp_build_context *bld,
/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef
-lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
- assert(lo_hi < 2);
-
- for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
- elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
- elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
- }
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build constant int vector of width 'n' and value 'c'.
- */
-static LLVMValueRef
-lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
-
- for(i = 0; i < n; ++i)
- elems[i] = LLVMConstInt(type, c, 0);
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
* Normalized 8bit multiplication.
*
* - alpha plus one
@@ -365,33 +324,30 @@ lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
*/
static LLVMValueRef
lp_build_mul_u8n(LLVMBuilderRef builder,
+ struct lp_type i16_type,
LLVMValueRef a, LLVMValueRef b)
{
- static LLVMValueRef c01 = NULL;
- static LLVMValueRef c08 = NULL;
- static LLVMValueRef c80 = NULL;
+ LLVMValueRef c8;
LLVMValueRef ab;
- if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
- if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
- if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
+ c8 = lp_build_int_const_scalar(i16_type, 8);
#if 0
/* a*b/255 ~= (a*(b + 1)) >> 256 */
- b = LLVMBuildAdd(builder, b, c01, "");
+ b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
ab = LLVMBuildMul(builder, a, b, "");
#else
- /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
+ /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
ab = LLVMBuildMul(builder, a, b, "");
- ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
- ab = LLVMBuildAdd(builder, ab, c80, "");
+ ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
+ ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
#endif
- ab = LLVMBuildLShr(builder, ab, c08, "");
+ ab = LLVMBuildLShr(builder, ab, c8, "");
return ab;
}
@@ -406,6 +362,8 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef b)
{
const struct lp_type type = bld->type;
+ LLVMValueRef shift;
+ LLVMValueRef res;
if(a == bld->zero)
return bld->zero;
@@ -419,53 +377,104 @@ lp_build_mul(struct lp_build_context *bld,
return bld->undef;
if(!type.floating && !type.fixed && type.norm) {
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 8 && type.length == 16) {
- LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
- LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
- static LLVMValueRef ml = NULL;
- static LLVMValueRef mh = NULL;
- LLVMValueRef al, ah, bl, bh;
- LLVMValueRef abl, abh;
- LLVMValueRef ab;
-
- if(!ml) ml = lp_build_unpack_shuffle(16, 0);
- if(!mh) mh = lp_build_unpack_shuffle(16, 1);
-
- /* PUNPCKLBW, PUNPCKHBW */
- al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
- bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
- ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
- bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
+ if(type.width == 8) {
+ struct lp_type i16_type = lp_wider_type(type);
+ LLVMValueRef al, ah, bl, bh, abl, abh, ab;
- /* NOP */
- al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
- bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
- ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
- bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
+ lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah);
+ lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh);
/* PMULLW, PSRLW, PADDW */
- abl = lp_build_mul_u8n(bld->builder, al, bl);
- abh = lp_build_mul_u8n(bld->builder, ah, bh);
+ abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl);
+ abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh);
- /* PACKUSWB */
- ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
-
- /* NOP */
- ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
+ ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh);
return ab;
}
-#endif
/* FIXME */
assert(0);
}
- if(LLVMIsConstant(a) && LLVMIsConstant(b))
- return LLVMConstMul(a, b);
+ if(type.fixed)
+ shift = lp_build_int_const_scalar(type, type.width/2);
+ else
+ shift = NULL;
+
+ if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+ res = LLVMConstMul(a, b);
+ if(shift) {
+ if(type.sign)
+ res = LLVMConstAShr(res, shift);
+ else
+ res = LLVMConstLShr(res, shift);
+ }
+ }
+ else {
+ res = LLVMBuildMul(bld->builder, a, b, "");
+ if(shift) {
+ if(type.sign)
+ res = LLVMBuildAShr(bld->builder, res, shift, "");
+ else
+ res = LLVMBuildLShr(bld->builder, res, shift, "");
+ }
+ }
+
+ return res;
+}
+
+
+/**
+ * Small vector x scale multiplication optimization.
+ */
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+ LLVMValueRef a,
+ int b)
+{
+ LLVMValueRef factor;
+
+ if(b == 0)
+ return bld->zero;
+
+ if(b == 1)
+ return a;
- return LLVMBuildMul(bld->builder, a, b, "");
+ if(b == -1)
+ return LLVMBuildNeg(bld->builder, a, "");
+
+ if(b == 2 && bld->type.floating)
+ return lp_build_add(bld, a, a);
+
+ if(util_is_pot(b)) {
+ unsigned shift = ffs(b) - 1;
+
+ if(bld->type.floating) {
+#if 0
+ /*
+ * Power of two multiplication by directly manipulating the mantissa.
+ *
+ * XXX: This might not be always faster, it will introduce a small error
+ * for multiplication by zero, and it will produce wrong results
+ * for Inf and NaN.
+ */
+ unsigned mantissa = lp_mantissa(bld->type);
+ factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
+ a = LLVMBuildAdd(bld->builder, a, factor, "");
+ a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
+ return a;
+#endif
+ }
+ else {
+ factor = lp_build_const_scalar(bld->type, shift);
+ return LLVMBuildShl(bld->builder, a, factor, "");
+ }
+ }
+
+ factor = lp_build_const_scalar(bld->type, (double)b);
+ return lp_build_mul(bld, a, factor);
}
@@ -493,22 +502,43 @@ lp_build_div(struct lp_build_context *bld,
if(LLVMIsConstant(a) && LLVMIsConstant(b))
return LLVMConstFDiv(a, b);
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 32 && type.length == 4)
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_mul(bld, a, lp_build_rcp(bld, b));
-#endif
return LLVMBuildFDiv(bld->builder, a, b, "");
}
+/**
+ * Linear interpolation.
+ *
+ * This also works for integer values with a few caveats.
+ *
+ * @sa http://www.stereopsis.com/doubleblend.html
+ */
LLVMValueRef
lp_build_lerp(struct lp_build_context *bld,
LLVMValueRef x,
LLVMValueRef v0,
LLVMValueRef v1)
{
- return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+ LLVMValueRef delta;
+ LLVMValueRef res;
+
+ delta = lp_build_sub(bld, v1, v0);
+
+ res = lp_build_mul(bld, x, delta);
+
+ res = lp_build_add(bld, v0, res);
+
+ if(bld->type.fixed)
+ /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
+ * but it will be wrong for other uses. Basically we need a more
+ * powerful lp_type, capable of further distinguishing the values
+ * interpretation from the value storage. */
+ res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");
+
+ return res;
}
@@ -606,8 +636,7 @@ lp_build_abs(struct lp_build_context *bld,
return a;
}
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width*type.length == 128) {
+ if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
switch(type.width) {
case 8:
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
@@ -617,7 +646,6 @@ lp_build_abs(struct lp_build_context *bld,
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
}
}
-#endif
return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
}
@@ -684,6 +712,8 @@ lp_build_round_sse41(struct lp_build_context *bld,
assert(type.floating);
assert(type.width*type.length == 128);
+ assert(lp_check_value(type, a));
+ assert(util_cpu_caps.has_sse4_1);
switch(type.width) {
case 32:
@@ -703,20 +733,45 @@ lp_build_round_sse41(struct lp_build_context *bld,
LLVMValueRef
-lp_build_round(struct lp_build_context *bld,
+lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a)
{
const struct lp_type type = bld->type;
assert(type.floating);
+ assert(lp_check_value(type, a));
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
-#endif
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
+ res = LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
+}
- /* FIXME */
- assert(0);
- return bld->undef;
+
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef res;
+ res = lp_build_iround(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
}
@@ -728,13 +783,15 @@ lp_build_floor(struct lp_build_context *bld,
assert(type.floating);
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
-#endif
-
- /* FIXME */
- assert(0);
- return bld->undef;
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef res;
+ res = lp_build_ifloor(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
}
@@ -745,59 +802,143 @@ lp_build_ceil(struct lp_build_context *bld,
const struct lp_type type = bld->type;
assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if(util_cpu_caps.has_sse4_1)
+ return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef res;
+ res = lp_build_iceil(bld, a);
+ res = LLVMBuildSIToFP(bld->builder, res, vec_type, "");
+ return res;
+ }
+}
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
-#endif
- /* FIXME */
- assert(0);
- return bld->undef;
+/**
+ * Convert to integer, through whichever rounding method that's fastest,
+ * typically truncating to zero.
+ */
+LLVMValueRef
+lp_build_itrunc(struct lp_build_context *bld,
+ LLVMValueRef a)
+{
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
}
LLVMValueRef
-lp_build_trunc(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_iround(struct lp_build_context *bld,
+ LLVMValueRef a)
{
const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
assert(type.floating);
+ assert(lp_check_value(type, a));
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
-#endif
+ if(util_cpu_caps.has_sse4_1) {
+ res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+ }
+ else {
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef half;
+
+ /* get sign bit */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+
+ /* sign * 0.5 */
+ half = lp_build_const_scalar(type, 0.5);
+ half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
+ half = LLVMBuildOr(bld->builder, sign, half, "");
+ half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
+
+ res = LLVMBuildAdd(bld->builder, a, half, "");
+ }
+
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
- /* FIXME */
- assert(0);
- return bld->undef;
+ return res;
}
-/**
- * Convert to integer, through whichever rounding method that's fastest,
- * typically truncating to zero.
- */
LLVMValueRef
-lp_build_int(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a)
{
const struct lp_type type = bld->type;
LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
assert(type.floating);
+ assert(lp_check_value(type, a));
- return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+ if(util_cpu_caps.has_sse4_1) {
+ res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+ }
+ else {
+ /* Take the sign bit and add it to 1 constant */
+ LLVMTypeRef vec_type = lp_build_vec_type(type);
+ unsigned mantissa = lp_mantissa(type);
+ LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+ LLVMValueRef sign;
+ LLVMValueRef offset;
+
+ /* sign = a < 0 ? ~0 : 0 */
+ sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+ sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+ sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
+
+ /* offset = -0.99999(9)f */
+ offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
+ offset = LLVMConstBitCast(offset, int_vec_type);
+
+ /* offset = a < 0 ? -0.99999(9)f : 0.0f */
+ offset = LLVMBuildAnd(bld->builder, offset, sign, "");
+ offset = LLVMBuildBitCast(bld->builder, offset, vec_type, "");
+
+ res = LLVMBuildAdd(bld->builder, a, offset, "");
+ }
+
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
+
+ return res;
}
LLVMValueRef
-lp_build_ifloor(struct lp_build_context *bld,
- LLVMValueRef a)
+lp_build_iceil(struct lp_build_context *bld,
+ LLVMValueRef a)
{
- a = lp_build_floor(bld, a);
- a = lp_build_int(bld, a);
- return a;
+ const struct lp_type type = bld->type;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMValueRef res;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if(util_cpu_caps.has_sse4_1) {
+ res = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+ }
+ else {
+ assert(0);
+ res = bld->undef;
+ }
+
+ res = LLVMBuildFPToSI(bld->builder, res, int_vec_type, "");
+
+ return res;
}
@@ -837,11 +978,9 @@ lp_build_rcp(struct lp_build_context *bld,
if(LLVMIsConstant(a))
return LLVMConstFDiv(bld->one, a);
- /* XXX: is this really necessary? */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 32 && type.length == 4)
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
+ /* FIXME: improve precision */
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rcp.ps", lp_build_vec_type(type), a);
-#endif
return LLVMBuildFDiv(bld->builder, bld->one, a, "");
}
@@ -858,11 +997,8 @@ lp_build_rsqrt(struct lp_build_context *bld,
assert(type.floating);
- /* XXX: is this really necessary? */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width == 32 && type.length == 4)
+ if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
-#endif
return lp_build_rcp(bld, lp_build_sqrt(bld, a));
}
@@ -918,7 +1054,8 @@ lp_build_pow(struct lp_build_context *bld,
{
/* TODO: optimize the constant case */
if(LLVMIsConstant(x) && LLVMIsConstant(y))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
return lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2(bld, x), y));
}
@@ -972,7 +1109,8 @@ lp_build_polynomial(struct lp_build_context *bld,
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
for (i = num_coeffs; i--; ) {
LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
@@ -1026,7 +1164,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
if(p_exp2_int_part || p_frac_part || p_exp2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
assert(type.floating && type.width == 32);
@@ -1125,7 +1264,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
if(p_exp || p_floor_log2 || p_log2) {
/* TODO: optimize the constant case */
if(LLVMIsConstant(x))
- debug_printf("%s: inefficient/imprecise constant arithmetic\n");
+ debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+ __FUNCTION__);
assert(type.floating && type.width == 32);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index d68a97c4b8..62be4b9aee 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
@@ -67,6 +67,11 @@ lp_build_mul(struct lp_build_context *bld,
LLVMValueRef b);
LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+ LLVMValueRef a,
+ int b);
+
+LLVMValueRef
lp_build_div(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
@@ -126,11 +131,18 @@ lp_build_trunc(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
-lp_build_int(struct lp_build_context *bld,
- LLVMValueRef a);
+lp_build_ifloor(struct lp_build_context *bld,
+ LLVMValueRef a);
+LLVMValueRef
+lp_build_iceil(struct lp_build_context *bld,
+ LLVMValueRef a);
LLVMValueRef
-lp_build_ifloor(struct lp_build_context *bld,
+lp_build_iround(struct lp_build_context *bld,
+ LLVMValueRef a);
+
+LLVMValueRef
+lp_build_itrunc(struct lp_build_context *bld,
LLVMValueRef a);
LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index ffb302f736..cb8e1c7b00 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -42,7 +42,7 @@
#include <pipe/p_compiler.h>
-struct lp_type type;
+struct lp_type;
unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index 186cac70f6..9935209437 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -63,11 +63,13 @@
#include "util/u_debug.h"
#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
#include "lp_bld_conv.h"
@@ -198,243 +200,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef
-lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
- assert(lo_hi < 2);
-
- /* TODO: cache results in a static table */
-
- for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
- elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
- elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
- }
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build shuffle vectors that match PACKxx instructions.
- */
-static LLVMValueRef
-lp_build_const_pack_shuffle(unsigned n)
-{
- LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- assert(n <= LP_MAX_VECTOR_LENGTH);
-
- /* TODO: cache results in a static table */
-
- for(i = 0; i < n; ++i)
- elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
-
- return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Expand the bit width.
- *
- * This will only change the number of bits the values are represented, not the
- * values themselves.
- */
-static void
-lp_build_expand(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- LLVMValueRef src,
- LLVMValueRef *dst, unsigned num_dsts)
-{
- unsigned num_tmps;
- unsigned i;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length == dst_type.length * num_dsts);
-
- num_tmps = 1;
- dst[0] = src;
-
- while(src_type.width < dst_type.width) {
- struct lp_type new_type = src_type;
- LLVMTypeRef new_vec_type;
-
- new_type.width *= 2;
- new_type.length /= 2;
- new_vec_type = lp_build_vec_type(new_type);
-
- for(i = num_tmps; i--; ) {
- LLVMValueRef zero;
- LLVMValueRef shuffle_lo;
- LLVMValueRef shuffle_hi;
- LLVMValueRef lo;
- LLVMValueRef hi;
-
- zero = lp_build_zero(src_type);
- shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0);
- shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1);
-
- /* PUNPCKLBW, PUNPCKHBW */
- lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, "");
- hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, "");
-
- dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, "");
- dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, "");
- }
-
- src_type = new_type;
-
- num_tmps *= 2;
- }
-
- assert(num_tmps == num_dsts);
-}
-
-
-/**
- * Non-interleaved pack.
- *
- * This will move values as
- *
- * lo = __ l0 __ l1 __ l2 __.. __ ln
- * hi = __ h0 __ h1 __ h2 __.. __ hn
- * res = l0 l1 l2 .. ln h0 h1 h2 .. hn
- *
- * TODO: handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack2(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- boolean clamped,
- LLVMValueRef lo,
- LLVMValueRef hi)
-{
- LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
- LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
- LLVMValueRef shuffle;
- LLVMValueRef res;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length * 2 == dst_type.length);
-
- assert(!src_type.floating);
- assert(!dst_type.floating);
-
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(src_type.width * src_type.length == 128) {
- /* All X86 non-interleaved pack instructions all take signed inputs and
- * saturate them, so saturate beforehand. */
- if(!src_type.sign && !clamped) {
- struct lp_build_context bld;
- unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
- LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
- lp_build_context_init(&bld, builder, src_type);
- lo = lp_build_min(&bld, lo, dst_max);
- hi = lp_build_min(&bld, hi, dst_max);
- }
-
- switch(src_type.width) {
- case 32:
- if(dst_type.sign)
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
- else
- /* PACKUSDW is the only instrinsic with a consistent signature */
- return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
- break;
-
- case 16:
- if(dst_type.sign)
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
- else
- res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
- break;
-
- default:
- assert(0);
- return LLVMGetUndef(dst_vec_type);
- break;
- }
-
- res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
- return res;
- }
-#endif
-
- lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
- hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
-
- shuffle = lp_build_const_pack_shuffle(dst_type.length);
-
- res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
-
- return res;
-}
-
-
-/**
- * Truncate the bit width.
- *
- * TODO: Handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack(LLVMBuilderRef builder,
- struct lp_type src_type,
- struct lp_type dst_type,
- boolean clamped,
- const LLVMValueRef *src, unsigned num_srcs)
-{
- LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
- unsigned i;
-
- /* Register width must remain constant */
- assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
- /* We must not loose or gain channels. Only precision */
- assert(src_type.length * num_srcs == dst_type.length);
-
- for(i = 0; i < num_srcs; ++i)
- tmp[i] = src[i];
-
- while(src_type.width > dst_type.width) {
- struct lp_type new_type = src_type;
-
- new_type.width /= 2;
- new_type.length *= 2;
-
- /* Take in consideration the sign changes only in the last step */
- if(new_type.width == dst_type.width)
- new_type.sign = dst_type.sign;
-
- num_srcs /= 2;
-
- for(i = 0; i < num_srcs; ++i)
- tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped,
- tmp[2*i + 0], tmp[2*i + 1]);
-
- src_type = new_type;
- }
-
- assert(num_srcs == 1);
-
- return tmp[0];
-}
-
-
-/**
* Generic type conversion.
*
* TODO: Take a precision argument, or even better, add a new precision member
@@ -573,7 +338,7 @@ lp_build_conv(LLVMBuilderRef builder,
if(tmp_type.width < dst_type.width) {
assert(num_tmps == 1);
- lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
+ lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
tmp_type.width = dst_type.width;
tmp_type.length = dst_type.length;
num_tmps = num_dsts;
@@ -693,7 +458,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
}
else if(src_type.width < dst_type.width) {
assert(num_srcs == 1);
- lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts);
+ lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts);
}
else {
assert(num_srcs == num_dsts);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
index ca378804d2..948e68fae4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 59d8f492e6..39dfc51e50 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -77,10 +77,10 @@ lp_disassemble(const void* func)
while (ud_disassemble(&ud_obj)) {
#ifdef PIPE_ARCH_X86
- debug_printf("%08lx: ", (unsigned long)ud_insn_off(&ud_obj));
+ debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
#endif
#ifdef PIPE_ARCH_X86_64
- debug_printf("%016llx: ", (unsigned long long)ud_insn_off(&ud_obj));
+ debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
#endif
#if 0
@@ -115,9 +115,16 @@ lp_disassemble(const void* func)
}
}
- if (ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret)
+ if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
+ ud_obj.mnemonic == UD_Iinvalid)
break;
}
+
+#if 0
+ /* Print GDB command, useful to verify udis86 output */
+ debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc);
+#endif
+
debug_printf("\n");
#else
(void)func;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 21c665c4d4..d438c0e63d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc,
if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
type.floating = TRUE;
- assert(swizzle = 0);
+ assert(swizzle == 0);
assert(format_desc->channel[swizzle].size == format_desc->block.bits);
}
else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
@@ -210,7 +210,4 @@ lp_build_depth_test(LLVMBuilderRef builder,
dst = lp_build_select(&bld, z_bitmask, src, dst);
LLVMBuildStore(builder, dst, dst_ptr);
}
-
- /* FIXME */
- assert(!state->occlusion_count);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 6d3f692619..970bee379f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -25,8 +25,8 @@
*
**************************************************************************/
-#ifndef LP_BLD_H
-#define LP_BLD_H
+#ifndef LP_BLD_FORMAT_H
+#define LP_BLD_FORMAT_H
/**
@@ -42,62 +42,34 @@ struct util_format_description;
struct lp_type;
-/**
- * Unpack a pixel into its RGBA components.
- *
- * @param packed integer.
- *
- * @return RGBA in a 4 floats vector.
- */
-LLVMValueRef
-lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef packed);
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc);
-/**
- * Pack a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-LLVMValueRef
-lp_build_pack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef rgba);
+void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+ struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ LLVMValueRef *swizzled);
-/**
- * Load a pixel into its RGBA components.
- *
- * @param ptr value with the pointer to the packed pixel. Pointer type is
- * irrelevant.
- *
- * @return RGBA in a 4 floats vector.
- */
LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr);
+lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef packed);
-/**
- * Store a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba);
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ struct lp_type type,
+ LLVMValueRef packed);
+
LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets);
+lp_build_pack_rgba_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ LLVMValueRef rgba);
void
@@ -108,12 +80,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
LLVMValueRef *rgba);
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
- const struct util_format_description *format_desc,
- struct lp_type type,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets,
- LLVMValueRef *rgba);
-
-#endif /* !LP_BLD_H */
+#endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index b9b5d84bed..5836e0173f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -25,18 +25,39 @@
*
**************************************************************************/
+/**
+ * @file
+ * AoS pixel format manipulation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
#include "lp_bld_format.h"
+/**
+ * Unpack a single pixel into its RGBA components.
+ *
+ * @param packed integer.
+ *
+ * @return RGBA in a 4 floats vector.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
LLVMValueRef
lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
+ const struct util_format_description *desc,
LLVMValueRef packed)
{
- const struct util_format_description *desc;
LLVMTypeRef type;
LLVMValueRef shifted, casted, scaled, masked;
LLVMValueRef shifts[4];
@@ -49,8 +70,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
unsigned shift;
unsigned i;
- desc = util_format_description(format);
-
/* FIXME: Support more formats */
assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
assert(desc->block.width == 1);
@@ -151,12 +170,130 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
}
+/**
+ * Take a vector with packed pixels and unpack into a rgba8 vector.
+ *
+ * Formats with bit depth smaller than 32bits are accepted, but they must be
+ * padded to 32bits.
+ */
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+ const struct util_format_description *desc,
+ struct lp_type type,
+ LLVMValueRef packed)
+{
+ struct lp_build_context bld;
+ bool rgba8;
+ LLVMValueRef res;
+ unsigned i;
+
+ lp_build_context_init(&bld, builder, type);
+
+ /* FIXME: Support more formats */
+ assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(desc->block.width == 1);
+ assert(desc->block.height == 1);
+ assert(desc->block.bits <= 32);
+
+ assert(!type.floating);
+ assert(!type.fixed);
+ assert(type.norm);
+ assert(type.width == 8);
+ assert(type.length % 4 == 0);
+
+ rgba8 = TRUE;
+ for(i = 0; i < 4; ++i) {
+ assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
+ if(desc->channel[0].size != 8)
+ rgba8 = FALSE;
+ }
+
+ if(rgba8) {
+ /*
+ * The pixel is already in a rgba8 format variant. All it is necessary
+ * is to swizzle the channels.
+ */
+
+ unsigned char swizzles[4];
+ boolean zeros[4]; /* bitwise AND mask */
+ boolean ones[4]; /* bitwise OR mask */
+ boolean swizzles_needed = FALSE;
+ boolean zeros_needed = FALSE;
+ boolean ones_needed = FALSE;
+
+ for(i = 0; i < 4; ++i) {
+ enum util_format_swizzle swizzle = desc->swizzle[i];
+
+ /* Initialize with the no-op case */
+ swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
+ zeros[i] = TRUE;
+ ones[i] = FALSE;
+
+ switch (swizzle) {
+ case UTIL_FORMAT_SWIZZLE_X:
+ case UTIL_FORMAT_SWIZZLE_Y:
+ case UTIL_FORMAT_SWIZZLE_Z:
+ case UTIL_FORMAT_SWIZZLE_W:
+ if(swizzle != swizzles[i]) {
+ swizzles[i] = swizzle;
+ swizzles_needed = TRUE;
+ }
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ zeros[i] = FALSE;
+ zeros_needed = TRUE;
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ ones[i] = TRUE;
+ ones_needed = TRUE;
+ break;
+ case UTIL_FORMAT_SWIZZLE_NONE:
+ assert(0);
+ break;
+ }
+ }
+
+ res = packed;
+
+ if(swizzles_needed)
+ res = lp_build_swizzle1_aos(&bld, res, swizzles);
+
+ if(zeros_needed) {
+ /* Mask out zero channels */
+ LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
+ res = LLVMBuildAnd(builder, res, mask, "");
+ }
+
+ if(ones_needed) {
+ /* Or one channels */
+ LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
+ res = LLVMBuildOr(builder, res, mask, "");
+ }
+ }
+ else {
+ /* FIXME */
+ assert(0);
+ res = lp_build_undef(type);
+ }
+
+ return res;
+}
+
+
+/**
+ * Pack a single pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ *
+ * XXX: This is mostly for reference and testing -- operating a single pixel at
+ * a time is rarely if ever needed.
+ */
LLVMValueRef
lp_build_pack_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
+ const struct util_format_description *desc,
LLVMValueRef rgba)
{
- const struct util_format_description *desc;
LLVMTypeRef type;
LLVMValueRef packed = NULL;
LLVMValueRef swizzles[4];
@@ -167,8 +304,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
unsigned shift;
unsigned i, j;
- desc = util_format_description(format);
-
assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
assert(desc->block.width == 1);
assert(desc->block.height == 1);
@@ -247,57 +382,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
return packed;
}
-
-
-LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr)
-{
- const struct util_format_description *desc;
- LLVMTypeRef type;
- LLVMValueRef packed;
-
- desc = util_format_description(format);
-
- /* FIXME: Support more formats */
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(desc->block.width == 1);
- assert(desc->block.height == 1);
- assert(desc->block.bits <= 32);
-
- type = LLVMIntType(desc->block.bits);
-
- ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
- packed = LLVMBuildLoad(builder, ptr, "");
-
- return lp_build_unpack_rgba_aos(builder, format, packed);
-}
-
-
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
- enum pipe_format format,
- LLVMValueRef ptr,
- LLVMValueRef rgba)
-{
- const struct util_format_description *desc;
- LLVMTypeRef type;
- LLVMValueRef packed;
-
- desc = util_format_description(format);
-
- assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(desc->block.width == 1);
- assert(desc->block.height == 1);
-
- type = LLVMIntType(desc->block.bits);
-
- packed = lp_build_pack_rgba_aos(builder, format, rgba);
-
- ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
- LLVMBuildStore(builder, packed, ptr);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
new file mode 100644
index 0000000000..f3832d07ff
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Utility functions to make assertions about formats.
+ *
+ * This module centralizes most of logic used when determining what algorithm
+ * is most suitable (i.e., most efficient yet correct) for a given format.
+ *
+ * It might be possible to move some of these functions to u_format module,
+ * but since tiny differences in the format my render it more/less
+ * appropriate to a given algorithm it is impossible to make any long term
+ * guarantee about the semantics of these functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_format.h"
+
+
+/**
+ * Whether this format is a 4 rgba8 variant
+ */
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc)
+{
+ unsigned chan;
+
+ if(desc->block.width != 1 ||
+ desc->block.height != 1 ||
+ desc->block.bits != 32)
+ return FALSE;
+
+ for(chan = 0; chan < 4; ++chan) {
+ if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
+ desc->channel[chan].type != UTIL_FORMAT_TYPE_SIGNED &&
+ desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+ return FALSE;
+ if(desc->channel[chan].size != 8)
+ return FALSE;
+ }
+
+ return TRUE;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index b5ff434e1a..64151d169d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -34,66 +34,17 @@
#include "lp_bld_format.h"
-/**
- * Gather elements from scatter positions in memory into a single vector.
- *
- * @param src_width src element width
- * @param dst_width result element width (source will be expanded to fit)
- * @param length length of the offsets,
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
- */
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
- unsigned length,
- unsigned src_width,
- unsigned dst_width,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets)
-{
- LLVMTypeRef src_type = LLVMIntType(src_width);
- LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
- LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
- LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
- LLVMValueRef res;
- unsigned i;
-
- res = LLVMGetUndef(dst_vec_type);
- for(i = 0; i < length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- LLVMValueRef elem_offset;
- LLVMValueRef elem_ptr;
- LLVMValueRef elem;
-
- elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
- elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
- elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
- elem = LLVMBuildLoad(builder, elem_ptr, "");
-
- assert(src_width <= dst_width);
- if(src_width > dst_width)
- elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
- if(src_width < dst_width)
- elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
-
- res = LLVMBuildInsertElement(builder, res, elem, index, "");
- }
-
- return res;
-}
-
-
static LLVMValueRef
-lp_build_format_swizzle(struct lp_type type,
- const LLVMValueRef *inputs,
- enum util_format_swizzle swizzle)
+lp_build_format_swizzle_chan_soa(struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ enum util_format_swizzle swizzle)
{
switch (swizzle) {
case UTIL_FORMAT_SWIZZLE_X:
case UTIL_FORMAT_SWIZZLE_Y:
case UTIL_FORMAT_SWIZZLE_Z:
case UTIL_FORMAT_SWIZZLE_W:
- return inputs[swizzle];
+ return unswizzled[swizzle];
case UTIL_FORMAT_SWIZZLE_0:
return lp_build_zero(type);
case UTIL_FORMAT_SWIZZLE_1:
@@ -108,6 +59,28 @@ lp_build_format_swizzle(struct lp_type type,
void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+ struct lp_type type,
+ const LLVMValueRef *unswizzled,
+ LLVMValueRef *swizzled)
+{
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[0];
+ LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ swizzled[2] = swizzled[1] = swizzled[0] = depth;
+ swizzled[3] = lp_build_one(type);
+ }
+ else {
+ unsigned chan;
+ for (chan = 0; chan < 4; ++chan) {
+ enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+ swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ }
+ }
+}
+
+
+void
lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
struct lp_type type,
@@ -119,7 +92,9 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
unsigned chan;
/* FIXME: Support more formats */
- assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+ assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH ||
+ (format_desc->layout == UTIL_FORMAT_LAYOUT_ARRAY &&
+ format_desc->block.bits == format_desc->channel[0].size));
assert(format_desc->block.width == 1);
assert(format_desc->block.height == 1);
assert(format_desc->block.bits <= 32);
@@ -170,39 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
start = stop;
}
- if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- enum util_format_swizzle swizzle = format_desc->swizzle[0];
- LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
- rgba[2] = rgba[1] = rgba[0] = depth;
- rgba[3] = lp_build_one(type);
- }
- else {
- for (chan = 0; chan < 4; ++chan) {
- enum util_format_swizzle swizzle = format_desc->swizzle[chan];
- rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
- }
- }
-}
-
-
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
- const struct util_format_description *format_desc,
- struct lp_type type,
- LLVMValueRef base_ptr,
- LLVMValueRef offsets,
- LLVMValueRef *rgba)
-{
- LLVMValueRef packed;
-
- assert(format_desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
- assert(format_desc->block.width == 1);
- assert(format_desc->block.height == 1);
- assert(format_desc->block.bits <= 32);
-
- packed = lp_build_gather(builder,
- type.length, format_desc->block.bits, type.width,
- base_ptr, offsets);
-
- lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+ lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 338dbca6d1..49dab8ab61 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -109,32 +109,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
/**
- * Small vector x scale multiplication optimization.
- *
- * TODO: Should be elsewhere.
- */
-static LLVMValueRef
-coeff_multiply(struct lp_build_interp_soa_context *bld,
- LLVMValueRef coeff,
- int step)
-{
- LLVMValueRef factor;
-
- switch(step) {
- case 0:
- return bld->base.zero;
- case 1:
- return coeff;
- case 2:
- return lp_build_add(&bld->base, coeff, coeff);
- default:
- factor = lp_build_const_scalar(bld->base.type, (double)step);
- return lp_build_mul(&bld->base, coeff, factor);
- }
-}
-
-
-/**
* Multiply the dadx and dady with the xstep and ystep respectively.
*/
static void
@@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld)
if (mode != TGSI_INTERPOLATE_CONSTANT) {
for(chan = 0; chan < NUM_CHANNELS; ++chan) {
if(mask & (1 << chan)) {
- bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
- bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+ bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep);
+ bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep);
}
}
}
@@ -329,8 +303,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
unsigned first, last, mask;
unsigned attrib;
- first = decl->DeclarationRange.First;
- last = decl->DeclarationRange.Last;
+ first = decl->Range.First;
+ last = decl->Range.Last;
mask = decl->Declaration.UsageMask;
for( attrib = first; attrib <= last; ++attrib ) {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.c b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
index 6b6f820769..db22a8028a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.c
@@ -33,6 +33,8 @@
*/
+#include "util/u_cpu_detect.h"
+
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
@@ -65,7 +67,7 @@ lp_build_cmp(struct lp_build_context *bld,
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128) {
- if(type.floating) {
+ if(type.floating && util_cpu_caps.has_sse) {
LLVMValueRef args[3];
unsigned cc;
boolean swap;
@@ -114,7 +116,7 @@ lp_build_cmp(struct lp_build_context *bld,
res = LLVMBuildBitCast(bld->builder, res, int_vec_type, "");
return res;
}
- else {
+ else if(util_cpu_caps.has_sse2) {
static const struct {
unsigned swap:1;
unsigned eq:1;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index a4ee7723b5..d67500ef70 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -42,7 +42,7 @@
#include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
new file mode 100644
index 0000000000..d3f78c06d9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_config.h"
+
+#include "lp_bld_misc.h"
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+namespace llvm {
+ extern void LinkInJIT();
+}
+
+
+void
+LLVMLinkInJIT(void)
+{
+ llvm::LinkInJIT();
+}
+
+
+extern "C" int X86TargetMachineModule;
+
+
+int
+LLVMInitializeNativeTarget(void)
+{
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ X86TargetMachineModule = 1;
+#endif
+ return 0;
+}
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
new file mode 100644
index 0000000000..0e787e0b9c
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
@@ -0,0 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_MISC_H
+#define LP_BLD_MISC_H
+
+
+#include "llvm/Config/config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+void
+LLVMLinkInJIT(void);
+
+int
+LLVMInitializeNativeTarget(void);
+
+#endif /* !LLVM_NATIVE_ARCH */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* !LP_BLD_MISC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
new file mode 100644
index 0000000000..bc360ad77a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -0,0 +1,418 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for packing/unpacking.
+ *
+ * Pack/unpacking is necessary for conversion between types of different
+ * bit width.
+ *
+ * They are also commonly used when an computation needs higher
+ * precision for the intermediate values. For example, if one needs the
+ * function:
+ *
+ * c = compute(a, b);
+ *
+ * to use more precision for intermediate results then one should implement it
+ * as:
+ *
+ * LLVMValueRef
+ * compute(LLVMBuilderRef builder struct lp_type type, LLVMValueRef a, LLVMValueRef b)
+ * {
+ * struct lp_type wide_type = lp_wider_type(type);
+ * LLVMValueRef al, ah, bl, bh, cl, ch, c;
+ *
+ * lp_build_unpack2(builder, type, wide_type, a, &al, &ah);
+ * lp_build_unpack2(builder, type, wide_type, b, &bl, &bh);
+ *
+ * cl = compute_half(al, bl);
+ * ch = compute_half(ah, bh);
+ *
+ * c = lp_build_pack2(bld->builder, wide_type, type, cl, ch);
+ *
+ * return c;
+ * }
+ *
+ * where compute_half() would do the computation for half the elements with
+ * twice the precision.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
+
+
+/**
+ * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
+{
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i, j;
+
+ assert(n <= LP_MAX_VECTOR_LENGTH);
+ assert(lo_hi < 2);
+
+ /* TODO: cache results in a static table */
+
+ for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
+ elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
+ elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
+ }
+
+ return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Build shuffle vectors that match PACKxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_pack_shuffle(unsigned n)
+{
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ assert(n <= LP_MAX_VECTOR_LENGTH);
+
+ /* TODO: cache results in a static table */
+
+ for(i = 0; i < n; ++i)
+ elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
+
+ return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Interleave vector elements.
+ *
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ */
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ unsigned lo_hi)
+{
+ LLVMValueRef shuffle;
+
+ shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi);
+
+ return LLVMBuildShuffleVector(builder, a, b, shuffle, "");
+}
+
+
+/**
+ * Double the bit width.
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ */
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst_lo,
+ LLVMValueRef *dst_hi)
+{
+ LLVMValueRef msb;
+ LLVMTypeRef dst_vec_type;
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(dst_type.width == src_type.width * 2);
+ assert(dst_type.length * 2 == src_type.length);
+
+ if(dst_type.sign && src_type.sign) {
+ /* Replicate the sign bit in the most significant bits */
+ msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
+ }
+ else
+ /* Most significant bits always zero */
+ msb = lp_build_zero(src_type);
+
+ /* Interleave bits */
+ if(util_cpu_caps.little_endian) {
+ *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
+ *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
+ }
+ else {
+ *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
+ *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
+ }
+
+ /* Cast the result into the new type (twice as wide) */
+
+ dst_vec_type = lp_build_vec_type(dst_type);
+
+ *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, "");
+ *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, "");
+}
+
+
+/**
+ * Expand the bit width.
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ */
+void
+lp_build_unpack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst, unsigned num_dsts)
+{
+ unsigned num_tmps;
+ unsigned i;
+
+ /* Register width must remain constant */
+ assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length == dst_type.length * num_dsts);
+
+ num_tmps = 1;
+ dst[0] = src;
+
+ while(src_type.width < dst_type.width) {
+ struct lp_type tmp_type = src_type;
+
+ tmp_type.width *= 2;
+ tmp_type.length /= 2;
+
+ for(i = num_tmps; i--; ) {
+ lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]);
+ }
+
+ src_type = tmp_type;
+
+ num_tmps *= 2;
+ }
+
+ assert(num_tmps == num_dsts);
+}
+
+
+/**
+ * Non-interleaved pack.
+ *
+ * This will move values as
+ *
+ * lo = __ l0 __ l1 __ l2 __.. __ ln
+ * hi = __ h0 __ h1 __ h2 __.. __ hn
+ * res = l0 l1 l2 .. ln h0 h1 h2 .. hn
+ *
+ * This will only change the number of bits the values are represented, not the
+ * values themselves.
+ *
+ * It is assumed the values are already clamped into the destination type range.
+ * Values outside that range will produce undefined results. Use
+ * lp_build_packs2 instead.
+ */
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi)
+{
+ LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+ LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
+ LLVMValueRef shuffle;
+ LLVMValueRef res;
+
+ dst_vec_type = lp_build_vec_type(dst_type);
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(src_type.width == dst_type.width * 2);
+ assert(src_type.length * 2 == dst_type.length);
+
+ if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
+ switch(src_type.width) {
+ case 32:
+ if(dst_type.sign) {
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+ }
+ else {
+ if (util_cpu_caps.has_sse4_1) {
+ /* PACKUSDW is the only instrinsic with a consistent signature */
+ return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
+ }
+ else {
+ assert(0);
+ return LLVMGetUndef(dst_vec_type);
+ }
+ }
+ break;
+
+ case 16:
+ if(dst_type.sign)
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+ else
+ res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+ break;
+
+ default:
+ assert(0);
+ return LLVMGetUndef(dst_vec_type);
+ break;
+ }
+
+ res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
+ return res;
+ }
+
+ lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
+ hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
+
+ shuffle = lp_build_const_pack_shuffle(dst_type.length);
+
+ res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
+
+ return res;
+}
+
+
+
+/**
+ * Non-interleaved pack and saturate.
+ *
+ * Same as lp_build_pack2 but will saturate values so that they fit into the
+ * destination type.
+ */
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi)
+{
+ boolean clamp;
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(src_type.sign == dst_type.sign);
+ assert(src_type.width == dst_type.width * 2);
+ assert(src_type.length * 2 == dst_type.length);
+
+ clamp = TRUE;
+
+ /* All X86 SSE non-interleaved pack instructions take signed inputs and
+ * saturate them, so no need to clamp for those cases. */
+ if(util_cpu_caps.has_sse2 &&
+ src_type.width * src_type.length == 128 &&
+ src_type.sign)
+ clamp = FALSE;
+
+ if(clamp) {
+ struct lp_build_context bld;
+ unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
+ LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
+ lp_build_context_init(&bld, builder, src_type);
+ lo = lp_build_min(&bld, lo, dst_max);
+ hi = lp_build_min(&bld, hi, dst_max);
+ /* FIXME: What about lower bound? */
+ }
+
+ return lp_build_pack2(builder, src_type, dst_type, lo, hi);
+}
+
+
+/**
+ * Truncate the bit width.
+ *
+ * TODO: Handle saturation consistently.
+ */
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs)
+{
+ LLVMValueRef (*pack2)(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+
+ /* Register width must remain constant */
+ assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+ /* We must not loose or gain channels. Only precision */
+ assert(src_type.length * num_srcs == dst_type.length);
+
+ if(clamped)
+ pack2 = &lp_build_pack2;
+ else
+ pack2 = &lp_build_packs2;
+
+ for(i = 0; i < num_srcs; ++i)
+ tmp[i] = src[i];
+
+ while(src_type.width > dst_type.width) {
+ struct lp_type tmp_type = src_type;
+
+ tmp_type.width /= 2;
+ tmp_type.length *= 2;
+
+ /* Take in consideration the sign changes only in the last step */
+ if(tmp_type.width == dst_type.width)
+ tmp_type.sign = dst_type.sign;
+
+ num_srcs /= 2;
+
+ for(i = 0; i < num_srcs; ++i)
+ tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]);
+
+ src_type = tmp_type;
+ }
+
+ assert(num_srcs == 1);
+
+ return tmp[0];
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
new file mode 100644
index 0000000000..fb2a34984a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for packing/unpacking conversions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_PACK_H
+#define LP_BLD_PACK_H
+
+
+#include <llvm-c/Core.h>
+
+
+struct lp_type;
+
+
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+ struct lp_type type,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ unsigned lo_hi);
+
+
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst_lo,
+ LLVMValueRef *dst_hi);
+
+
+void
+lp_build_unpack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst, unsigned num_dsts);
+
+
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef lo,
+ LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ boolean clamped,
+ const LLVMValueRef *src, unsigned num_srcs);
+
+
+#endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
new file mode 100644
index 0000000000..af70ddc6ab
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- common code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+ const struct pipe_texture *texture,
+ const struct pipe_sampler_state *sampler)
+{
+ memset(state, 0, sizeof *state);
+
+ if(!texture)
+ return;
+
+ if(!sampler)
+ return;
+
+ state->format = texture->format;
+ state->target = texture->target;
+ state->pot_width = util_is_pot(texture->width0);
+ state->pot_height = util_is_pot(texture->height0);
+ state->pot_depth = util_is_pot(texture->depth0);
+
+ state->wrap_s = sampler->wrap_s;
+ state->wrap_t = sampler->wrap_t;
+ state->wrap_r = sampler->wrap_r;
+ state->min_img_filter = sampler->min_img_filter;
+ state->min_mip_filter = sampler->min_mip_filter;
+ state->mag_img_filter = sampler->mag_img_filter;
+ if(sampler->compare_mode) {
+ state->compare_mode = sampler->compare_mode;
+ state->compare_func = sampler->compare_func;
+ }
+ state->normalized_coords = sampler->normalized_coords;
+ state->prefilter = sampler->prefilter;
+}
+
+
+/**
+ * Gather elements from scatter positions in memory into a single vector.
+ *
+ * @param src_width src element width
+ * @param dst_width result element width (source will be expanded to fit)
+ * @param length length of the offsets,
+ * @param base_ptr base pointer, should be a i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets)
+{
+ LLVMTypeRef src_type = LLVMIntType(src_width);
+ LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+ LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+ LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+ LLVMValueRef res;
+ unsigned i;
+
+ res = LLVMGetUndef(dst_vec_type);
+ for(i = 0; i < length; ++i) {
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ LLVMValueRef elem_offset;
+ LLVMValueRef elem_ptr;
+ LLVMValueRef elem;
+
+ elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+ elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+ elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+ elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+ assert(src_width <= dst_width);
+ if(src_width > dst_width)
+ elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+ if(src_width < dst_width)
+ elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+ res = LLVMBuildInsertElement(builder, res, elem, index, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Compute the offset of a pixel.
+ *
+ * x, y, y_stride are vectors
+ */
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+ const struct util_format_description *format_desc,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr)
+{
+ LLVMValueRef x_stride;
+ LLVMValueRef offset;
+
+ x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);
+
+ if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ LLVMValueRef x_lo, x_hi;
+ LLVMValueRef y_lo, y_hi;
+ LLVMValueRef x_stride_lo, x_stride_hi;
+ LLVMValueRef y_stride_lo, y_stride_hi;
+ LLVMValueRef x_offset_lo, x_offset_hi;
+ LLVMValueRef y_offset_lo, y_offset_hi;
+ LLVMValueRef offset_lo, offset_hi;
+
+ x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
+ y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
+
+ x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
+ y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
+
+ x_stride_lo = x_stride;
+ y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
+
+ x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
+ y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
+
+ x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
+ y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
+ offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
+
+ x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
+ y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
+ offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
+
+ offset = lp_build_add(bld, offset_hi, offset_lo);
+ }
+ else {
+ LLVMValueRef x_offset;
+ LLVMValueRef y_offset;
+
+ x_offset = lp_build_mul(bld, x, x_stride);
+ y_offset = lp_build_mul(bld, y, y_stride);
+
+ offset = lp_build_add(bld, x_offset, y_offset);
+ }
+
+ return offset;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 403d0e4836..8cb8210ca7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -40,7 +40,9 @@
struct pipe_texture;
struct pipe_sampler_state;
+struct util_format_description;
struct lp_type;
+struct lp_build_context;
/**
@@ -119,6 +121,24 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+ unsigned length,
+ unsigned src_width,
+ unsigned dst_width,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offsets);
+
+
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+ const struct util_format_description *format_desc,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr);
+
+
void
lp_build_sample_soa(LLVMBuilderRef builder,
const struct lp_sampler_static_state *static_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 8ca1be6f1b..47b68b71e2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -27,7 +27,7 @@
/**
* @file
- * Texture sampling.
+ * Texture sampling -- SoA.
*
* @author Jose Fonseca <jfonseca@vmware.com>
*/
@@ -35,54 +35,23 @@
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
+#include "util/u_debug_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
+#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
-void
-lp_sampler_static_state(struct lp_sampler_static_state *state,
- const struct pipe_texture *texture,
- const struct pipe_sampler_state *sampler)
-{
- memset(state, 0, sizeof *state);
-
- if(!texture)
- return;
-
- if(!sampler)
- return;
-
- state->format = texture->format;
- state->target = texture->target;
- state->pot_width = util_is_pot(texture->width[0]);
- state->pot_height = util_is_pot(texture->height[0]);
- state->pot_depth = util_is_pot(texture->depth[0]);
-
- state->wrap_s = sampler->wrap_s;
- state->wrap_t = sampler->wrap_t;
- state->wrap_r = sampler->wrap_r;
- state->min_img_filter = sampler->min_img_filter;
- state->min_mip_filter = sampler->min_mip_filter;
- state->mag_img_filter = sampler->mag_img_filter;
- if(sampler->compare_mode) {
- state->compare_mode = sampler->compare_mode;
- state->compare_func = sampler->compare_func;
- }
- state->normalized_coords = sampler->normalized_coords;
- state->prefilter = sampler->prefilter;
-}
-
-
-
/**
* Keep all information for sampling code generation in a single place.
*/
@@ -111,66 +80,61 @@ struct lp_build_sample_context
static void
-lp_build_sample_texel(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
{
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef x_stride;
LLVMValueRef offset;
+ LLVMValueRef packed;
+
+ offset = lp_build_sample_offset(&bld->int_coord_bld,
+ bld->format_desc,
+ x, y, y_stride,
+ data_ptr);
+
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+ packed = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
+
+ lp_build_unpack_rgba_soa(bld->builder,
+ bld->format_desc,
+ bld->texel_type,
+ packed, texel);
+}
- x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
-
- if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- LLVMValueRef x_lo, x_hi;
- LLVMValueRef y_lo, y_hi;
- LLVMValueRef x_stride_lo, x_stride_hi;
- LLVMValueRef y_stride_lo, y_stride_hi;
- LLVMValueRef x_offset_lo, x_offset_hi;
- LLVMValueRef y_offset_lo, y_offset_hi;
- LLVMValueRef offset_lo, offset_hi;
-
- x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
- y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
-
- x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
- y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
-
- x_stride_lo = x_stride;
- y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
-
- x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
- y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
-
- x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
- y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
- offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
-
- x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
- y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
- offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
- offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
- }
- else {
- LLVMValueRef x_offset;
- LLVMValueRef y_offset;
+static LLVMValueRef
+lp_build_sample_packed(struct lp_build_sample_context *bld,
+ LLVMValueRef x,
+ LLVMValueRef y,
+ LLVMValueRef y_stride,
+ LLVMValueRef data_ptr)
+{
+ LLVMValueRef offset;
- x_offset = lp_build_mul(int_coord_bld, x, x_stride);
- y_offset = lp_build_mul(int_coord_bld, y, y_stride);
+ offset = lp_build_sample_offset(&bld->int_coord_bld,
+ bld->format_desc,
+ x, y, y_stride,
+ data_ptr);
- offset = lp_build_add(int_coord_bld, x_offset, y_offset);
- }
+ assert(bld->format_desc->block.width == 1);
+ assert(bld->format_desc->block.height == 1);
+ assert(bld->format_desc->block.bits <= bld->texel_type.width);
- lp_build_load_rgba_soa(bld->builder,
- bld->format_desc,
- bld->texel_type,
- data_ptr,
- offset,
- texel);
+ return lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset);
}
@@ -208,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
/* FIXME */
- _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+ _debug_printf("warning: failed to translate texture wrap mode %s\n",
+ debug_dump_tex_wrap(wrap_mode, TRUE));
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
break;
@@ -240,7 +205,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
x = lp_build_sample_wrap(bld, x, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
- lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+ lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
}
@@ -274,8 +239,8 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
s_fpart = lp_build_sub(&bld->coord_bld, s, s_ipart);
t_fpart = lp_build_sub(&bld->coord_bld, t, t_ipart);
- x0 = lp_build_int(&bld->coord_bld, s_ipart);
- y0 = lp_build_int(&bld->coord_bld, t_ipart);
+ x0 = lp_build_itrunc(&bld->coord_bld, s_ipart);
+ y0 = lp_build_itrunc(&bld->coord_bld, t_ipart);
x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
@@ -286,10 +251,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
- lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
- lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
- lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
- lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+ lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+ lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+ lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+ lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
/* TODO: Don't interpolate missing channels */
for(chan = 0; chan < 4; ++chan) {
@@ -304,6 +269,217 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
static void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+ unsigned chan;
+
+ /* Decode the input vector components */
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned start = chan*8;
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if(start)
+ input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+
+ if(stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
+}
+
+
+static void
+lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef width,
+ LLVMValueRef height,
+ LLVMValueRef stride,
+ LLVMValueRef data_ptr,
+ LLVMValueRef *texel)
+{
+ LLVMBuilderRef builder = bld->builder;
+ struct lp_build_context i32, h16, u8n;
+ LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+ LLVMValueRef i32_c8, i32_c128, i32_c255;
+ LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+ LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+ LLVMValueRef x0, x1;
+ LLVMValueRef y0, y1;
+ LLVMValueRef neighbors[2][2];
+ LLVMValueRef neighbors_lo[2][2];
+ LLVMValueRef neighbors_hi[2][2];
+ LLVMValueRef packed, packed_lo, packed_hi;
+ LLVMValueRef unswizzled[4];
+
+ lp_build_context_init(&i32, builder, lp_type_int(32));
+ lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+ lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+ i32_vec_type = lp_build_vec_type(i32.type);
+ h16_vec_type = lp_build_vec_type(h16.type);
+ u8n_vec_type = lp_build_vec_type(u8n.type);
+
+ s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+ t = lp_build_mul_imm(&bld->coord_bld, t, 256);
+
+ s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+ t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+
+ i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+ s = LLVMBuildAdd(builder, s, i32_c128, "");
+ t = LLVMBuildAdd(builder, t, i32_c128, "");
+
+ i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+ s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+ t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+
+ i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+ s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+ t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+
+ x0 = s_ipart;
+ y0 = t_ipart;
+
+ x0 = lp_build_sample_wrap(bld, x0, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+ y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+ x1 = lp_build_sample_wrap(bld, x1, width, bld->static_state->pot_width, bld->static_state->wrap_s);
+ y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+ /*
+ * Transform 4 x i32 in
+ *
+ * s_fpart = {s0, s1, s2, s3}
+ *
+ * into 8 x i16
+ *
+ * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+ *
+ * into two 8 x i16
+ *
+ * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+ * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+ *
+ * and likewise for t_fpart. There is no risk of loosing precision here
+ * since the fractional parts only use the lower 8bits.
+ */
+
+ s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+ t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+
+ {
+ LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffle_lo;
+ LLVMValueRef shuffle_hi;
+ unsigned i, j;
+
+ for(j = 0; j < h16.type.length; j += 4) {
+ unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+ LLVMValueRef index;
+
+ index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+ for(i = 0; i < 4; ++i)
+ shuffles_lo[j + i] = index;
+
+ index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+ for(i = 0; i < 4; ++i)
+ shuffles_hi[j + i] = index;
+ }
+
+ shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+ shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+ s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
+ t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
+ s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
+ t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
+ }
+
+ /*
+ * Fetch the pixels as 4 x 32bit (rgba order might differ):
+ *
+ * rgba0 rgba1 rgba2 rgba3
+ *
+ * bit cast them into 16 x u8
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * unpack them into two 8 x i16:
+ *
+ * r0 g0 b0 a0 r1 g1 b1 a1
+ * r2 g2 b2 a2 r3 g3 b3 a3
+ *
+ * The higher 8 bits of the resulting elements will be zero.
+ */
+
+ neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
+ neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
+ neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
+ neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+
+ neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
+ neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
+ neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
+ neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
+ lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+
+ /*
+ * Linear interpolate with 8.8 fixed point.
+ */
+
+ packed_lo = lp_build_lerp_2d(&h16,
+ s_fpart_lo, t_fpart_lo,
+ neighbors_lo[0][0],
+ neighbors_lo[0][1],
+ neighbors_lo[1][0],
+ neighbors_lo[1][1]);
+
+ packed_hi = lp_build_lerp_2d(&h16,
+ s_fpart_hi, t_fpart_hi,
+ neighbors_hi[0][0],
+ neighbors_hi[0][1],
+ neighbors_hi[1][0],
+ neighbors_hi[1][1]);
+
+ packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
+
+ /*
+ * Convert to SoA and swizzle.
+ */
+
+ packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
+
+ lp_build_rgba8_to_f32_soa(bld->builder,
+ bld->texel_type,
+ packed, unswizzled);
+
+ lp_build_format_swizzle_soa(bld->format_desc,
+ bld->texel_type, unswizzled,
+ texel);
+}
+
+
+static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
LLVMValueRef *texel)
@@ -402,7 +578,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
break;
case PIPE_TEX_FILTER_LINEAR:
case PIPE_TEX_FILTER_ANISO:
- lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+ if(lp_format_is_rgba8(bld.format_desc))
+ lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
+ else
+ lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
break;
default:
assert(0);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index 1f6da80448..b9472127a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -40,7 +40,7 @@
#include <llvm-c/Core.h>
-struct lp_type type;
+struct lp_type;
struct lp_build_context;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index adc81569ed..3eb0e0c57c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -64,7 +64,7 @@
for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+ ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -157,19 +157,19 @@ emit_fetch(
unsigned index,
const unsigned chan_index )
{
- const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ const struct tgsi_full_src_register *reg = &inst->Src[index];
+ unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
LLVMValueRef res;
switch (swizzle) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
+ case TGSI_SWIZZLE_X:
+ case TGSI_SWIZZLE_Y:
+ case TGSI_SWIZZLE_Z:
+ case TGSI_SWIZZLE_W:
- switch (reg->SrcRegister.File) {
+ switch (reg->Register.File) {
case TGSI_FILE_CONSTANT: {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
res = lp_build_broadcast_scalar(&bld->base, scalar);
@@ -177,17 +177,17 @@ emit_fetch(
}
case TGSI_FILE_IMMEDIATE:
- res = bld->immediates[reg->SrcRegister.Index][swizzle];
+ res = bld->immediates[reg->Register.Index][swizzle];
assert(res);
break;
case TGSI_FILE_INPUT:
- res = bld->inputs[reg->SrcRegister.Index][swizzle];
+ res = bld->inputs[reg->Register.Index][swizzle];
assert(res);
break;
case TGSI_FILE_TEMPORARY:
- res = bld->temps[reg->SrcRegister.Index][swizzle];
+ res = bld->temps[reg->Register.Index][swizzle];
if(!res)
return bld->base.undef;
break;
@@ -198,14 +198,6 @@ emit_fetch(
}
break;
- case TGSI_EXTSWIZZLE_ZERO:
- res = bld->base.zero;
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- res = bld->base.one;
- break;
-
default:
assert( 0 );
return bld->base.undef;
@@ -275,7 +267,7 @@ emit_store(
unsigned chan_index,
LLVMValueRef value)
{
- const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
+ const struct tgsi_full_dst_register *reg = &inst->Dst[index];
switch( inst->Instruction.Saturate ) {
case TGSI_SAT_NONE:
@@ -295,13 +287,13 @@ emit_store(
assert(0);
}
- switch( reg->DstRegister.File ) {
+ switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
- bld->outputs[reg->DstRegister.Index][chan_index] = value;
+ bld->outputs[reg->Register.Index][chan_index] = value;
break;
case TGSI_FILE_TEMPORARY:
- bld->temps[reg->DstRegister.Index][chan_index] = value;
+ bld->temps[reg->Register.Index][chan_index] = value;
break;
case TGSI_FILE_ADDRESS:
@@ -327,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
boolean projected,
LLVMValueRef *texel)
{
- const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ const uint unit = inst->Src[1].Register.Index;
LLVMValueRef lodbias;
LLVMValueRef oow;
LLVMValueRef coords[3];
unsigned num_coords;
unsigned i;
- switch (inst->InstructionExtTexture.Texture) {
+ switch (inst->Texture.Texture) {
case TGSI_TEXTURE_1D:
num_coords = 1;
break;
@@ -383,7 +375,7 @@ emit_kil(
struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst )
{
- const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
+ const struct tgsi_full_src_register *reg = &inst->Src[0];
LLVMValueRef terms[NUM_CHANNELS];
LLVMValueRef mask;
unsigned chan_index;
@@ -394,12 +386,7 @@ emit_kil(
unsigned swizzle;
/* Unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-
- /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
- * not to be tested. */
- if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
- continue;
+ swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
/* Check if the component has not been already tested. */
assert(swizzle < NUM_CHANNELS);
@@ -436,15 +423,15 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
{
uint i;
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
- if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
- reg->SrcRegister.Indirect)
+ const struct tgsi_full_src_register *reg = &inst->Src[i];
+ if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ reg->Register.Indirect)
return TRUE;
}
for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
- const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
- if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
- reg->DstRegister.Indirect)
+ const struct tgsi_full_dst_register *reg = &inst->Dst[i];
+ if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+ reg->Register.Indirect)
return TRUE;
}
return FALSE;
@@ -488,7 +475,6 @@ emit_instruction(
#endif
case TGSI_OPCODE_MOV:
- case TGSI_OPCODE_SWZ:
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
}
@@ -574,9 +560,9 @@ emit_instruction(
if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
- LLVMValueRef *p_floor_log2;
- LLVMValueRef *p_exp;
- LLVMValueRef *p_log2;
+ LLVMValueRef *p_floor_log2 = NULL;
+ LLVMValueRef *p_exp = NULL;
+ LLVMValueRef *p_log2 = NULL;
src0 = emit_fetch( bld, inst, 0, CHAN_X );
src0 = lp_build_abs( &bld->base, src0 );
@@ -1386,15 +1372,6 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_NOISE1:
- case TGSI_OPCODE_NOISE2:
- case TGSI_OPCODE_NOISE3:
- case TGSI_OPCODE_NOISE4:
- FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- dst0[chan_index] = bld->base.zero;
- }
- break;
-
case TGSI_OPCODE_NOP:
break;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 606243d6c5..1320a26721 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -160,12 +160,31 @@ lp_build_int_vec_type(struct lp_type type)
struct lp_type
lp_int_type(struct lp_type type)
{
- struct lp_type int_type;
+ struct lp_type res_type;
- memset(&int_type, 0, sizeof int_type);
- int_type.width = type.width;
- int_type.length = type.length;
- return int_type;
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = type.width;
+ res_type.length = type.length;
+
+ return res_type;
+}
+
+
+/**
+ * Return the type with twice the bit width (hence half the number of elements).
+ */
+struct lp_type
+lp_wider_type(struct lp_type type)
+{
+ struct lp_type res_type;
+
+ memcpy(&res_type, &type, sizeof res_type);
+ res_type.width *= 2;
+ res_type.length /= 2;
+
+ assert(res_type.length);
+
+ return res_type;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index ee5ca3483c..2fb233d335 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -43,13 +43,18 @@
/**
+ * Native SIMD register width.
+ *
+ * 128 for all architectures we care about.
+ */
+#define LP_NATIVE_VECTOR_WIDTH 128
+
+/**
* Several functions can only cope with vectors of length up to this value.
* You may need to increase that value if you want to represent bigger vectors.
*/
#define LP_MAX_VECTOR_LENGTH 16
-#define LP_MAX_TYPE_WIDTH 64
-
/**
* The LLVM type system can't conveniently express all the things we care about
@@ -134,6 +139,91 @@ struct lp_build_context
};
+static INLINE struct lp_type
+lp_type_float(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.floating = TRUE;
+ res_type.sign = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_int(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_uint(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_unorm(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.norm = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_fixed(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.sign = TRUE;
+ res_type.fixed = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_ufixed(unsigned width)
+{
+ struct lp_type res_type;
+
+ memset(&res_type, 0, sizeof res_type);
+ res_type.fixed = TRUE;
+ res_type.width = width;
+ res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+ return res_type;
+}
+
+
LLVMTypeRef
lp_build_elem_type(struct lp_type type);
@@ -166,6 +256,10 @@ struct lp_type
lp_int_type(struct lp_type type);
+struct lp_type
+lp_wider_type(struct lp_type type);
+
+
void
lp_build_context_init(struct lp_build_context *bld,
LLVMBuilderRef builder,
diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
index bdcff94b9b..08d9f2e273 100644
--- a/src/gallium/drivers/llvmpipe/lp_clear.c
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -50,6 +50,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
double depth, unsigned stencil)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ union util_color uc;
unsigned cv;
uint i;
@@ -64,8 +65,8 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
struct pipe_surface *ps = llvmpipe->framebuffer.cbufs[i];
- util_pack_color(rgba, ps->format, &cv);
- lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv);
+ util_pack_color(rgba, ps->format, &uc);
+ lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, uc.ui);
}
llvmpipe->dirty_render_cache = TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index a4b2bd8c2a..679e244274 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -31,13 +31,13 @@
*/
#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
#include "pipe/p_defines.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "lp_clear.h"
#include "lp_context.h"
#include "lp_flush.h"
-#include "lp_prim_setup.h"
#include "lp_prim_vbuf.h"
#include "lp_state.h"
#include "lp_surface.h"
@@ -107,11 +107,21 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
if (llvmpipe->draw)
draw_destroy( llvmpipe->draw );
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
+ pipe_surface_reference(&llvmpipe->framebuffer.cbufs[i], NULL);
+ }
+ pipe_surface_reference(&llvmpipe->framebuffer.zsbuf, NULL);
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]);
+ pipe_texture_reference(&llvmpipe->texture[i], NULL);
+ }
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ lp_destroy_tex_tile_cache(llvmpipe->vertex_tex_cache[i]);
+ pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL);
+ }
for (i = 0; i < Elements(llvmpipe->constants); i++) {
if (llvmpipe->constants[i].buffer) {
@@ -140,6 +150,11 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
llvmpipe->framebuffer.zsbuf->texture == texture)
return PIPE_REFERENCED_FOR_WRITE;
}
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ if (llvmpipe->vertex_tex_cache[i] &&
+ llvmpipe->vertex_tex_cache[i]->texture == texture)
+ return PIPE_REFERENCED_FOR_READ;
+ }
return PIPE_UNREFERENCED;
}
@@ -175,7 +190,8 @@ llvmpipe_create( struct pipe_screen *screen )
llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state;
llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
- llvmpipe->pipe.bind_sampler_states = llvmpipe_bind_sampler_states;
+ llvmpipe->pipe.bind_fragment_sampler_states = llvmpipe_bind_sampler_states;
+ llvmpipe->pipe.bind_vertex_sampler_states = llvmpipe_bind_vertex_sampler_states;
llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state;
llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state;
@@ -200,7 +216,8 @@ llvmpipe_create( struct pipe_screen *screen )
llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
- llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures;
+ llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures;
+ llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures;
llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
@@ -229,13 +246,15 @@ llvmpipe_create( struct pipe_screen *screen )
for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen );
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++)
+ llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen);
/* vertex shader samplers */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX;
- llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->tex_cache[i];
+ llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i];
llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i];
}
@@ -255,25 +274,25 @@ llvmpipe_create( struct pipe_screen *screen )
goto fail;
draw_texture_samplers(llvmpipe->draw,
- PIPE_MAX_SAMPLERS,
+ PIPE_MAX_VERTEX_SAMPLERS,
(struct tgsi_sampler **)
llvmpipe->tgsi.vert_samplers_list);
- llvmpipe->setup = lp_draw_render_stage(llvmpipe);
- if (!llvmpipe->setup)
- goto fail;
-
if (debug_get_bool_option( "LP_NO_RAST", FALSE ))
llvmpipe->no_rast = TRUE;
- if (debug_get_bool_option( "LP_NO_VBUF", FALSE )) {
- /* Deprecated path -- vbuf is the intended interface to the draw module:
- */
- draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup);
- }
- else {
- lp_init_vbuf(llvmpipe);
- }
+ llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe);
+ if (!llvmpipe->vbuf_backend)
+ goto fail;
+
+ llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend);
+ if (!llvmpipe->vbuf)
+ goto fail;
+
+ draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf);
+ draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend);
+
+
/* plug in AA line/point stages */
draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe);
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 8d5a0d4f1f..cc4d5ad5fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -55,6 +55,7 @@ struct llvmpipe_context {
/** Constant state objects */
const struct pipe_blend_state *blend;
const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
const struct pipe_depth_stencil_alpha_state *depth_stencil;
const struct pipe_rasterizer_state *rasterizer;
struct lp_fragment_shader *fs;
@@ -68,12 +69,15 @@ struct llvmpipe_context {
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
unsigned num_samplers;
unsigned num_textures;
+ unsigned num_vertex_samplers;
+ unsigned num_vertex_textures;
unsigned num_vertex_elements;
unsigned num_vertex_buffers;
@@ -88,9 +92,6 @@ struct llvmpipe_context {
/** Mapped vertex buffers */
ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS];
- /** Mapped constant buffers */
- void *mapped_constants[PIPE_SHADER_TYPES];
-
/** Vertex format */
struct vertex_info vertex_info;
struct vertex_info vertex_info_vbuf;
@@ -124,9 +125,10 @@ struct llvmpipe_context {
/** The primitive drawing context */
struct draw_context *draw;
- struct draw_stage *setup;
+
+ /** Draw module backend */
+ struct vbuf_render *vbuf_backend;
struct draw_stage *vbuf;
- struct llvmpipe_vbuf_render *vbuf_render;
boolean dirty_render_cache;
@@ -138,6 +140,7 @@ struct llvmpipe_context {
unsigned tex_timestamp;
struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+ struct llvmpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS];
unsigned no_rast : 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 89772e62d3..0aa13a1fc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -45,54 +45,6 @@
-static void
-llvmpipe_map_constant_buffers(struct llvmpipe_context *lp)
-{
- struct pipe_screen *screen = lp->pipe.screen;
- uint i, size;
-
- for (i = 0; i < PIPE_SHADER_TYPES; i++) {
- if (lp->constants[i].buffer && lp->constants[i].buffer->size)
- lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer,
- PIPE_BUFFER_USAGE_CPU_READ);
- }
-
- if (lp->constants[PIPE_SHADER_VERTEX].buffer)
- size = lp->constants[PIPE_SHADER_VERTEX].buffer->size;
- else
- size = 0;
-
- lp->jit_context.constants = lp->mapped_constants[PIPE_SHADER_FRAGMENT];
-
- draw_set_mapped_constant_buffer(lp->draw,
- lp->mapped_constants[PIPE_SHADER_VERTEX],
- size);
-}
-
-
-static void
-llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp)
-{
- struct pipe_screen *screen = lp->pipe.screen;
- uint i;
-
- /* really need to flush all prims since the vert/frag shaders const buffers
- * are going away now.
- */
- draw_flush(lp->draw);
-
- draw_set_mapped_constant_buffer(lp->draw, NULL, 0);
-
- lp->jit_context.constants = NULL;
-
- for (i = 0; i < 2; i++) {
- if (lp->constants[i].buffer && lp->constants[i].buffer->size)
- screen->buffer_unmap(screen, lp->constants[i].buffer);
- lp->mapped_constants[i] = NULL;
- }
-}
-
-
boolean
llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
unsigned start, unsigned count)
@@ -124,7 +76,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
llvmpipe_update_derived( lp );
llvmpipe_map_transfers(lp);
- llvmpipe_map_constant_buffers(lp);
/*
* Map vertex buffers
@@ -163,7 +114,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
/* Note: leave drawing surfaces mapped */
- llvmpipe_unmap_constant_buffers(lp);
lp->dirty_render_cache = TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index b5c1c95bb7..cd8381fe30 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -58,8 +58,10 @@ llvmpipe_flush( struct pipe_context *pipe,
* in the hope that a later clear will wipe them out.
*/
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++)
- if (llvmpipe->cbuf_cache[i])
+ if (llvmpipe->cbuf_cache[i]) {
+ lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]);
lp_flush_tile_cache(llvmpipe->cbuf_cache[i]);
+ }
/* Need this call for hardware buffers before swapbuffers.
*
@@ -71,8 +73,10 @@ llvmpipe_flush( struct pipe_context *pipe,
}
else if (flags & PIPE_FLUSH_RENDER_CACHE) {
for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++)
- if (llvmpipe->cbuf_cache[i])
+ if (llvmpipe->cbuf_cache[i]) {
+ lp_tile_cache_map_transfers(llvmpipe->cbuf_cache[i]);
lp_flush_tile_cache(llvmpipe->cbuf_cache[i]);
+ }
/* FIXME: untile zsbuf! */
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index b4a22ff4a9..bce3baec16 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,8 +36,10 @@
#include <llvm-c/Transforms/Scalar.h>
#include "util/u_memory.h"
+#include "util/u_cpu_detect.h"
#include "lp_screen.h"
#include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
#include "lp_jit.h"
@@ -147,6 +149,18 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
{
char *error = NULL;
+ util_cpu_detect();
+
+#if 0
+ /* For simulating less capable machines */
+ util_cpu_caps.has_sse3 = 0;
+ util_cpu_caps.has_ssse3 = 0;
+ util_cpu_caps.has_sse4_1 = 0;
+#endif
+
+ LLVMLinkInJIT();
+ LLVMInitializeNativeTarget();
+
screen->module = LLVMModuleCreateWithName("llvmpipe");
screen->provider = LLVMCreateModuleProviderForExistingModule(screen->module);
@@ -154,7 +168,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
_debug_printf("%s\n", error);
LLVMDisposeMessage(error);
- abort();
+ assert(0);
}
screen->target = LLVMGetExecutionEngineTargetData(screen->engine);
@@ -163,8 +177,15 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
LLVMAddTargetData(screen->target, screen->pass);
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
* but there are more on SVN. */
+ /* TODO: Add more passes */
LLVMAddConstantPropagationPass(screen->pass);
- LLVMAddInstructionCombiningPass(screen->pass);
+ if(util_cpu_caps.has_sse4_1) {
+ /* FIXME: There is a bug in this pass, whereby the combination of fptosi
+ * and sitofp (necessary for trunc/floor/ceil/round implementation)
+ * somehow becomes invalid code.
+ */
+ LLVMAddInstructionCombiningPass(screen->pass);
+ }
LLVMAddPromoteMemoryToRegisterPass(screen->pass);
LLVMAddGVNPass(screen->pass);
LLVMAddCFGSimplificationPass(screen->pass);
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c
deleted file mode 100644
index b14f8fb99d..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief A draw stage that drives our triangle setup routines from
- * within the draw pipeline. One of two ways to drive setup, the
- * other being in lp_prim_vbuf.c.
- *
- * \author Keith Whitwell <keith@tungstengraphics.com>
- * \author Brian Paul
- */
-
-
-#include "lp_context.h"
-#include "lp_setup.h"
-#include "lp_state.h"
-#include "lp_prim_setup.h"
-#include "draw/draw_pipe.h"
-#include "draw/draw_vertex.h"
-#include "util/u_memory.h"
-
-/**
- * Triangle setup info (derived from draw_stage).
- * Also used for line drawing (taking some liberties).
- */
-struct setup_stage {
- struct draw_stage stage; /**< This must be first (base class) */
-
- struct setup_context *setup;
-};
-
-
-
-/**
- * Basically a cast wrapper.
- */
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
-{
- return (struct setup_stage *)stage;
-}
-
-
-typedef const float (*cptrf4)[4];
-
-static void
-do_tri(struct draw_stage *stage, struct prim_header *prim)
-{
- struct setup_stage *setup = setup_stage( stage );
-
- llvmpipe_setup_tri( setup->setup,
- (cptrf4)prim->v[0]->data,
- (cptrf4)prim->v[1]->data,
- (cptrf4)prim->v[2]->data );
-}
-
-static void
-do_line(struct draw_stage *stage, struct prim_header *prim)
-{
- struct setup_stage *setup = setup_stage( stage );
-
- llvmpipe_setup_line( setup->setup,
- (cptrf4)prim->v[0]->data,
- (cptrf4)prim->v[1]->data );
-}
-
-static void
-do_point(struct draw_stage *stage, struct prim_header *prim)
-{
- struct setup_stage *setup = setup_stage( stage );
-
- llvmpipe_setup_point( setup->setup,
- (cptrf4)prim->v[0]->data );
-}
-
-
-
-
-static void setup_begin( struct draw_stage *stage )
-{
- struct setup_stage *setup = setup_stage(stage);
-
- llvmpipe_setup_prepare( setup->setup );
-
- stage->point = do_point;
- stage->line = do_line;
- stage->tri = do_tri;
-}
-
-
-static void setup_first_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- setup_begin(stage);
- stage->point( stage, header );
-}
-
-static void setup_first_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- setup_begin(stage);
- stage->line( stage, header );
-}
-
-
-static void setup_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- setup_begin(stage);
- stage->tri( stage, header );
-}
-
-
-
-static void setup_flush( struct draw_stage *stage,
- unsigned flags )
-{
- stage->point = setup_first_point;
- stage->line = setup_first_line;
- stage->tri = setup_first_tri;
-}
-
-
-static void reset_stipple_counter( struct draw_stage *stage )
-{
-}
-
-
-static void render_destroy( struct draw_stage *stage )
-{
- struct setup_stage *ssetup = setup_stage(stage);
- llvmpipe_setup_destroy_context(ssetup->setup);
- FREE( stage );
-}
-
-
-/**
- * Create a new primitive setup/render stage.
- */
-struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe )
-{
- struct setup_stage *sstage = CALLOC_STRUCT(setup_stage);
-
- sstage->setup = llvmpipe_setup_create_context(llvmpipe);
- sstage->stage.draw = llvmpipe->draw;
- sstage->stage.point = setup_first_point;
- sstage->stage.line = setup_first_line;
- sstage->stage.tri = setup_first_tri;
- sstage->stage.flush = setup_flush;
- sstage->stage.reset_stipple_counter = reset_stipple_counter;
- sstage->stage.destroy = render_destroy;
-
- return (struct draw_stage *)sstage;
-}
-
-struct setup_context *
-lp_draw_setup_context( struct draw_stage *stage )
-{
- struct setup_stage *ssetup = setup_stage(stage);
- return ssetup->setup;
-}
-
-void
-lp_draw_flush( struct draw_stage *stage )
-{
- stage->flush( stage, 0 );
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h
deleted file mode 100644
index da6cae6375..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef LP_PRIM_SETUP_H
-#define LP_PRIM_SETUP_H
-
-
-/**
- * vbuf is a special stage to gather the stream of triangles, lines, points
- * together and reconstruct vertex buffers for hardware upload.
- *
- * First attempt, work in progress.
- *
- * TODO:
- * - separate out vertex buffer building and primitive emit, ie >1 draw per vb.
- * - tell vbuf stage how to build hw vertices directly
- * - pass vbuf stage a buffer pointer for direct emit to agp/vram.
- *
- *
- *
- * Vertices are just an array of floats, with all the attributes
- * packed. We currently assume a layout like:
- *
- * attr[0][0..3] - window position
- * attr[1..n][0..3] - remaining attributes.
- *
- * Attributes are assumed to be 4 floats wide but are packed so that
- * all the enabled attributes run contiguously.
- */
-
-
-struct draw_stage;
-struct llvmpipe_context;
-
-
-typedef void (*vbuf_draw_func)( struct pipe_context *pipe,
- unsigned prim,
- const ushort *elements,
- unsigned nr_elements,
- const void *vertex_buffer,
- unsigned nr_vertices );
-
-
-extern struct draw_stage *
-lp_draw_render_stage( struct llvmpipe_context *llvmpipe );
-
-extern struct setup_context *
-lp_draw_setup_context( struct draw_stage * );
-
-extern void
-lp_draw_flush( struct draw_stage * );
-
-
-extern struct draw_stage *
-lp_draw_vbuf_stage( struct draw_context *draw_context,
- struct pipe_context *pipe,
- vbuf_draw_func draw );
-
-
-#endif /* LP_PRIM_SETUP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
index c394dcb61d..4abff4eccc 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
@@ -37,10 +37,9 @@
#include "lp_context.h"
+#include "lp_setup.h"
#include "lp_state.h"
#include "lp_prim_vbuf.h"
-#include "lp_prim_setup.h"
-#include "lp_setup.h"
#include "draw/draw_context.h"
#include "draw/draw_vbuf.h"
#include "util/u_memory.h"
@@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render
{
struct vbuf_render base;
struct llvmpipe_context *llvmpipe;
+ struct setup_context *setup;
+
uint prim;
uint vertex_size;
uint nr_vertices;
@@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr)
}
+
+
+
+
+
static const struct vertex_info *
lp_vbuf_get_vertex_info(struct vbuf_render *vbr)
{
@@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr,
static void
lp_vbuf_release_vertices(struct vbuf_render *vbr)
{
-#if 0
- {
- struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
- const struct vertex_info *info =
- llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe);
- const float *vtx = (const float *) cvbr->vertex_buffer;
- uint i, j;
- debug_printf("%s (vtx_size = %u, vtx_used = %u)\n",
- __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices);
- for (i = 0; i < cvbr->nr_vertices; i++) {
- for (j = 0; j < info->num_attribs; j++) {
- uint k;
- switch (info->attrib[j].emit) {
- case EMIT_4F: k = 4; break;
- case EMIT_3F: k = 3; break;
- case EMIT_2F: k = 2; break;
- case EMIT_1F: k = 1; break;
- default: assert(0);
- }
- debug_printf("Vert %u attr %u: ", i, j);
- while (k-- > 0) {
- debug_printf("%g ", vtx[0]);
- vtx++;
- }
- debug_printf("\n");
- }
- }
- }
-#endif
-
/* keep the old allocation for next time */
}
@@ -160,11 +136,7 @@ static boolean
lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
{
struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
-
- /* XXX: break this dependency - make setup_context live under
- * llvmpipe, rename the old "setup" draw stage to something else.
- */
- struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup);
+ struct setup_context *setup_ctx = cvbr->setup;
llvmpipe_setup_prepare( setup_ctx );
@@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
struct llvmpipe_context *llvmpipe = cvbr->llvmpipe;
const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float);
const void *vertex_buffer = cvbr->vertex_buffer;
+ struct setup_context *setup_ctx = cvbr->setup;
unsigned i;
- /* XXX: break this dependency - make setup_context live under
- * llvmpipe, rename the old "setup" draw stage to something else.
- */
- struct draw_stage *setup = llvmpipe->setup;
- struct setup_context *setup_ctx = lp_draw_setup_context(setup);
-
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
@@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
default:
assert(0);
}
-
- /* XXX: why are we calling this??? If we had to call something, it
- * would be a function in lp_setup.c:
- */
- lp_draw_flush( setup );
}
@@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
{
struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
struct llvmpipe_context *llvmpipe = cvbr->llvmpipe;
+ struct setup_context *setup_ctx = cvbr->setup;
const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float);
const void *vertex_buffer =
(void *) get_vert(cvbr->vertex_buffer, start, stride);
unsigned i;
- /* XXX: break this dependency - make setup_context live under
- * llvmpipe, rename the old "setup" draw stage to something else.
- */
- struct draw_stage *setup = llvmpipe->setup;
- struct setup_context *setup_ctx = lp_draw_setup_context(setup);
-
switch (cvbr->prim) {
case PIPE_PRIM_POINTS:
for (i = 0; i < nr; i++) {
@@ -568,40 +525,38 @@ static void
lp_vbuf_destroy(struct vbuf_render *vbr)
{
struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
- cvbr->llvmpipe->vbuf_render = NULL;
+ llvmpipe_setup_destroy_context(cvbr->setup);
FREE(cvbr);
}
/**
- * Initialize the post-transform vertex buffer information for the given
- * context.
+ * Create the post-transform vertex handler for the given context.
*/
-void
-lp_init_vbuf(struct llvmpipe_context *lp)
+struct vbuf_render *
+lp_create_vbuf_backend(struct llvmpipe_context *lp)
{
- assert(lp->draw);
+ struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render);
- lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render);
+ assert(lp->draw);
- lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES;
- lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
- lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info;
- lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices;
- lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices;
- lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices;
- lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive;
- lp->vbuf_render->base.draw = lp_vbuf_draw;
- lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays;
- lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices;
- lp->vbuf_render->base.destroy = lp_vbuf_destroy;
+ cvbr->base.max_indices = LP_MAX_VBUF_INDEXES;
+ cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
- lp->vbuf_render->llvmpipe = lp;
+ cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info;
+ cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices;
+ cvbr->base.map_vertices = lp_vbuf_map_vertices;
+ cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices;
+ cvbr->base.set_primitive = lp_vbuf_set_primitive;
+ cvbr->base.draw = lp_vbuf_draw;
+ cvbr->base.draw_arrays = lp_vbuf_draw_arrays;
+ cvbr->base.release_vertices = lp_vbuf_release_vertices;
+ cvbr->base.destroy = lp_vbuf_destroy;
- lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base);
+ cvbr->llvmpipe = lp;
- draw_set_rasterize_stage(lp->draw, lp->vbuf);
+ cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe);
- draw_set_render(lp->draw, &lp->vbuf_render->base);
+ return &cvbr->base;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
index 6c4e6063e6..0676e2f42a 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
@@ -31,8 +31,8 @@
struct llvmpipe_context;
-extern void
-lp_init_vbuf(struct llvmpipe_context *llvmpipe);
+extern struct vbuf_render *
+lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe);
#endif /* LP_VBUF_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index ff7ef8658a..19fe2850fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,6 +27,7 @@
#include "util/u_memory.h"
+#include "util/u_format.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
@@ -58,7 +59,9 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
return PIPE_MAX_SAMPLERS;
case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
- return PIPE_MAX_SAMPLERS;
+ return PIPE_MAX_VERTEX_SAMPLERS;
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
case PIPE_CAP_NPOT_TEXTURES:
return 1;
case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -82,7 +85,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
return 13; /* max 4Kx4K */
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 8; /* max 128x128x128 */
+ return 9; /* max 256x256x256 */
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
return 13; /* max 4Kx4K */
case PIPE_CAP_TGSI_CONT_SUPPORTED:
@@ -131,17 +134,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
{
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
struct llvmpipe_winsys *winsys = screen->winsys;
+ const struct util_format_description *format_desc;
+
+ format_desc = util_format_description(format);
+ if(!format_desc)
+ return FALSE;
assert(target == PIPE_TEXTURE_1D ||
target == PIPE_TEXTURE_2D ||
target == PIPE_TEXTURE_3D ||
target == PIPE_TEXTURE_CUBE);
- if(format == PIPE_FORMAT_Z16_UNORM)
- return FALSE;
- if(format == PIPE_FORMAT_S8_UNORM)
- return FALSE;
-
switch(format) {
case PIPE_FORMAT_DXT1_RGB:
case PIPE_FORMAT_DXT1_RGBA:
@@ -152,8 +155,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
break;
}
- if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
- return winsys->is_displaytarget_format_supported(winsys, format);
+ if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+ if(format_desc->block.width != 1 ||
+ format_desc->block.height != 1)
+ return FALSE;
+
+ if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+ return FALSE;
+
+ if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB)
+ return FALSE;
+ }
+
+ if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
+ if(!winsys->is_displaytarget_format_supported(winsys, format))
+ return FALSE;
+ }
+
+ if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
+ if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+
+ /* FIXME: Temporary restriction. See lp_state_fs.c. */
+ if(format_desc->block.bits != 32)
+ return FALSE;
+ }
+
+ /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */
+ if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) {
+ if(format_desc->block.width != 1 ||
+ format_desc->block.height != 1)
+ return FALSE;
+
+ if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+ format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+ return FALSE;
+
+ if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB &&
+ format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return FALSE;
+ }
return TRUE;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 2d2fc19a65..b4aabd4d7c 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -33,7 +33,6 @@
*/
#include "lp_context.h"
-#include "lp_prim_setup.h"
#include "lp_quad.h"
#include "lp_setup.h"
#include "lp_state.h"
@@ -90,6 +89,8 @@ struct setup_context {
float oneoverarea;
int facing;
+ float pixel_offset;
+
struct quad_header quad[MAX_QUADS];
struct quad_header *quad_ptrs[MAX_QUADS];
unsigned count;
@@ -115,6 +116,7 @@ struct setup_context {
/**
* Execute fragment shader for the four fragments in the quad.
*/
+ALIGN_STACK
static void
shade_quads(struct llvmpipe_context *llvmpipe,
struct quad_header *quads[],
@@ -124,7 +126,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
struct quad_header *quad = quads[0];
const unsigned x = quad->input.x0;
const unsigned y = quad->input.y0;
- uint8_t *tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
+ uint8_t *tile;
uint8_t *color;
void *depth;
uint32_t ALIGN16_ATTRIB mask[4][NUM_CHANNELS];
@@ -150,7 +152,13 @@ shade_quads(struct llvmpipe_context *llvmpipe,
mask[q][chan_index] = quads[q]->inout.mask & (1 << chan_index) ? ~0 : 0;
/* color buffer */
- color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
+ if(llvmpipe->framebuffer.nr_cbufs >= 1 &&
+ llvmpipe->framebuffer.cbufs[0]) {
+ tile = lp_get_cached_tile(llvmpipe->cbuf_cache[0], x, y);
+ color = &TILE_PIXEL(tile, x & (TILE_SIZE-1), y & (TILE_SIZE-1), 0);
+ }
+ else
+ color = NULL;
/* depth buffer */
if(llvmpipe->zsbuf_map) {
@@ -158,7 +166,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
assert((y % 2) == 0);
depth = llvmpipe->zsbuf_map +
y*llvmpipe->zsbuf_transfer->stride +
- 2*x*llvmpipe->zsbuf_transfer->block.size;
+ 2*x*pf_get_blocksize(llvmpipe->zsbuf_transfer->texture->format);
}
else
depth = NULL;
@@ -271,11 +279,13 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
* until we codegenerate single-quad variants of the fragment pipeline
* we need this hack. */
const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
- struct quad_header quads[nr_quads];
- struct quad_header *quad_ptrs[nr_quads];
+ struct quad_header quads[4];
+ struct quad_header *quad_ptrs[4];
int x0 = block_x(quad->input.x0);
unsigned i;
+ assert(nr_quads == 4);
+
for(i = 0; i < nr_quads; ++i) {
int x = x0 + 2*i;
if(x == quad->input.x0)
@@ -474,6 +484,16 @@ static boolean setup_sort_vertices( struct setup_context *setup,
((det > 0.0) ^
(setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));
+ /* Prepare pixel offset for rasterisation:
+ * - pixel center (0.5, 0.5) for GL, or
+ * - assume (0.0, 0.0) for other APIs.
+ */
+ if (setup->llvmpipe->rasterizer->gl_rasterization_rules) {
+ setup->pixel_offset = 0.5f;
+ } else {
+ setup->pixel_offset = 0.0f;
+ }
+
return TRUE;
}
@@ -499,7 +519,7 @@ static void tri_pos_coeff( struct setup_context *setup,
/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
- * pixel centers, in other words (0.5, 0.5).
+ * pixel centers, in other words (pixel_offset, pixel_offset).
*
* this is neat but unfortunately not a good way to do things for
* triangles with very large values of dadx or dady as it will
@@ -510,8 +530,8 @@ static void tri_pos_coeff( struct setup_context *setup,
* instead - i'll switch to this later.
*/
setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
/*
debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -600,8 +620,8 @@ static void tri_linear_coeff( struct setup_context *setup,
* instead - i'll switch to this later.
*/
setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
/*
debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -652,8 +672,8 @@ static void tri_persp_coeff( struct setup_context *setup,
setup->coef.dadx[1 + attrib][i] = dadx;
setup->coef.dady[1 + attrib][i] = dady;
setup->coef.a0[1 + attrib][i] = (mina -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
}
}
@@ -737,12 +757,12 @@ static void setup_tri_coefficients( struct setup_context *setup )
static void setup_tri_edges( struct setup_context *setup )
{
- float vmin_x = setup->vmin[0][0] + 0.5f;
- float vmid_x = setup->vmid[0][0] + 0.5f;
+ float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
+ float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
- float vmin_y = setup->vmin[0][1] - 0.5f;
- float vmid_y = setup->vmid[0][1] - 0.5f;
- float vmax_y = setup->vmax[0][1] - 0.5f;
+ float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
+ float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
+ float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
setup->emaj.sy = ceilf(vmin_y);
setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
@@ -941,8 +961,8 @@ linear_pos_coeff(struct setup_context *setup,
setup->coef.dadx[0][i] = dadx;
setup->coef.dady[0][i] = dady;
setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
}
@@ -963,8 +983,8 @@ line_linear_coeff(struct setup_context *setup,
setup->coef.dadx[1 + attrib][i] = dadx;
setup->coef.dady[1 + attrib][i] = dady;
setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
}
}
@@ -989,8 +1009,8 @@ line_persp_coeff(struct setup_context *setup,
setup->coef.dadx[1 + attrib][i] = dadx;
setup->coef.dady[1 + attrib][i] = dady;
setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
- (dadx * (setup->vmin[0][0] - 0.5f) +
- dady * (setup->vmin[0][1] - 0.5f)));
+ (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+ dady * (setup->vmin[0][1] - setup->pixel_offset)));
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 7b26ce61a3..d1c74ab07b 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -126,6 +126,10 @@ void *
llvmpipe_create_sampler_state(struct pipe_context *,
const struct pipe_sampler_state *);
void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **);
+void
+llvmpipe_bind_vertex_sampler_states(struct pipe_context *,
+ unsigned num_samplers,
+ void **samplers);
void llvmpipe_delete_sampler_state(struct pipe_context *, void *);
void *
@@ -172,6 +176,11 @@ void llvmpipe_set_sampler_textures( struct pipe_context *,
unsigned num,
struct pipe_texture ** );
+void
+llvmpipe_set_vertex_sampler_textures(struct pipe_context *,
+ unsigned num_textures,
+ struct pipe_texture **);
+
void llvmpipe_set_viewport_state( struct pipe_context *,
const struct pipe_viewport_state * );
diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index 3f03bd0057..b2e75d3b14 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -76,7 +76,7 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe,
for (i = 0; i < 4; ++i) {
uint8_t c = float_to_ubyte(blend_color->color[i]);
for (j = 0; j < 16; ++j)
- llvmpipe->jit_context.blend_color[i*4 + j] = c;
+ llvmpipe->jit_context.blend_color[i*16 + j] = c;
}
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index e87976b9f3..e703964aaa 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -65,26 +65,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
if (vinfo->num_attribs == 0) {
/* compute vertex layout now */
const struct lp_fragment_shader *lpfs = llvmpipe->fs;
- const enum interp_mode colorInterp
- = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+ struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
+ const uint num = draw_num_vs_outputs(llvmpipe->draw);
uint i;
- if (llvmpipe->vbuf) {
- /* if using the post-transform vertex buffer, tell draw_vbuf to
- * simply emit the whole post-xform vertex as-is:
- */
- struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
- const uint num = draw_num_vs_outputs(llvmpipe->draw);
- uint i;
-
- /* No longer any need to try and emit draw vertex_header info.
- */
- vinfo_vbuf->num_attribs = 0;
- for (i = 0; i < num; i++) {
- draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
- }
- draw_compute_vertex_size(vinfo_vbuf);
+ /* Tell draw_vbuf to simply emit the whole post-xform vertex
+ * as-is. No longer any need to try and emit draw vertex_header
+ * info.
+ */
+ vinfo_vbuf->num_attribs = 0;
+ for (i = 0; i < num; i++) {
+ draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
}
+ draw_compute_vertex_size(vinfo_vbuf);
/*
* Loop over fragment shader inputs, searching for the matching output
@@ -93,35 +86,40 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
vinfo->num_attribs = 0;
for (i = 0; i < lpfs->info.num_inputs; i++) {
int src;
- switch (lpfs->info.input_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
- src = draw_find_vs_output(llvmpipe->draw,
- TGSI_SEMANTIC_POSITION, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
- break;
+ enum interp_mode interp;
- case TGSI_SEMANTIC_COLOR:
- src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR,
- lpfs->info.input_semantic_index[i]);
- draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+ switch (lpfs->info.input_interpolate[i]) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ interp = INTERP_CONSTANT;
break;
-
- case TGSI_SEMANTIC_FOG:
- src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ case TGSI_INTERPOLATE_LINEAR:
+ interp = INTERP_LINEAR;
break;
-
- case TGSI_SEMANTIC_GENERIC:
- case TGSI_SEMANTIC_FACE:
- /* this includes texcoords and varying vars */
- src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC,
- lpfs->info.input_semantic_index[i]);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ interp = INTERP_PERSPECTIVE;
break;
-
default:
assert(0);
+ interp = INTERP_LINEAR;
+ }
+
+ switch (lpfs->info.input_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ interp = INTERP_POS;
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ if (llvmpipe->rasterizer->flatshade) {
+ interp = INTERP_CONSTANT;
+ }
+ break;
}
+
+ /* this includes texcoords and varying vars */
+ src = draw_find_vs_output(llvmpipe->draw,
+ lpfs->info.input_semantic_name[i],
+ lpfs->info.input_semantic_index[i]);
+ draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
}
llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw,
@@ -200,10 +198,14 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe )
unsigned i;
/* vertex shader samplers */
- for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
- llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i];
- llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->texture[i];
- llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples;
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i];
+ llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i];
+ llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
+ }
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] );
}
/* fragment shader samplers */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 9faed5a0b1..ee0f69b2af 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -83,6 +83,7 @@
#include "lp_bld_debug.h"
#include "lp_screen.h"
#include "lp_context.h"
+#include "lp_buffer.h"
#include "lp_state.h"
#include "lp_quad.h"
#include "lp_tex_sample.h"
@@ -147,6 +148,20 @@ generate_depth(LLVMBuilderRef builder,
format_desc = util_format_description(key->zsbuf_format);
assert(format_desc);
+ /*
+ * Depths are expected to be between 0 and 1, even if they are stored in
+ * floats. Setting these bits here will ensure that the lp_build_conv() call
+ * below won't try to unnecessarily clamp the incoming values.
+ */
+ if(src_type.floating) {
+ src_type.sign = FALSE;
+ src_type.norm = TRUE;
+ }
+ else {
+ assert(!src_type.sign);
+ assert(src_type.norm);
+ }
+
/* Pick the depth type. */
dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
@@ -154,14 +169,11 @@ generate_depth(LLVMBuilderRef builder,
assert(dst_type.width == src_type.width);
assert(dst_type.length == src_type.length);
-#if 1
- src = lp_build_clamped_float_to_unsigned_norm(builder,
- src_type,
- dst_type.width,
- src);
-#else
lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
-#endif
+
+ dst_ptr = LLVMBuildBitCast(builder,
+ dst_ptr,
+ LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
lp_build_depth_test(builder,
&key->depth,
@@ -399,9 +411,9 @@ generate_fragment(struct llvmpipe_context *lp,
#ifdef DEBUG
tgsi_dump(shader->base.tokens, 0);
if(key->depth.enabled) {
+ debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
debug_printf("depth.writemask = %u\n", key->depth.writemask);
- debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
}
if(key->alpha.enabled) {
debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
@@ -419,6 +431,34 @@ generate_fragment(struct llvmpipe_context *lp,
debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
}
debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+ for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+ if(key->sampler[i].format) {
+ debug_printf("sampler[%u] = \n", i);
+ debug_printf(" .format = %s\n",
+ pf_name(key->sampler[i].format));
+ debug_printf(" .target = %s\n",
+ debug_dump_tex_target(key->sampler[i].target, TRUE));
+ debug_printf(" .pot = %u %u %u\n",
+ key->sampler[i].pot_width,
+ key->sampler[i].pot_height,
+ key->sampler[i].pot_depth);
+ debug_printf(" .wrap = %s %s %s\n",
+ debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+ debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+ debug_printf(" .min_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+ debug_printf(" .min_mip_filter = %s\n",
+ debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+ debug_printf(" .mag_img_filter = %s\n",
+ debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+ if(key->sampler[i].compare_mode)
+ debug_printf(" .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+ debug_printf(" .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+ debug_printf(" .prefilter = %u\n", key->sampler[i].prefilter);
+ }
+ }
+
#endif
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -582,6 +622,13 @@ generate_fragment(struct llvmpipe_context *lp,
* Translate the LLVM IR into machine code.
*/
+#ifdef DEBUG
+ if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
+ LLVMDumpValue(variant->function);
+ assert(0);
+ }
+#endif
+
LLVMRunFunctionPassManager(screen->pass, variant->function);
#ifdef DEBUG
@@ -589,11 +636,6 @@ generate_fragment(struct llvmpipe_context *lp,
debug_printf("\n");
#endif
- if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function);
- abort();
- }
-
variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
#ifdef DEBUG
@@ -672,16 +714,29 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
void
llvmpipe_set_constant_buffer(struct pipe_context *pipe,
uint shader, uint index,
- const struct pipe_constant_buffer *buf)
+ const struct pipe_constant_buffer *constants)
{
struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ struct pipe_buffer *buffer = constants ? constants->buffer : NULL;
+ unsigned size = buffer ? buffer->size : 0;
+ const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL;
assert(shader < PIPE_SHADER_TYPES);
assert(index == 0);
+ if(shader == PIPE_SHADER_VERTEX)
+ draw_flush(llvmpipe->draw);
+
/* note: reference counting */
- pipe_buffer_reference(&llvmpipe->constants[shader].buffer,
- buf ? buf->buffer : NULL);
+ pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
+
+ if(shader == PIPE_SHADER_FRAGMENT) {
+ llvmpipe->jit_context.constants = data;
+ }
+
+ if(shader == PIPE_SHADER_VERTEX) {
+ draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+ }
llvmpipe->dirty |= LP_NEW_CONSTANTS;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index c69d90c723..d382f9ca87 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -78,6 +78,34 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
void
+llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
+ unsigned num_samplers,
+ void **samplers)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ unsigned i;
+
+ assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_samplers == llvmpipe->num_vertex_samplers &&
+ !memcmp(llvmpipe->vertex_samplers, samplers, num_samplers * sizeof(void *)))
+ return;
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < num_samplers; ++i)
+ llvmpipe->vertex_samplers[i] = samplers[i];
+ for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+ llvmpipe->vertex_samplers[i] = NULL;
+
+ llvmpipe->num_vertex_samplers = num_samplers;
+
+ llvmpipe->dirty |= LP_NEW_SAMPLER;
+}
+
+
+void
llvmpipe_set_sampler_textures(struct pipe_context *pipe,
unsigned num, struct pipe_texture **texture)
{
@@ -102,8 +130,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
if(tex) {
struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
- jit_tex->width = tex->width[0];
- jit_tex->height = tex->height[0];
+ jit_tex->width = tex->width0;
+ jit_tex->height = tex->height0;
jit_tex->stride = lp_tex->stride[0];
if(!lp_tex->dt)
jit_tex->data = lp_tex->data;
@@ -117,6 +145,37 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
void
+llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe,
+ unsigned num_textures,
+ struct pipe_texture **textures)
+{
+ struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+ uint i;
+
+ assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS);
+
+ /* Check for no-op */
+ if (num_textures == llvmpipe->num_vertex_textures &&
+ !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) {
+ return;
+ }
+
+ draw_flush(llvmpipe->draw);
+
+ for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+ struct pipe_texture *tex = i < num_textures ? textures[i] : NULL;
+
+ pipe_texture_reference(&llvmpipe->vertex_textures[i], tex);
+ lp_tex_tile_cache_set_texture(llvmpipe->vertex_tex_cache[i], tex);
+ }
+
+ llvmpipe->num_vertex_textures = num_textures;
+
+ llvmpipe->dirty |= LP_NEW_TEXTURE;
+}
+
+
+void
llvmpipe_delete_sampler_state(struct pipe_context *pipe,
void *sampler)
{
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index 177a26b7b1..c06ce8b75c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -53,10 +53,11 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
/* check if changing cbuf */
if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) {
/* flush old */
+ lp_tile_cache_map_transfers(lp->cbuf_cache[i]);
lp_flush_tile_cache(lp->cbuf_cache[i]);
/* assign new */
- lp->framebuffer.cbufs[i] = fb->cbufs[i];
+ pipe_surface_reference(&lp->framebuffer.cbufs[i], fb->cbufs[i]);
/* update cache */
lp_tile_cache_set_surface(lp->cbuf_cache[i], fb->cbufs[i]);
@@ -81,7 +82,7 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
}
/* assign new */
- lp->framebuffer.zsbuf = fb->zsbuf;
+ pipe_surface_reference(&lp->framebuffer.zsbuf, fb->zsbuf);
/* Tell draw module how deep the Z/depth buffer is */
if (lp->framebuffer.zsbuf) {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 15c3029614..8a761648e7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -92,5 +92,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
(struct lp_vertex_shader *)vs;
draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
+ FREE( (void *)state->shader.tokens );
FREE( state );
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index a88e110c66..39d80726e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -56,6 +56,9 @@
#include "lp_bld_type.h"
+#define LP_TEST_NUM_SAMPLES 32
+
+
void
write_tsv_header(FILE *fp);
@@ -68,17 +71,28 @@ boolean
test_all(unsigned verbose, FILE *fp);
+#if defined(PIPE_CC_MSVC)
+
+unsigned __int64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#define rdtsc() __rdtsc()
+
+#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
static INLINE uint64_t
rdtsc(void)
{
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
uint32_t hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
#else
- return 0;
+
+#define rdtsc() 0
+
#endif
-}
+
float
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 94b661dcba..29fff91981 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend,
}
+ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -477,8 +478,8 @@ test_one(unsigned verbose,
char *error = NULL;
blend_test_ptr_t blend_test_ptr;
boolean success;
- const unsigned n = 32;
- int64_t cycles[n];
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned i, j;
@@ -530,11 +531,11 @@ test_one(unsigned verbose,
success = TRUE;
for(i = 0; i < n && success; ++i) {
if(mode == AoS) {
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
@@ -595,11 +596,11 @@ test_one(unsigned verbose,
if(mode == SoA) {
const unsigned stride = type.length*type.width/8;
- uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+ ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 9dcf58e5dc..968c7a2d4a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module,
}
+ALIGN_STACK
static boolean
test_one(unsigned verbose,
FILE *fp,
@@ -156,8 +157,8 @@ test_one(unsigned verbose,
char *error = NULL;
conv_test_ptr_t conv_test_ptr;
boolean success;
- const unsigned n = 32;
- int64_t cycles[n];
+ const unsigned n = LP_TEST_NUM_SAMPLES;
+ int64_t cycles[LP_TEST_NUM_SAMPLES];
double cycles_avg = 0.0;
unsigned num_srcs;
unsigned num_dsts;
@@ -229,8 +230,8 @@ test_one(unsigned verbose,
for(i = 0; i < n && success; ++i) {
unsigned src_stride = src_type.length*src_type.width/8;
unsigned dst_stride = dst_type.length*dst_type.width/8;
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+ ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
int64_t start_counter = 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index d8455e5649..23ea9ebbe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -35,10 +35,11 @@
#include <llvm-c/Target.h>
#include <llvm-c/Transforms/Scalar.h>
+#include "util/u_cpu_detect.h"
#include "util/u_format.h"
-#include "lp_bld_flow.h"
#include "lp_bld_format.h"
+#include "lp_test.h"
struct pixel_test_case
@@ -89,40 +90,63 @@ struct pixel_test_case test_cases[] =
};
-typedef void (*load_ptr_t)(const void *, float *);
+void
+write_tsv_header(FILE *fp)
+{
+ fprintf(fp,
+ "result\t"
+ "format\n");
+
+ fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+ const struct util_format_description *desc,
+ boolean success)
+{
+ fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+ fprintf(fp, "%s\n", desc->name);
+
+ fflush(fp);
+}
+
+
+typedef void (*load_ptr_t)(const uint32_t packed, float *);
static LLVMValueRef
add_load_rgba_test(LLVMModuleRef module,
- enum pipe_format format)
+ const struct util_format_description *desc)
{
LLVMTypeRef args[2];
LLVMValueRef func;
- LLVMValueRef ptr;
+ LLVMValueRef packed;
LLVMValueRef rgba_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef rgba;
- struct lp_build_loop_state loop;
- args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[0] = LLVMInt32Type();
args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
- ptr = LLVMGetParam(func, 0);
+ packed = LLVMGetParam(func, 0);
rgba_ptr = LLVMGetParam(func, 1);
block = LLVMAppendBasicBlock(func, "entry");
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 1, 0), &loop);
+ if(desc->block.bits < 32)
+ packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), "");
- rgba = lp_build_load_rgba_aos(builder, format, ptr);
- LLVMBuildStore(builder, rgba, rgba_ptr);
+ rgba = lp_build_unpack_rgba_aos(builder, desc, packed);
- lp_build_loop_end(builder, LLVMConstInt(LLVMInt32Type(), 4, 0), NULL, &loop);
+ LLVMBuildStore(builder, rgba, rgba_ptr);
LLVMBuildRetVoid(builder);
@@ -131,27 +155,28 @@ add_load_rgba_test(LLVMModuleRef module,
}
-typedef void (*store_ptr_t)(void *, const float *);
+typedef void (*store_ptr_t)(uint32_t *, const float *);
static LLVMValueRef
add_store_rgba_test(LLVMModuleRef module,
- enum pipe_format format)
+ const struct util_format_description *desc)
{
LLVMTypeRef args[2];
LLVMValueRef func;
- LLVMValueRef ptr;
+ LLVMValueRef packed_ptr;
LLVMValueRef rgba_ptr;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef rgba;
+ LLVMValueRef packed;
- args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[0] = LLVMPointerType(LLVMInt32Type(), 0);
args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
- ptr = LLVMGetParam(func, 0);
+ packed_ptr = LLVMGetParam(func, 0);
rgba_ptr = LLVMGetParam(func, 1);
block = LLVMAppendBasicBlock(func, "entry");
@@ -160,7 +185,12 @@ add_store_rgba_test(LLVMModuleRef module,
rgba = LLVMBuildLoad(builder, rgba_ptr, "");
- lp_build_store_rgba_aos(builder, format, ptr, rgba);
+ packed = lp_build_pack_rgba_aos(builder, desc, rgba);
+
+ if(desc->block.bits < 32)
+ packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+
+ LLVMBuildStore(builder, packed, packed_ptr);
LLVMBuildRetVoid(builder);
@@ -169,8 +199,9 @@ add_store_rgba_test(LLVMModuleRef module,
}
+ALIGN_STACK
static boolean
-test_format(const struct pixel_test_case *test)
+test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
{
LLVMModuleRef module = NULL;
LLVMValueRef load = NULL;
@@ -192,8 +223,8 @@ test_format(const struct pixel_test_case *test)
module = LLVMModuleCreateWithName("test");
- load = add_load_rgba_test(module, test->format);
- store = add_store_rgba_test(module, test->format);
+ load = add_load_rgba_test(module, desc);
+ store = add_store_rgba_test(module, desc);
if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
LLVMDumpModule(module);
@@ -229,7 +260,7 @@ test_format(const struct pixel_test_case *test)
memset(unpacked, 0, sizeof unpacked);
packed = 0;
- load_ptr(&test->packed, unpacked);
+ load_ptr(test->packed, unpacked);
store_ptr(&packed, unpacked);
success = TRUE;
@@ -255,18 +286,29 @@ test_format(const struct pixel_test_case *test)
if(pass)
LLVMDisposePassManager(pass);
+ if(fp)
+ write_tsv_row(fp, desc, success);
+
return success;
}
-int main(int argc, char **argv)
+boolean
+test_all(unsigned verbose, FILE *fp)
{
unsigned i;
- int ret;
+ bool success = TRUE;
for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
- if(!test_format(&test_cases[i]))
- ret = 1;
+ if(!test_format(verbose, fp, &test_cases[i]))
+ success = FALSE;
- return ret;
+ return success;
+}
+
+
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+ return test_all(verbose, fp);
}
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 4592dc0b2d..314544aa9a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -34,10 +34,25 @@
*/
+#include "util/u_cpu_detect.h"
+
#include "lp_bld_const.h"
+#include "lp_bld_misc.h"
#include "lp_test.h"
+#ifdef PIPE_CC_MSVC
+static INLINE double
+round(double x)
+{
+ if (x >= 0.0)
+ return floor(x + 0.5);
+ else
+ return ceil(x - 0.5);
+}
+#endif
+
+
void
dump_type(FILE *fp,
struct lp_type type)
@@ -365,6 +380,11 @@ int main(int argc, char **argv)
n = atoi(argv[i]);
}
+ LLVMLinkInJIT();
+ LLVMInitializeNativeTarget();
+
+ util_cpu_detect();
+
if(fp) {
/* Warm up the caches */
test_some(0, NULL, 100);
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 773e848242..a6d9a2c1ac 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -36,6 +36,7 @@
#include "util/u_memory.h"
#include "util/u_tile.h"
#include "util/u_format.h"
+#include "util/u_math.h"
#include "lp_context.h"
#include "lp_surface.h"
#include "lp_texture.h"
@@ -154,7 +155,6 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc)
if (lpt->timestamp != tc->timestamp) {
/* texture was modified, invalidate all cached tiles */
uint i;
- debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
for (i = 0; i < NUM_ENTRIES; i++) {
tc->entries[i].addr.bits.invalid = 1;
}
@@ -270,8 +270,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
addr.bits.level,
addr.bits.z,
PIPE_TRANSFER_READ, 0, 0,
- tc->texture->width[addr.bits.level],
- tc->texture->height[addr.bits.level]);
+ u_minify(tc->texture->width0, addr.bits.level),
+ u_minify(tc->texture->height0, addr.bits.level));
tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
@@ -290,7 +290,7 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
assert(0);
}
- util_format_read_4ub(tc->tex_trans->format,
+ util_format_read_4ub(tc->tex_trans->texture->format,
(uint8_t *)tile->color, sizeof tile->color[0],
tc->tex_trans_map, tc->tex_trans->stride,
x, y, w, h);
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index a1365a045f..0d01c07fb5 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
dsdx = fabsf(dsdx);
dsdy = fabsf(dsdy);
- rho = MAX2(dsdx, dsdy) * texture->width[0];
+ rho = MAX2(dsdx, dsdy) * texture->width0;
}
if (t) {
float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
@@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
float max;
dtdx = fabsf(dtdx);
dtdy = fabsf(dtdy);
- max = MAX2(dtdx, dtdy) * texture->height[0];
+ max = MAX2(dtdx, dtdy) * texture->height0;
rho = MAX2(rho, max);
}
if (p) {
@@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
float max;
dpdx = fabsf(dpdx);
dpdy = fabsf(dpdy);
- max = MAX2(dpdx, dpdy) * texture->depth[0];
+ max = MAX2(dpdx, dpdy) * texture->depth0;
rho = MAX2(rho, max);
}
@@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler,
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
- if (x < 0 || x >= (int) texture->width[level] ||
- y < 0 || y >= (int) texture->height[level] ||
- z < 0 || z >= (int) texture->depth[level]) {
+ if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+ y < 0 || y >= (int) u_minify(texture->height0, level) ||
+ z < 0 || z >= (int) u_minify(texture->depth0, level)) {
rgba[0][j] = sampler->border_color[0];
rgba[1][j] = sampler->border_color[1];
rgba[2][j] = sampler->border_color[2];
@@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
assert(sampler->normalized_coords);
- width = texture->width[level0];
- height = texture->height[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
assert(width > 0);
@@ -1250,9 +1250,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
assert(sampler->normalized_coords);
- width = texture->width[level0];
- height = texture->height[level0];
- depth = texture->depth[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
+ depth = u_minify(texture->depth0, level0);
assert(width > 0);
assert(height > 0);
@@ -1394,8 +1394,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
/* texture RECTS cannot be mipmapped */
assert(level0 == level1);
- width = texture->width[level0];
- height = texture->height[level0];
+ width = u_minify(texture->width0, level0);
+ height = u_minify(texture->height0, level0);
assert(width > 0);
@@ -1513,8 +1513,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
/* Do this elsewhere:
*/
- samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
- samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
+ samp->xpot = util_unsigned_logbase2( samp->texture->width0 );
+ samp->ypot = util_unsigned_logbase2( samp->texture->height0 );
/* Try to hook in a faster sampler. Ultimately we'll have to
* code-generate these. Luckily most of this looks like it is
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 724d437833..f099f903bd 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -48,7 +48,6 @@
/* Simple, maximally packed layout.
*/
-
/* Conventional allocation path for non-display textures:
*/
static boolean
@@ -57,31 +56,31 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
{
struct pipe_texture *pt = &lpt->base;
unsigned level;
- unsigned width = pt->width[0];
- unsigned height = pt->height[0];
- unsigned depth = pt->depth[0];
+ unsigned width = pt->width0;
+ unsigned height = pt->height0;
+ unsigned depth = pt->depth0;
unsigned buffer_size = 0;
- pf_get_block(lpt->base.format, &lpt->base.block);
-
for (level = 0; level <= pt->last_level; level++) {
- pt->width[level] = width;
- pt->height[level] = height;
- pt->depth[level] = depth;
- pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);
- pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
- lpt->stride[level] = align(pt->nblocksx[level]*pt->block.size, 16);
+ unsigned nblocksx, nblocksy;
+
+ /* Allocate storage for whole quads. This is particularly important
+ * for depth surfaces, which are currently stored in a swizzled format. */
+ nblocksx = pf_get_nblocksx(pt->format, align(width, 2));
+ nblocksy = pf_get_nblocksy(pt->format, align(height, 2));
+
+ lpt->stride[level] = align(nblocksx * pf_get_blocksize(pt->format), 16);
lpt->level_offset[level] = buffer_size;
- buffer_size += (pt->nblocksy[level] *
+ buffer_size += (nblocksy *
((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
lpt->stride[level]);
- width = minify(width);
- height = minify(height);
- depth = minify(depth);
+ width = u_minify(width, 1);
+ height = u_minify(height, 1);
+ depth = u_minify(depth, 1);
}
lpt->data = align_malloc(buffer_size, 16);
@@ -95,14 +94,10 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
{
struct llvmpipe_winsys *winsys = screen->winsys;
- pf_get_block(lpt->base.format, &lpt->base.block);
- lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);
- lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);
-
lpt->dt = winsys->displaytarget_create(winsys,
lpt->base.format,
- lpt->base.width[0],
- lpt->base.height[0],
+ lpt->base.width0,
+ lpt->base.height0,
16,
&lpt->stride[0] );
@@ -164,7 +159,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
/* Only supports one type */
if (base->target != PIPE_TEXTURE_2D ||
base->last_level != 0 ||
- base->depth[0] != 1) {
+ base->depth0 != 1) {
return NULL;
}
@@ -175,8 +170,6 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
lpt->base = *base;
pipe_reference_init(&lpt->base.reference, 1);
lpt->base.screen = screen;
- lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);
- lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);
lpt->stride[0] = stride[0];
pipe_buffer_reference(&lpt->buffer, buffer);
@@ -221,8 +214,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
pipe_reference_init(&ps->reference, 1);
pipe_texture_reference(&ps->texture, pt);
ps->format = pt->format;
- ps->width = pt->width[level];
- ps->height = pt->height[level];
+ ps->width = u_minify(pt->width0, level);
+ ps->height = u_minify(pt->height0, level);
ps->offset = lpt->level_offset[level];
ps->usage = usage;
@@ -250,11 +243,17 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
ps->level = level;
ps->zslice = zslice;
+ /* XXX shouldn't that rather be
+ tex_height = align(ps->height, 2);
+ to account for alignment done in llvmpipe_texture_layout ?
+ */
if (pt->target == PIPE_TEXTURE_CUBE) {
- ps->offset += face * pt->nblocksy[level] * lpt->stride[level];
+ unsigned tex_height = ps->height;
+ ps->offset += face * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
}
else if (pt->target == PIPE_TEXTURE_3D) {
- ps->offset += zslice * pt->nblocksy[level] * lpt->stride[level];
+ unsigned tex_height = ps->height;
+ ps->offset += zslice * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
}
else {
assert(face == 0);
@@ -295,14 +294,10 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
if (lpt) {
struct pipe_transfer *pt = &lpt->base;
pipe_texture_reference(&pt->texture, texture);
- pt->format = texture->format;
- pt->block = texture->block;
pt->x = x;
pt->y = y;
pt->width = w;
pt->height = h;
- pt->nblocksx = texture->nblocksx[level];
- pt->nblocksy = texture->nblocksy[level];
pt->stride = lptex->stride[level];
pt->usage = usage;
pt->face = face;
@@ -311,11 +306,17 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
lpt->offset = lptex->level_offset[level];
+ /* XXX shouldn't that rather be
+ tex_height = align(u_minify(texture->height0, level), 2)
+ to account for alignment done in llvmpipe_texture_layout ?
+ */
if (texture->target == PIPE_TEXTURE_CUBE) {
- lpt->offset += face * pt->nblocksy * pt->stride;
+ unsigned tex_height = u_minify(texture->height0, level);
+ lpt->offset += face * pf_get_nblocksy(texture->format, tex_height) * pt->stride;
}
else if (texture->target == PIPE_TEXTURE_3D) {
- lpt->offset += zslice * pt->nblocksy * pt->stride;
+ unsigned tex_height = u_minify(texture->height0, level);
+ lpt->offset += zslice * pf_get_nblocksy(texture->format, tex_height) * pt->stride;
}
else {
assert(face == 0);
@@ -347,23 +348,17 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
ubyte *map, *xfer_map;
struct llvmpipe_texture *lpt;
+ enum pipe_format format;
assert(transfer->texture);
lpt = llvmpipe_texture(transfer->texture);
+ format = lpt->base.format;
if(lpt->dt) {
struct llvmpipe_winsys *winsys = screen->winsys;
- unsigned flags = 0;
-
- if (transfer->usage != PIPE_TRANSFER_READ) {
- flags |= PIPE_BUFFER_USAGE_CPU_WRITE;
- }
-
- if (transfer->usage != PIPE_TRANSFER_WRITE) {
- flags |= PIPE_BUFFER_USAGE_CPU_READ;
- }
- map = winsys->displaytarget_map(winsys, lpt->dt, flags);
+ map = winsys->displaytarget_map(winsys, lpt->dt,
+ pipe_transfer_buffer_flags(transfer));
if (map == NULL)
return NULL;
}
@@ -373,7 +368,7 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
/* May want to different things here depending on read/write nature
* of the map:
*/
- if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ)
+ if (transfer->texture && (transfer->usage & PIPE_TRANSFER_WRITE))
{
/* Do something to notify sharing contexts of a texture change.
* In llvmpipe, that would mean flushing the texture cache.
@@ -382,8 +377,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
}
xfer_map = map + llvmpipe_transfer(transfer)->offset +
- transfer->y / transfer->block.height * transfer->stride +
- transfer->x / transfer->block.width * transfer->block.size;
+ transfer->y / pf_get_blockheight(format) * transfer->stride +
+ transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
/*printf("map = %p xfer map = %p\n", map, xfer_map);*/
return xfer_map;
}
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 143afec3d3..7a1ecf5107 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -44,10 +44,53 @@
#include "lp_tile_cache.h"
+#define MAX_WIDTH 4096
+#define MAX_HEIGHT 4096
+
+
+enum llvmpipe_tile_status
+{
+ LP_TILE_STATUS_UNDEFINED = 0,
+ LP_TILE_STATUS_CLEAR = 1,
+ LP_TILE_STATUS_DEFINED = 2
+};
+
+
+struct llvmpipe_cached_tile
+{
+ enum llvmpipe_tile_status status;
+
+ /** color in SOA format */
+ uint8_t *color;
+};
+
+
+struct llvmpipe_tile_cache
+{
+ struct pipe_screen *screen;
+ struct pipe_surface *surface; /**< the surface we're caching */
+ struct pipe_transfer *transfer;
+ void *transfer_map;
+
+ struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE];
+
+ uint8_t clear_color[4]; /**< for color bufs */
+ uint clear_val; /**< for z+stencil, or packed color clear value */
+
+ struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */
+};
+
+
struct llvmpipe_tile_cache *
lp_create_tile_cache( struct pipe_screen *screen )
{
struct llvmpipe_tile_cache *tc;
+ int maxLevels, maxTexSize;
+
+ /* sanity checking: max sure MAX_WIDTH/HEIGHT >= largest texture image */
+ maxLevels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+ maxTexSize = 1 << (maxLevels - 1);
+ assert(MAX_WIDTH >= maxTexSize);
tc = CALLOC_STRUCT( llvmpipe_tile_cache );
if(!tc)
@@ -193,44 +236,41 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
if(!pt)
return;
+ assert(tc->transfer_map);
+
/* push the tile to all positions marked as clear */
for (y = 0; y < pt->height; y += TILE_SIZE) {
for (x = 0; x < pt->width; x += TILE_SIZE) {
struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE];
- switch(tile->status) {
- case LP_TILE_STATUS_UNDEFINED:
- break;
-
- case LP_TILE_STATUS_CLEAR: {
- /**
- * Actually clear the tiles which were flagged as being in a clear state.
- */
-
- struct pipe_screen *screen = pt->texture->screen;
- unsigned tw = TILE_SIZE;
- unsigned th = TILE_SIZE;
- void *dst;
-
- if (pipe_clip_tile(x, y, &tw, &th, pt))
- continue;
-
- dst = screen->transfer_map(screen, pt);
- assert(dst);
- if(!dst)
- continue;
-
- util_fill_rect(dst, &pt->block, pt->stride,
- x, y, tw, th,
- tc->clear_val);
-
- screen->transfer_unmap(screen, pt);
- break;
- }
+ if(tile->status != LP_TILE_STATUS_UNDEFINED) {
+ unsigned w = TILE_SIZE;
+ unsigned h = TILE_SIZE;
+
+ if (!pipe_clip_tile(x, y, &w, &h, pt)) {
+ switch(tile->status) {
+ case LP_TILE_STATUS_CLEAR:
+ /* Actually clear the tiles which were flagged as being in a
+ * clear state. */
+ util_fill_rect(tc->transfer_map, pt->texture->format, pt->stride,
+ x, y, w, h,
+ tc->clear_val);
+ break;
+
+ case LP_TILE_STATUS_DEFINED:
+ lp_tile_write_4ub(pt->texture->format,
+ tile->color,
+ tc->transfer_map, pt->stride,
+ x, y, w, h);
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
- case LP_TILE_STATUS_DEFINED:
- lp_put_tile_rgba_soa(pt, x, y, tile->color);
- break;
+ tile->status = LP_TILE_STATUS_UNDEFINED;
}
}
}
@@ -248,6 +288,14 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
struct llvmpipe_cached_tile *tile = &tc->entries[y/TILE_SIZE][x/TILE_SIZE];
struct pipe_transfer *pt = tc->transfer;
+ assert(tc->surface);
+ assert(tc->transfer);
+
+ if(!tc->transfer_map)
+ lp_tile_cache_map_transfers(tc);
+
+ assert(tc->transfer_map);
+
switch(tile->status) {
case LP_TILE_STATUS_CLEAR:
/* don't get tile from framebuffer, just clear it */
@@ -255,11 +303,22 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
tile->status = LP_TILE_STATUS_DEFINED;
break;
- case LP_TILE_STATUS_UNDEFINED:
- /* get new tile data from transfer */
- lp_get_tile_rgba_soa(pt, x, y, tile->color);
+ case LP_TILE_STATUS_UNDEFINED: {
+ unsigned w = TILE_SIZE;
+ unsigned h = TILE_SIZE;
+
+ x &= ~(TILE_SIZE - 1);
+ y &= ~(TILE_SIZE - 1);
+
+ if (!pipe_clip_tile(x, y, &w, &h, tc->transfer))
+ lp_tile_read_4ub(pt->texture->format,
+ tile->color,
+ tc->transfer_map, tc->transfer->stride,
+ x, y, w, h);
+
tile->status = LP_TILE_STATUS_DEFINED;
break;
+ }
case LP_TILE_STATUS_DEFINED:
/* nothing to do */
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.h b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
index 6d8ba5ece7..161bab3799 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.h
@@ -33,42 +33,7 @@
#include "lp_tile_soa.h"
-enum llvmpipe_tile_status
-{
- LP_TILE_STATUS_UNDEFINED = 0,
- LP_TILE_STATUS_CLEAR = 1,
- LP_TILE_STATUS_DEFINED = 2
-};
-
-
-struct llvmpipe_cached_tile
-{
- enum llvmpipe_tile_status status;
-
- /** color in SOA format */
- uint8_t *color;
-};
-
-
-/** XXX move these */
-#define MAX_WIDTH 2048
-#define MAX_HEIGHT 2048
-
-
-struct llvmpipe_tile_cache
-{
- struct pipe_screen *screen;
- struct pipe_surface *surface; /**< the surface we're caching */
- struct pipe_transfer *transfer;
- void *transfer_map;
-
- struct llvmpipe_cached_tile entries[MAX_WIDTH/TILE_SIZE][MAX_HEIGHT/TILE_SIZE];
-
- uint8_t clear_color[4]; /**< for color bufs */
- uint clear_val; /**< for z+stencil, or packed color clear value */
-
- struct llvmpipe_cached_tile *last_tile; /**< most recently retrieved tile */
-};
+struct llvmpipe_tile_cache; /* opaque */
extern struct llvmpipe_tile_cache *
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.c b/src/gallium/drivers/llvmpipe/lp_tile_soa.c
deleted file mode 100644
index 4e4ccb31cc..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.c
+++ /dev/null
@@ -1,931 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * RGBA/float tile get/put functions.
- * Usable both by drivers and state trackers.
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_inlines.h"
-
-#include "util/u_math.h"
-#include "util/u_memory.h"
-#include "util/u_rect.h"
-#include "util/u_tile.h"
-#include "lp_tile_cache.h"
-#include "lp_tile_soa.h"
-
-
-const unsigned char
-tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {
- { 0, 1, 4, 5, 8, 9, 12, 13},
- { 2, 3, 6, 7, 10, 11, 14, 15}
-};
-
-
-
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
-
-static void
-a8r8g8b8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const unsigned pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff;
- TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff;
- TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff;
- TILE_PIXEL(p, j, i, 3) = (pixel >> 24) & 0xff;
- }
- }
-}
-
-
-static void
-a8r8g8b8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (a << 24) | (r << 16) | (g << 8) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
-
-static void
-x8r8g8b8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const unsigned pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = (pixel >> 16) & 0xff;
- TILE_PIXEL(p, j, i, 1) = (pixel >> 8) & 0xff;
- TILE_PIXEL(p, j, i, 2) = (pixel >> 0) & 0xff;
- TILE_PIXEL(p, j, i, 3) = 0xff;
- }
- }
-}
-
-
-static void
-x8r8g8b8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
-
-static void
-b8g8r8a8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const unsigned pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = (pixel >> 8) & 0xff;
- TILE_PIXEL(p, j, i, 1) = (pixel >> 16) & 0xff;
- TILE_PIXEL(p, j, i, 2) = (pixel >> 24) & 0xff;
- TILE_PIXEL(p, j, i, 3) = (pixel >> 0) & 0xff;
- }
- }
-}
-
-
-static void
-b8g8r8a8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (b << 24) | (g << 16) | (r << 8) | a;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/
-
-static void
-a1r5g5b5_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const ushort pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = ((pixel >> 10) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 3) = ((pixel >> 15) ) * 255;
- }
- }
-}
-
-
-static void
-a1r5g5b5_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- r = r >> 3; /* 5 bits */
- g = g >> 3; /* 5 bits */
- b = b >> 3; /* 5 bits */
- a = a >> 7; /* 1 bit */
- *dst++ = (a << 15) | (r << 10) | (g << 5) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
-
-static void
-a4r4g4b4_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const ushort pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = ((pixel >> 8) & 0xf) * 255 / 15;
- TILE_PIXEL(p, j, i, 1) = ((pixel >> 4) & 0xf) * 255 / 15;
- TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0xf) * 255 / 15;
- TILE_PIXEL(p, j, i, 3) = ((pixel >> 12) ) * 255 / 15;
- }
- }
-}
-
-
-static void
-a4r4g4b4_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, g, b, a;
- r = TILE_PIXEL(p, j, i, 0);
- g = TILE_PIXEL(p, j, i, 1);
- b = TILE_PIXEL(p, j, i, 2);
- a = TILE_PIXEL(p, j, i, 3);
- r >>= 4;
- g >>= 4;
- b >>= 4;
- a >>= 4;
- *dst++ = (a << 12) | (r << 16) | (g << 4) | b;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_R5G6B5_UNORM ***/
-
-static void
-r5g6b5_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const ushort pixel = *src++;
- TILE_PIXEL(p, j, i, 0) = ((pixel >> 11) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 1) = ((pixel >> 5) & 0x3f) * 255 / 63;
- TILE_PIXEL(p, j, i, 2) = ((pixel ) & 0x1f) * 255 / 31;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-r5g6b5_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- uint r = (uint) TILE_PIXEL(p, j, i, 0) * 31 / 255;
- uint g = (uint) TILE_PIXEL(p, j, i, 1) * 63 / 255;
- uint b = (uint) TILE_PIXEL(p, j, i, 2) * 31 / 255;
- *dst++ = (r << 11) | (g << 5) | (b);
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_Z16_UNORM ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z16_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const float scale = 1.0f / 65535.0f;
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = *src++ * scale;
- }
- }
-}
-
-
-
-
-/*** PIPE_FORMAT_L8_UNORM ***/
-
-static void
-l8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = *src;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-l8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r;
- r = TILE_PIXEL(p, j, i, 0);
- *dst++ = (ubyte) r;
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_A8_UNORM ***/
-
-static void
-a8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = 0;
- TILE_PIXEL(p, j, i, 3) = *src;
- }
- }
-}
-
-
-static void
-a8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned a;
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (ubyte) a;
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_R16_SNORM ***/
-
-static void
-r16_get_tile_rgba(const short *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) = MAX2(src[0] >> 7, 0);
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = 0;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-r16_put_tile_rgba(short *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, dst++) {
- dst[0] = TILE_PIXEL(p, j, i, 0) << 7;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/
-
-static void
-r16g16b16a16_get_tile_rgba(const short *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src += 4) {
- TILE_PIXEL(p, j, i, 0) = src[0] >> 8;
- TILE_PIXEL(p, j, i, 1) = src[1] >> 8;
- TILE_PIXEL(p, j, i, 2) = src[2] >> 8;
- TILE_PIXEL(p, j, i, 3) = src[3] >> 8;
- }
- }
-}
-
-
-static void
-r16g16b16a16_put_tile_rgba(short *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, dst += 4) {
- dst[0] = TILE_PIXEL(p, j, i, 0) << 8;
- dst[1] = TILE_PIXEL(p, j, i, 1) << 8;
- dst[2] = TILE_PIXEL(p, j, i, 2) << 8;
- dst[3] = TILE_PIXEL(p, j, i, 3) << 8;
- }
- }
-}
-
-
-
-/*** PIPE_FORMAT_I8_UNORM ***/
-
-static void
-i8_get_tile_rgba(const ubyte *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++, src++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = *src;
- }
- }
-}
-
-
-static void
-i8_put_tile_rgba(ubyte *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r;
- r = TILE_PIXEL(p, j, i, 0);
- *dst++ = (ubyte) r;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_A8L8_UNORM ***/
-
-static void
-a8l8_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- ushort ra = *src++;
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) = ra & 0xff;
- TILE_PIXEL(p, j, i, 3) = ra >> 8;
- }
- }
-}
-
-
-static void
-a8l8_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- unsigned r, a;
- r = TILE_PIXEL(p, j, i, 0);
- a = TILE_PIXEL(p, j, i, 3);
- *dst++ = (a << 8) | r;
- }
- }
-}
-
-
-
-
-/*** PIPE_FORMAT_Z32_UNORM ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z32_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const double scale = 1.0 / (double) 0xffffffff;
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (float) (*src++ * scale);
- }
- }
-}
-
-
-/*** PIPE_FORMAT_S8Z24_UNORM ***/
-
-/**
- * Return Z component as four float in [0,1]. Stencil part ignored.
- */
-static void
-s8z24_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const double scale = 1.0 / ((1 << 24) - 1);
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ & 0xffffff));
- }
- }
-}
-
-
-/*** PIPE_FORMAT_Z24S8_UNORM ***/
-
-/**
- * Return Z component as four float in [0,1]. Stencil part ignored.
- */
-static void
-z24s8_get_tile_rgba(const unsigned *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- const double scale = 1.0 / ((1 << 24) - 1);
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (float) (scale * (*src++ >> 8));
- }
- }
-}
-
-
-/*** PIPE_FORMAT_Z32_FLOAT ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z32f_get_tile_rgba(const float *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = *src++;
- }
- }
-}
-
-
-/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/
-
-/**
- * Convert YCbCr (or YCrCb) to RGBA.
- */
-static void
-ycbcr_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p,
- boolean rev)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- /* do two texels at a time */
- for (j = 0; j < (w & ~1); j += 2, src += 2) {
- const ushort t0 = src[0];
- const ushort t1 = src[1];
- const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
- const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */
- ubyte cb, cr;
- float r, g, b;
-
- if (rev) {
- cb = t1 & 0xff; /* chroma U */
- cr = t0 & 0xff; /* chroma V */
- }
- else {
- cb = t0 & 0xff; /* chroma U */
- cr = t1 & 0xff; /* chroma V */
- }
-
- /* even pixel: y0,cr,cb */
- r = 1.164f * (y0-16) + 1.596f * (cr-128);
- g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y0-16) + 2.018f * (cb-128);
- TILE_PIXEL(p, j, i, 0) = r;
- TILE_PIXEL(p, j, i, 1) = g;
- TILE_PIXEL(p, j, i, 2) = b;
- TILE_PIXEL(p, j, i, 3) = 255;
-
- /* odd pixel: use y1,cr,cb */
- r = 1.164f * (y1-16) + 1.596f * (cr-128);
- g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y1-16) + 2.018f * (cb-128);
- TILE_PIXEL(p, j + 1, i, 0) = r;
- TILE_PIXEL(p, j + 1, i, 1) = g;
- TILE_PIXEL(p, j + 1, i, 2) = b;
- TILE_PIXEL(p, j + 1, i, 3) = 255;
- }
- /* do the last texel */
- if (w & 1) {
- const ushort t0 = src[0];
- const ushort t1 = src[1];
- const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */
- ubyte cb, cr;
- float r, g, b;
-
- if (rev) {
- cb = t1 & 0xff; /* chroma U */
- cr = t0 & 0xff; /* chroma V */
- }
- else {
- cb = t0 & 0xff; /* chroma U */
- cr = t1 & 0xff; /* chroma V */
- }
-
- /* even pixel: y0,cr,cb */
- r = 1.164f * (y0-16) + 1.596f * (cr-128);
- g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128);
- b = 1.164f * (y0-16) + 2.018f * (cb-128);
- TILE_PIXEL(p, j, i, 0) = r;
- TILE_PIXEL(p, j, i, 1) = g;
- TILE_PIXEL(p, j, i, 2) = b;
- TILE_PIXEL(p, j, i, 3) = 255;
- }
- }
-}
-
-
-static void
-fake_get_tile_rgba(const ushort *src,
- unsigned w, unsigned h,
- uint8_t *p)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- TILE_PIXEL(p, j, i, 0) =
- TILE_PIXEL(p, j, i, 1) =
- TILE_PIXEL(p, j, i, 2) =
- TILE_PIXEL(p, j, i, 3) = (i ^ j) & 1 ? 255 : 0;
- }
- }
-}
-
-
-static void
-lp_tile_raw_to_rgba_soa(enum pipe_format format,
- void *src,
- uint w, uint h,
- uint8_t *p)
-{
- switch (format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_A1R5G5B5_UNORM:
- a1r5g5b5_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_A4R4G4B4_UNORM:
- a4r4g4b4_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- r5g6b5_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_L8_UNORM:
- l8_get_tile_rgba((ubyte *) src, w, h, p);
- break;
- case PIPE_FORMAT_A8_UNORM:
- a8_get_tile_rgba((ubyte *) src, w, h, p);
- break;
- case PIPE_FORMAT_I8_UNORM:
- i8_get_tile_rgba((ubyte *) src, w, h, p);
- break;
- case PIPE_FORMAT_A8L8_UNORM:
- a8l8_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_R16_SNORM:
- r16_get_tile_rgba((short *) src, w, h, p);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_get_tile_rgba((short *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- z16_get_tile_rgba((ushort *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z32_UNORM:
- z32_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- s8z24_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- z24s8_get_tile_rgba((unsigned *) src, w, h, p);
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- z32f_get_tile_rgba((float *) src, w, h, p);
- break;
- case PIPE_FORMAT_YCBCR:
- ycbcr_get_tile_rgba((ushort *) src, w, h, p, FALSE);
- break;
- case PIPE_FORMAT_YCBCR_REV:
- ycbcr_get_tile_rgba((ushort *) src, w, h, p, TRUE);
- break;
- default:
- debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format));
- fake_get_tile_rgba(src, w, h, p);
- }
-}
-
-
-void
-lp_get_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- uint8_t *p)
-{
- uint w = TILE_SIZE, h = TILE_SIZE;
- void *packed;
-
- if (pipe_clip_tile(x, y, &w, &h, pt))
- return;
-
- packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
-
- if (!packed)
- return;
-
- if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV)
- assert((x & 1) == 0);
-
- pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
-
- lp_tile_raw_to_rgba_soa(pt->format, packed, w, h, p);
-
- FREE(packed);
-}
-
-
-void
-lp_put_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- const uint8_t *p)
-{
- uint w = TILE_SIZE, h = TILE_SIZE;
- void *packed;
-
- if (pipe_clip_tile(x, y, &w, &h, pt))
- return;
-
- packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
-
- if (!packed)
- return;
-
- switch (pt->format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
- break;
- case PIPE_FORMAT_X8R8G8B8_UNORM:
- x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p);
- break;
- case PIPE_FORMAT_A1R5G5B5_UNORM:
- a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- r5g6b5_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- assert(0);
- break;
- case PIPE_FORMAT_A4R4G4B4_UNORM:
- a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_L8_UNORM:
- l8_put_tile_rgba((ubyte *) packed, w, h, p);
- break;
- case PIPE_FORMAT_A8_UNORM:
- a8_put_tile_rgba((ubyte *) packed, w, h, p);
- break;
- case PIPE_FORMAT_I8_UNORM:
- i8_put_tile_rgba((ubyte *) packed, w, h, p);
- break;
- case PIPE_FORMAT_A8L8_UNORM:
- a8l8_put_tile_rgba((ushort *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R16_SNORM:
- r16_put_tile_rgba((short *) packed, w, h, p);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_put_tile_rgba((short *) packed, w, h, p);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- /*z16_put_tile_rgba((ushort *) packed, w, h, p);*/
- break;
- case PIPE_FORMAT_Z32_UNORM:
- /*z32_put_tile_rgba((unsigned *) packed, w, h, p);*/
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- case PIPE_FORMAT_X8Z24_UNORM:
- /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p);*/
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p);*/
- break;
- default:
- debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format));
- }
-
- pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
-
- FREE(packed);
-}
-
-
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 3d8c703b73..040b01865d 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -64,14 +64,18 @@ tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH];
void
-lp_get_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- uint8_t *p);
+lp_tile_read_4ub(enum pipe_format format,
+ uint8_t *dst,
+ const void *src, unsigned src_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h);
+
void
-lp_put_tile_rgba_soa(struct pipe_transfer *pt,
- uint x, uint y,
- const uint8_t *p);
+lp_tile_write_4ub(enum pipe_format format,
+ const uint8_t *src,
+ void *dst, unsigned dst_stride,
+ unsigned x, unsigned y, unsigned w, unsigned h);
+
#ifdef __cplusplus
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
new file mode 100644
index 0000000000..004c5c979e
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+
+'''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Pixel format accessor functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+'''
+
+
+import sys
+import os.path
+
+sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '../../auxiliary/util'))
+
+from u_format_access import *
+
+
+def generate_format_read(format, dst_type, dst_native_type, dst_suffix):
+ '''Generate the function to read pixels from a particular format'''
+
+ name = short_name(format)
+
+ src_native_type = native_type(format)
+
+ print 'static void'
+ print 'lp_tile_%s_read_%s(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, dst_suffix, dst_native_type)
+ print '{'
+ print ' unsigned x, y;'
+ print ' const uint8_t *src_row = src + y0*src_stride;'
+ print ' for (y = 0; y < h; ++y) {'
+ print ' const %s *src_pixel = (const %s *)(src_row + x0*%u);' % (src_native_type, src_native_type, format.stride())
+ print ' for (x = 0; x < w; ++x) {'
+
+ names = ['']*4
+ if format.colorspace == 'rgb':
+ for i in range(4):
+ swizzle = format.out_swizzle[i]
+ if swizzle < 4:
+ names[swizzle] += 'rgba'[i]
+ elif format.colorspace == 'zs':
+ swizzle = format.out_swizzle[0]
+ if swizzle < 4:
+ names[swizzle] = 'z'
+ else:
+ assert False
+ else:
+ assert False
+
+ if format.layout == ARITH:
+ print ' %s pixel = *src_pixel++;' % src_native_type
+ shift = 0;
+ for i in range(4):
+ src_type = format.in_types[i]
+ width = src_type.size
+ if names[i]:
+ value = 'pixel'
+ mask = (1 << width) - 1
+ if shift:
+ value = '(%s >> %u)' % (value, shift)
+ if shift + width < format.block_size():
+ value = '(%s & 0x%x)' % (value, mask)
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ print ' %s %s = %s;' % (dst_native_type, names[i], value)
+ shift += width
+ elif format.layout == ARRAY:
+ for i in range(4):
+ src_type = format.in_types[i]
+ if names[i]:
+ value = '(*src_pixel++)'
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ print ' %s %s = %s;' % (dst_native_type, names[i], value)
+ else:
+ assert False
+
+ for i in range(4):
+ if format.colorspace == 'rgb':
+ swizzle = format.out_swizzle[i]
+ if swizzle < 4:
+ value = names[swizzle]
+ elif swizzle == SWIZZLE_0:
+ value = '0'
+ elif swizzle == SWIZZLE_1:
+ value = '1'
+ else:
+ assert False
+ elif format.colorspace == 'zs':
+ if i < 3:
+ value = 'z'
+ else:
+ value = '1'
+ else:
+ assert False
+ print ' TILE_PIXEL(dst, x, y, %u) = %s; /* %s */' % (i, value, 'rgba'[i])
+
+ print ' }'
+ print ' src_row += src_stride;'
+ print ' }'
+ print '}'
+ print
+
+
+def generate_format_write(format, src_type, src_native_type, src_suffix):
+ '''Generate the function to write pixels to a particular format'''
+
+ name = short_name(format)
+
+ dst_native_type = native_type(format)
+
+ print 'static void'
+ print 'lp_tile_%s_write_%s(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h)' % (name, src_suffix, src_native_type)
+ print '{'
+ print ' unsigned x, y;'
+ print ' uint8_t *dst_row = dst + y0*dst_stride;'
+ print ' for (y = 0; y < h; ++y) {'
+ print ' %s *dst_pixel = (%s *)(dst_row + x0*%u);' % (dst_native_type, dst_native_type, format.stride())
+ print ' for (x = 0; x < w; ++x) {'
+
+ inv_swizzle = [None]*4
+ if format.colorspace == 'rgb':
+ for i in range(4):
+ swizzle = format.out_swizzle[i]
+ if swizzle < 4:
+ inv_swizzle[swizzle] = i
+ elif format.colorspace == 'zs':
+ swizzle = format.out_swizzle[0]
+ if swizzle < 4:
+ inv_swizzle[swizzle] = 0
+ else:
+ assert False
+
+ if format.layout == ARITH:
+ print ' %s pixel = 0;' % dst_native_type
+ shift = 0;
+ for i in range(4):
+ dst_type = format.in_types[i]
+ width = dst_type.size
+ if inv_swizzle[i] is not None:
+ value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ if shift:
+ value = '(%s << %u)' % (value, shift)
+ print ' pixel |= %s;' % value
+ shift += width
+ print ' *dst_pixel++ = pixel;'
+ elif format.layout == ARRAY:
+ for i in range(4):
+ dst_type = format.in_types[i]
+ if inv_swizzle[i] is not None:
+ value = 'TILE_PIXEL(src, x, y, %u)' % inv_swizzle[i]
+ value = conversion_expr(src_type, dst_type, dst_native_type, value)
+ print ' *dst_pixel++ = %s;' % value
+ else:
+ assert False
+
+ print ' }'
+ print ' dst_row += dst_stride;'
+ print ' }'
+ print '}'
+ print
+
+
+def generate_read(formats, dst_type, dst_native_type, dst_suffix):
+ '''Generate the dispatch function to read pixels from any format'''
+
+ for format in formats:
+ if is_format_supported(format):
+ generate_format_read(format, dst_type, dst_native_type, dst_suffix)
+
+ print 'void'
+ print 'lp_tile_read_%s(enum pipe_format format, %s *dst, const void *src, unsigned src_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (dst_suffix, dst_native_type)
+ print '{'
+ print ' void (*func)(%s *dst, const uint8_t *src, unsigned src_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % dst_native_type
+ print ' switch(format) {'
+ for format in formats:
+ if is_format_supported(format):
+ print ' case %s:' % format.name
+ print ' func = &lp_tile_%s_read_%s;' % (short_name(format), dst_suffix)
+ print ' break;'
+ print ' default:'
+ print ' debug_printf("unsupported format\\n");'
+ print ' return;'
+ print ' }'
+ print ' func(dst, (const uint8_t *)src, src_stride, x, y, w, h);'
+ print '}'
+ print
+
+
+def generate_write(formats, src_type, src_native_type, src_suffix):
+ '''Generate the dispatch function to write pixels to any format'''
+
+ for format in formats:
+ if is_format_supported(format):
+ generate_format_write(format, src_type, src_native_type, src_suffix)
+
+ print 'void'
+ print 'lp_tile_write_%s(enum pipe_format format, const %s *src, void *dst, unsigned dst_stride, unsigned x, unsigned y, unsigned w, unsigned h)' % (src_suffix, src_native_type)
+
+ print '{'
+ print ' void (*func)(const %s *src, uint8_t *dst, unsigned dst_stride, unsigned x0, unsigned y0, unsigned w, unsigned h);' % src_native_type
+ print ' switch(format) {'
+ for format in formats:
+ if is_format_supported(format):
+ print ' case %s:' % format.name
+ print ' func = &lp_tile_%s_write_%s;' % (short_name(format), src_suffix)
+ print ' break;'
+ print ' default:'
+ print ' debug_printf("unsupported format\\n");'
+ print ' return;'
+ print ' }'
+ print ' func(src, (uint8_t *)dst, dst_stride, x, y, w, h);'
+ print '}'
+ print
+
+
+def main():
+ formats = []
+ for arg in sys.argv[1:]:
+ formats.extend(parse(arg))
+
+ print '/* This file is autogenerated by lp_tile_soa.py from u_format.csv. Do not edit directly. */'
+ print
+ # This will print the copyright message on the top of this file
+ print __doc__.strip()
+ print
+ print '#include "pipe/p_compiler.h"'
+ print '#include "util/u_format.h"'
+ print '#include "util/u_math.h"'
+ print '#include "lp_tile_soa.h"'
+ print
+ print 'const unsigned char'
+ print 'tile_offset[TILE_VECTOR_HEIGHT][TILE_VECTOR_WIDTH] = {'
+ print ' { 0, 1, 4, 5, 8, 9, 12, 13},'
+ print ' { 2, 3, 6, 7, 10, 11, 14, 15}'
+ print '};'
+ print
+
+ generate_clamp()
+
+ type = Type(UNSIGNED, True, 8)
+ native_type = 'uint8_t'
+ suffix = '4ub'
+
+ generate_read(formats, type, native_type, suffix)
+ generate_write(formats, type, native_type, suffix)
+
+
+if __name__ == '__main__':
+ main()