From c11a7ec821d41b91a3825c5abfb4687c98b5bf98 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 8 Jun 2008 03:04:14 -0400 Subject: Initial commit for g3dvl. Initial commit for g3dvl, contains support for basic XvMC features. - Context, surface, block, macroblock creation and deletion - Surface rendering - Frame pictures - Frame based motion compensation - Intra-coded macroblocks - Predicted macroblocks - Bi-directionally predicted macroblocks - Surface display - Color conversion - Scaling --- src/gallium/state_trackers/g3dvl/Makefile | 18 + src/gallium/state_trackers/g3dvl/tests/.gitignore | 6 + src/gallium/state_trackers/g3dvl/tests/Makefile | 42 + .../state_trackers/g3dvl/tests/test_b_rendering.c | 226 ++ .../state_trackers/g3dvl/tests/test_context.c | 22 + .../state_trackers/g3dvl/tests/test_i_rendering.c | 137 ++ .../state_trackers/g3dvl/tests/test_p_rendering.c | 214 ++ .../state_trackers/g3dvl/tests/test_surface.c | 26 + src/gallium/state_trackers/g3dvl/vl_context.c | 2293 ++++++++++++++++++++ src/gallium/state_trackers/g3dvl/vl_context.h | 73 + src/gallium/state_trackers/g3dvl/vl_data.c | 188 ++ src/gallium/state_trackers/g3dvl/vl_data.h | 25 + src/gallium/state_trackers/g3dvl/vl_defs.h | 12 + src/gallium/state_trackers/g3dvl/vl_surface.c | 539 +++++ src/gallium/state_trackers/g3dvl/vl_surface.h | 81 + src/gallium/state_trackers/g3dvl/vl_types.h | 88 + 16 files changed, 3990 insertions(+) create mode 100644 src/gallium/state_trackers/g3dvl/Makefile create mode 100644 src/gallium/state_trackers/g3dvl/tests/.gitignore create mode 100644 src/gallium/state_trackers/g3dvl/tests/Makefile create mode 100644 src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c create mode 100644 src/gallium/state_trackers/g3dvl/tests/test_context.c create mode 100644 src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c create mode 100644 src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c create mode 100644 src/gallium/state_trackers/g3dvl/tests/test_surface.c 
create mode 100644 src/gallium/state_trackers/g3dvl/vl_context.c
create mode 100644 src/gallium/state_trackers/g3dvl/vl_context.h
create mode 100644 src/gallium/state_trackers/g3dvl/vl_data.c
create mode 100644 src/gallium/state_trackers/g3dvl/vl_data.h
create mode 100644 src/gallium/state_trackers/g3dvl/vl_defs.h
create mode 100644 src/gallium/state_trackers/g3dvl/vl_surface.c
create mode 100644 src/gallium/state_trackers/g3dvl/vl_surface.h
create mode 100644 src/gallium/state_trackers/g3dvl/vl_types.h
(limited to 'src/gallium/state_trackers/g3dvl')
diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile
new file mode 100644
index 0000000000..a0d85fbcc8
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/Makefile
@@ -0,0 +1,18 @@
+TARGET = libg3dvl.a
+OBJECTS = vl_context.o vl_data.o vl_surface.o
+GALLIUMDIR = ../..
+# Debug info, position-independent code, warnings as errors, Gallium include paths
+CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary
+
+#############################################
+# Targets that name actions rather than files (.PHONY is a special target: declared with ':', not assigned with '=')
+.PHONY: all clean
+
+all: ${TARGET}
+
+${TARGET}: ${OBJECTS}
+	ar rcs $@ $^
+
+clean:
+	rm -rf ${OBJECTS} ${TARGET}
+
diff --git a/src/gallium/state_trackers/g3dvl/tests/.gitignore b/src/gallium/state_trackers/g3dvl/tests/.gitignore
new file mode 100644
index 0000000000..939666da9a
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/tests/.gitignore
@@ -0,0 +1,6 @@
+test_context
+test_surface
+test_i_rendering
+test_p_rendering
+test_b_rendering
+
diff --git a/src/gallium/state_trackers/g3dvl/tests/Makefile b/src/gallium/state_trackers/g3dvl/tests/Makefile
new file mode 100644
index 0000000000..8f983593c3
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/tests/Makefile
@@ -0,0 +1,42 @@
+GALLIUMDIR = ../../..
+
+CFLAGS += -g -Wall -Werror \
+	-I${GALLIUMDIR}/state_trackers/g3dvl \
+	-I${GALLIUMDIR}/winsys/g3dvl \
+	-I${GALLIUMDIR}/include \
+	-I${GALLIUMDIR}/auxiliary \
+	-I${GALLIUMDIR}/drivers
+LDFLAGS += -L${GALLIUMDIR}/state_trackers/g3dvl \
+	-L${GALLIUMDIR}/drivers/softpipe \
+	-L${GALLIUMDIR}/auxiliary/tgsi \
+	-L${GALLIUMDIR}/auxiliary/draw \
+	-L${GALLIUMDIR}/auxiliary/util \
+	-L${GALLIUMDIR}/auxiliary/translate \
+	-L${GALLIUMDIR}/auxiliary/cso_cache \
+	-L${GALLIUMDIR}/auxiliary/rtasm
+LIBS += -lg3dvl -lsoftpipe -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lutil -lX11 -lm
+
+#############################################
+# Targets that name actions rather than files (.PHONY is a special target: declared with ':', not assigned with '=')
+.PHONY: all clean
+
+all: test_context test_surface test_i_rendering test_p_rendering test_b_rendering
+# Every test links its own object with the g3dvl winsys object (xsp_winsys.o) and the libraries in LIBS
+test_context: test_context.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
+	$(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
+
+test_surface: test_surface.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
+	$(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
+
+test_i_rendering: test_i_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
+	$(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
+
+test_p_rendering: test_p_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
+	$(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
+
+test_b_rendering: test_b_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o
+	$(CC) ${LDFLAGS} -o $@ $^ ${LIBS}
+
+clean:
+	rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_b_rendering
+
diff --git a/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c
new file mode 100644
index 0000000000..b78cc851ae
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c
@@ -0,0 +1,226 @@
+#include
+#include
+#include
+#include
+#include
+
+static const unsigned short ycbcr16x16_420[8*8*6] =
+{
+	0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072,
+	0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072,
+	0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072,
+
0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 
0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E +}; + +static const signed short ycbcr16x16_420_2[8*8*6] = +{ + -0x00A5,-0x00A5,-0x00A5,-0x0072,-0x00A5,-0x0072,-0x0072,-0x0072, + -0x0072,-0x00A5,-0x0072,-0x0072,-0x00A5,-0x0072,-0x0072,-0x0072, + -0x0072,-0x00A5,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + 
-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5, + -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x00A5, + -0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x00A5,-0x00A5, + + -0x004F,-0x004F,-0x004F,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x004F,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x004F, + -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x004F,-0x004F, + + -0x003E,-0x003E,-0x003E,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x003E,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, + -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x003E, + 
-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x003E,-0x003E
+};
+/* Interactive test: fills a past and a future reference surface with intra macroblocks, renders one B macroblock from them into sfc, shows sfc in an X11 window and exits on the first key press. Always returns 0. */
+int main(int argc, char **argv)
+{
+	const unsigned int video_width = 32, video_height = 32;
+	const unsigned int window_width = video_width * 2, window_height = video_height * 2;
+	int quit = 0;
+	Display *display;
+	Window root, window;
+	Pixmap framebuffer;
+	XEvent event;
+	struct pipe_context *pipe;
+	struct VL_CONTEXT *ctx;
+	struct VL_SURFACE *sfc, *past_sfc, *future_sfc;
+	struct VL_MOTION_VECTOR motion_vector[2] =
+	{
+		{
+			{0, 0}, {0, 0}
+		},
+		{
+			{0, 0}, {0, 0}
+		}
+	};
+	/* Create the output window and a window-sized, depth-24 pixmap to render into. NOTE(review): the XOpenDisplay result is used without a NULL check. */
+	display = XOpenDisplay(NULL);
+	root = XDefaultRootWindow(display);
+	window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0);
+	framebuffer = XCreatePixmap(display, root, window_width, window_height, 24);
+
+	XSelectInput(display, window, ExposureMask | KeyPressMask);
+	XMapWindow(display, window);
+	XSync(display, 0);
+	/* One video context (32x32, YCbCr 4:2:0) and three surfaces on it: the output plus the two references */
+	pipe = create_pipe_context(display);
+	vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx);
+	vlCreateSurface(ctx, &sfc);
+	vlCreateSurface(ctx, &past_sfc);
+	vlCreateSurface(ctx, &future_sfc);
+	/* Same block data at (0,0), (1,0), (0,1), (1,1) on both references - presumably macroblock x/y coordinates, and 0x3F presumably selects all six blocks; confirm in vl_surface.h */
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc);
+	vlRenderBMacroBlock	/* single B macroblock at (0,0), zero motion vectors, using the negated data table */
+	(
+		VL_FRAME_PICTURE,
+		VL_FIELD_FIRST,
+		0,
+		0,
+		VL_FRAME_MC,
+		motion_vector,
+		0x3F,
+		VL_DCT_FRAME_CODED,
+		(short*)ycbcr16x16_420_2,
+		past_sfc,
+		future_sfc,
+		sfc
+	);
+	vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE);
+
+	puts("Press any key to continue...");
+	/* Event loop: repaint the window from the pixmap on Expose, leave on the first KeyPress */
+	while (!quit)
+	{
+		XNextEvent(display, &event);
+		switch (event.type)
+		{
+			case Expose:
+			{
+				XCopyArea
+				(
+					display,
+					framebuffer,
+					window,
+					XDefaultGC(display, XDefaultScreen(display)),
+					0,
+					0,
+					window_width,
+					window_height,
+					0,
+					0
+				);
+				break;
+			}
+			case KeyPress:
+			{
+				quit = 1;
+				break;
+			}
+		}
+	}
+	/* Release the surfaces and the context, then the X resources */
+	vlDestroySurface(sfc);
+	vlDestroySurface(past_sfc);
+	vlDestroySurface(future_sfc);
+	vlDestroyContext(ctx);
+
+	XFreePixmap(display, framebuffer);
+	XDestroyWindow(display, window);
+	XCloseDisplay(display);
+
+	return 0;
+}
+
diff --git a/src/gallium/state_trackers/g3dvl/tests/test_context.c b/src/gallium/state_trackers/g3dvl/tests/test_context.c
new file mode 100644
index 0000000000..2002977ee2
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/tests/test_context.c
@@ -0,0 +1,22 @@
+#include
+#include
+/* Smoke test: create a 32x32 YCbCr 4:2:0 video context and destroy it again. No window, no rendering. Always returns 0. */
+int main(int argc, char **argv)
+{
+	const unsigned int video_width = 32, video_height = 32;
+
+	Display *display;
+	struct pipe_context *pipe;
+	struct VL_CONTEXT *ctx;
+
+	display = XOpenDisplay(NULL);	/* NOTE(review): result is used without a NULL check */
+	pipe = create_pipe_context(display);
+
+	vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx);
+	vlDestroyContext(ctx);
+
+	XCloseDisplay(display);
+
+	return 0;
+}
+
diff --git a/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c
new file mode 100644
index 0000000000..1f96471130
--- /dev/null
+++ 
b/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c @@ -0,0 +1,137 @@ +#include +#include +#include +#include +#include + +static const unsigned short ycbcr16x16_420[8*8*6] = +{ + 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 
0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,
+	0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5,
+
+	0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+	0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+	0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+	0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+	0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+	0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,
+	0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,
+	0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F,
+
+	0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E
+};
+/* Interactive test: renders four intra macroblocks from the table above onto one surface, shows it in an X11 window and exits on the first key press. Always returns 0. */
+int main(int argc, char **argv)
+{
+	const unsigned int video_width = 32, video_height = 32;
+	const unsigned int window_width = video_width * 2, window_height = video_height * 2;
+	int quit = 0;
+	Display *display;
+	Window root, window;
+	Pixmap framebuffer;
+	XEvent event;
+	struct pipe_context *pipe;
+	struct VL_CONTEXT *ctx;
+	struct VL_SURFACE *sfc;
+	/* Create the output window and a window-sized, depth-24 pixmap to render into. NOTE(review): the XOpenDisplay result is used without a NULL check. */
+	display = XOpenDisplay(NULL);
+	root = XDefaultRootWindow(display);
+	window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0);
+	framebuffer = XCreatePixmap(display, root, window_width, window_height, 24);
+
+	XSelectInput(display, window, ExposureMask | KeyPressMask);
+	XMapWindow(display, window);
+	XSync(display, 0);
+	/* One video context (32x32, YCbCr 4:2:0) with a single surface */
+	pipe = create_pipe_context(display);
+	vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx);
+	vlCreateSurface(ctx, &sfc);
+	/* Same block data at (0,0), (1,0), (0,1), (1,1) - presumably macroblock x/y coordinates, and 0x3F presumably selects all six blocks; confirm in vl_surface.h */
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc);
+	vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE);
+
+	puts("Press any key to continue...");
+	/* Event loop: repaint the window from the pixmap on Expose, leave on the first KeyPress */
+	while (!quit)
+	{
+		XNextEvent(display, &event);
+		switch (event.type)
+		{
+			case Expose:
+			{
+				XCopyArea
+				(
+					display,
+					framebuffer,
+					window,
+					XDefaultGC(display, XDefaultScreen(display)),
+					0,
+					0,
+					window_width,
+					window_height,
+					0,
+					0
+				);
+				break;
+			}
+			case KeyPress:
+			{
+				quit = 1;
+				break;
+			}
+		}
+	}
+	/* Release the surface and the context, then the X resources */
+	vlDestroySurface(sfc);
+	vlDestroyContext(ctx);
+
+	XFreePixmap(display, framebuffer);
+	XDestroyWindow(display, window);
+	XCloseDisplay(display);
+
+	return 0;
+}
+
diff --git a/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c
new file mode 100644
index 0000000000..2203349784
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c
@@ -0,0 +1,214 @@
+#include
+#include
+#include
+#include
+#include
+
+static const unsigned short ycbcr16x16_420[8*8*6] =
+{
+	0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072,
+	0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072,
+	0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072,
+	0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+	0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+	0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+	0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+	0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,
+
+
0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 
0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,
+	0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E
+};
+/* Second data table, passed to vlRenderPMacroBlock below: six 8x8 blocks that are zero except for a few entries in the first, fifth and sixth block */
+static const signed short ycbcr16x16_420_2[8*8*6] =
+{
+	-51,-51,-51, 0,-51, 0, 0, 0,
+	0,-51, 0, 0,-51, 0, 0, 0,
+	0,-51, 0, 0,-51,-51, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+	99, 99, 99, 0, 0, 0, 0, 0,
+	0, 0, 99, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+
+	33, 33, 33, 0, 0, 0, 0, 0,
+	0, 0, 33, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0
+};
+/* Interactive test: fills a reference surface with intra macroblocks, renders one P macroblock from it into sfc, shows sfc in an X11 window and exits on the first key press. Always returns 0. */
+int main(int argc, char **argv)
+{
+	const unsigned int video_width = 32, video_height = 32;
+	const unsigned int window_width = video_width * 2, window_height = video_height * 2;
+	int quit = 0;
+	Display *display;
+	Window root, window;
+	Pixmap framebuffer;
+	XEvent event;
+	struct pipe_context *pipe;
+	struct VL_CONTEXT *ctx;
+	struct VL_SURFACE *sfc, *ref_sfc;
+	struct VL_MOTION_VECTOR motion_vector =
+	{
+		{0, 0}, {0, 0}
+	};
+	/* Create the output window and a window-sized, depth-24 pixmap to render into. NOTE(review): the XOpenDisplay result is used without a NULL check. */
+	display = XOpenDisplay(NULL);
+	root = XDefaultRootWindow(display);
+	window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0);
+	framebuffer = XCreatePixmap(display, root, window_width, window_height, 24);
+
+	XSelectInput(display, window, ExposureMask | KeyPressMask);
+	XMapWindow(display, window);
+	XSync(display, 0);
+	/* One video context (32x32, YCbCr 4:2:0) and two surfaces on it: the output and the reference */
+	pipe = create_pipe_context(display);
+	vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx);
+	vlCreateSurface(ctx, &sfc);
+	vlCreateSurface(ctx, &ref_sfc);
+	/* Same block data at (0,0), (1,0), (0,1), (1,1) on the reference - presumably macroblock x/y coordinates, and 0x3F presumably selects all six blocks; confirm in vl_surface.h */
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+	vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc);
+	vlRenderPMacroBlock	/* single P macroblock at (0,0), zero motion vector, using the second data table */
+	(
+		VL_FRAME_PICTURE,
+		VL_FIELD_FIRST,
+		0,
+		0,
+		VL_FRAME_MC,
+		&motion_vector,
+		0x3F,
+		VL_DCT_FRAME_CODED,
+		(short*)ycbcr16x16_420_2,
+		ref_sfc,
+		sfc
+	);
+	vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE);
+
+	puts("Press any key to continue...");
+	/* Event loop: repaint the window from the pixmap on Expose, leave on the first KeyPress */
+	while (!quit)
+	{
+		XNextEvent(display, &event);
+		switch (event.type)
+		{
+			case Expose:
+			{
+				XCopyArea
+				(
+					display,
+					framebuffer,
+					window,
+					XDefaultGC(display, XDefaultScreen(display)),
+					0,
+					0,
+					window_width,
+					window_height,
+					0,
+					0
+				);
+				break;
+			}
+			case KeyPress:
+			{
+				quit = 1;
+				break;
+			}
+		}
+	}
+	/* Release the surfaces and the context */
+	vlDestroySurface(sfc);
+	vlDestroySurface(ref_sfc);
+	vlDestroyContext(ctx);
+
+
XFreePixmap(display, framebuffer); + XDestroyWindow(display, window); + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/tests/test_surface.c b/src/gallium/state_trackers/g3dvl/tests/test_surface.c new file mode 100644 index 0000000000..4d1946396a --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_surface.c @@ -0,0 +1,26 @@ +#include +#include +#include + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + + Display *display; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc; + + display = XOpenDisplay(NULL); + pipe = create_pipe_context(display); + + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + vlDestroySurface(sfc); + vlDestroyContext(ctx); + + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c new file mode 100644 index 0000000000..7193f7ccea --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -0,0 +1,2293 @@ +#include "vl_context.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vl_data.h" + +static int vlInitIDCT(struct VL_CONTEXT *context) +{ + assert(context); + + + + return 0; +} + +static int vlDestroyIDCT(struct VL_CONTEXT *context) +{ + assert(context); + + + + return 0; +} + +static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + const unsigned int num_attribs = 4; + const unsigned int semantic_names[4] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC + }; + const unsigned int semantic_indexes[4] = {0, 1, 2, 3}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct 
tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 1; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + 
decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, c1 ; Translate vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move texcoords to output + mov o2, i2 + mov o3, i3 + */ + for (i = 1; i < num_attribs; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction 
+ ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); + + return 0; +} + +static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (texcoords) */ + for (i = 0; i < 3; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = i + 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare output (color) */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + 
decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare samplers */ + for (i = 0; i < 3; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel + tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel + tex2d o0.z, i2, s2 ; Read texel from chroma Cr texture into .z channel + */ + for (i = 0; i < 3; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + fs.tokens = tokens; + + context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); + + return 0; +} + +static int 
vlCreateVertexShaderPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int num_attribs = 5; + const unsigned int semantic_names[5] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC + }; + const unsigned int semantic_indexes[5] = {0, 1, 2, 3, 4}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface + C[2] translates the ref surface texcoords to the ref macroblock */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = 
TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 2; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, c1 ; Translate vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + 
&tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move luma & chroma texcoords to output + mov o2, i2 + mov o3, i3 + */ + for (i = 1; i < num_attribs - 1; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i4, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o4, t0, c2 ; Translate texcoords into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 4; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + 
+ /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.p_vs = pipe->create_vs_state(pipe, &vs); + + return 0; +} + +static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (texcoords) */ + for (i = 0; i < 4; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = i + 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant input */ + /* C[0] is a multiplier to use when concatenating differential into a single channel + C[1] is a bias to get differential back to -1,1 range*/ + decl = 
tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 1; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare samplers */ + for (i = 0; i < 4; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + mov t1.x, t0.w ; Move high part from .w channel to .x + tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + mov t1.y, t0.w ; Move high part from .w channel to .y + tex2d t0.zw, i2, s2 ; Read texel from chroma Cr texture into .z and .w channels + mov t1.z, t0.w ; Move high part from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + 
inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t1, t1, c0 ; Multiply high part by multiplier to get back its full value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ + inst = tgsi_default_full_instruction(); + 
inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, t1 ; Add ref and differential 
to form final output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + fs.tokens = tokens; + + context->states.mc.p_fs = pipe->create_fs_state(pipe, &fs); + + return 0; +} + +static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int num_attribs = 6; + const unsigned int semantic_names[6] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC + }; + const unsigned int semantic_indexes[6] = {0, 1, 2, 3, 4, 5}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct 
tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface + C[2] translates the past surface texcoords to the ref macroblock + C[3] translates the future surface texcoords to the ref macroblock */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 3; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + 
inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, c1 ; Translate vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move luma & chroma texcoords to output + mov o2, i2 + mov o3, i3 ; NOTE(review): num_attribs is 6 here, so this loop also emits mov o4, i4, which the add o4 below overwrites + */ + for (i = 1; i < num_attribs - 1; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i4, c0 ; Scale normalized coords to window coords + add o4, t0, c2 ; Translate texcoords into position + mul t1, i5, c0 ; Repeat for the future surface + add o5, t1, c3 */ + for (i = 0; i < 2; 
++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i + 4; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.b_vs = pipe->create_vs_state(pipe, &vs); + + return 0; +} + +static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction 
inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (texcoords) */ + for (i = 0; i < 5; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = i + 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant input */ + /* C[0] is a multiplier to use when concatenating differential into a single channel + C[1] is a bias to get differential back to -1,1 range*/ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 1; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* 
Declare samplers */ + for (i = 0; i < 5; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + mov t1.x, t0.w ; Move high part from .w channel to .x + tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + mov t1.y, t0.w ; Move high part from .w channel to .y + tex2d t0.zw, i2, s2 ; Read texel from chroma Cr texture into .z and .w channels + mov t1.z, t0.w ; Move high part from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + 
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t1, t1, c0 ; Multiply high part by multiplier to get back its full value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = 
TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* tex2d t1, i3, s3 ; Read texel from past macroblock + tex2d t2, i4, s4 ; Read texel from future macroblock */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i + 1; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* add t0, t0, t1 ; Add past and differential to form partial output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, t2 ; Add future and differential to form final 
output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + fs.tokens = tokens; + + context->states.mc.b_fs = pipe->create_fs_state(pipe, &fs); + + return 0; +} + +int vlCreateDataBufsMC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + /* Create our vertex buffer and vertex buffer element */ + context->states.mc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); + context->states.mc.vertex_bufs[0].max_index = 23; + context->states.mc.vertex_bufs[0].buffer_offset = 0; + context->states.mc.vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 24 + ); + + context->states.mc.vertex_buf_elems[0].src_offset = 0; + context->states.mc.vertex_buf_elems[0].vertex_buffer_index = 0; + context->states.mc.vertex_buf_elems[0].nr_components = 2; + context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Create our texcoord buffers and texcoord buffer elements */ + /* TODO: Should be able to use 1 texcoord buf for chroma textures, 1 buf for ref surfaces */ + for (i = 1; i < 6; ++i) + { + 
context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); + context->states.mc.vertex_bufs[i].max_index = 23; + context->states.mc.vertex_bufs[i].buffer_offset = 0; + context->states.mc.vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + + context->states.mc.vertex_buf_elems[i].src_offset = 0; + context->states.mc.vertex_buf_elems[i].vertex_buffer_index = i; + context->states.mc.vertex_buf_elems[i].nr_components = 2; + context->states.mc.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + } + + /* Fill buffers */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_chroma_420_texcoords, + sizeof(struct VL_VERTEX2F) * 24 + ); + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_luma_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + /* TODO: Accommodate 422, 444 */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_chroma_420_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[3].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_chroma_420_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[4].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_ref_surface_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[5].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_ref_surface_texcoords, + sizeof(struct VL_TEXCOORD2F) * 24 + ); + + for (i = 0; i < 6; ++i) + pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer); + + /* Create our constant buffer */ + 
context->states.mc.vs_const_buf.size = sizeof(struct VL_MC_VS_CONSTS); + context->states.mc.vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.mc.vs_const_buf.size + ); + + context->states.mc.fs_const_buf.size = sizeof(struct VL_MC_FS_CONSTS); + context->states.mc.fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.mc.fs_const_buf.size + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.mc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &vl_mc_fs_consts, + sizeof(struct VL_MC_FS_CONSTS) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitMC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int filters[5]; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + context->states.mc.viewport.scale[0] = context->video_width; + context->states.mc.viewport.scale[1] = context->video_height; + context->states.mc.viewport.scale[2] = 1; + context->states.mc.viewport.scale[3] = 1; + context->states.mc.viewport.translate[0] = 0; + context->states.mc.viewport.translate[1] = 0; + context->states.mc.viewport.translate[2] = 0; + context->states.mc.viewport.translate[3] = 0; + + context->states.mc.render_target.width = context->video_width; + context->states.mc.render_target.height = context->video_height; + context->states.mc.render_target.num_cbufs = 1; + /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ + /*context->states.mc.render_target.cbufs[0] = ;*/ + context->states.mc.render_target.zsbuf = NULL; + + filters[0] = PIPE_TEX_FILTER_NEAREST; + filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? 
PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[3] = PIPE_TEX_FILTER_NEAREST; + filters[4] = PIPE_TEX_FILTER_NEAREST; + + for (i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.mc.samplers[i] = pipe->create_sampler_state(pipe, &sampler); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8L8_UNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8 * 4; + template.depth[0] = 1; + template.compressed = 0; + template.cpp = 2; + context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); + + if (context->video_format == VL_FORMAT_YCBCR_420) + template.height[0] = 8; + else if (context->video_format == VL_FORMAT_YCBCR_422) + template.height[0] = 8 * 2; + else if (context->video_format == VL_FORMAT_YCBCR_444) + template.height[0] = 8 * 4; + else + assert(0); + + context->states.mc.textures[1] = pipe->screen->texture_create(pipe->screen, &template); + context->states.mc.textures[2] = pipe->screen->texture_create(pipe->screen, &template); + + /* textures[3] & textures[4] are assigned from VL_SURFACEs for P and B macroblocks at render time */ + + vlCreateVertexShaderIMC(context); + vlCreateFragmentShaderIMC(context); + vlCreateVertexShaderPMC(context); + vlCreateFragmentShaderPMC(context); + vlCreateVertexShaderBMC(context); + 
vlCreateFragmentShaderBMC(context); + vlCreateDataBufsMC(context); + + return 0; +} + +static int vlDestroyMC(struct VL_CONTEXT *context) +{ + unsigned int i; + + assert(context); + + for (i = 0; i < 5; ++i) + { + context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer); + } + + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[5].buffer); + + /* Textures 3 & 4 are not created directly, no need to release them here */ + for (i = 0; i < 3; ++i) + pipe_texture_release(&context->states.mc.textures[i]); + + context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs); + context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs); + context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs); + context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs); + context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs); + context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs); + + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer); + + return 0; +} + +static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + const unsigned int num_attribs = 2; + const unsigned int semantic_names[2] = {TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC}; + const unsigned int semantic_indexes[2] = {0, 1}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = 
context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* MOV instructions */ + /* mov o0, i0 + mov o1, i1 */ + for (i = 0; i < num_attribs; i++) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* END instruction */ + inst = 
tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); + + return 0; +} + +static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare TEX[0] input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare constant input */ + /* Constants include bias vector, 4x4 csc matrix, total 5 vectors */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = 
TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 4; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* TEX instruction */ + /* tex2d t0, i0, s0 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* SUB instruction */ + /* sub t0, t0, c0 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = 
TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* DP4 instruction */ + /* dp4 o0.x, t0, c1 + dp4 o0.y, t0, c2 + dp4 o0.z, t0, c3 + dp4 o0.w, t0, c4 */ + for (i = 0; i < 4; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_DP4; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + fs.tokens = tokens; + + context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); + + return 0; +} + +static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + /* + Create our vertex buffer and vertex buffer element + VB contains 4 vertices that render a quad covering the entire window + to display a rendered surface + Quad is rendered as a tri strip + */ + context->states.csc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); + context->states.csc.vertex_bufs[0].max_index = 3; + 
context->states.csc.vertex_bufs[0].buffer_offset = 0; + context->states.csc.vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_surface_vertex_positions, + sizeof(struct VL_VERTEX2F) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[0].buffer); + + context->states.csc.vertex_buf_elems[0].src_offset = 0; + context->states.csc.vertex_buf_elems[0].vertex_buffer_index = 0; + context->states.csc.vertex_buf_elems[0].nr_components = 2; + context->states.csc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our texcoord buffer and texcoord buffer element + Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + context->states.csc.vertex_bufs[1].pitch = sizeof(struct VL_TEXCOORD2F); + context->states.csc.vertex_bufs[1].max_index = 3; + context->states.csc.vertex_bufs[1].buffer_offset = 0; + context->states.csc.vertex_bufs[1].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_TEXCOORD2F) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + vl_surface_texcoords, + sizeof(struct VL_TEXCOORD2F) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + + context->states.csc.vertex_buf_elems[1].src_offset = 0; + context->states.csc.vertex_buf_elems[1].vertex_buffer_index = 1; + context->states.csc.vertex_buf_elems[1].nr_components = 2; + context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our fragment shader's constant buffer + Const buffer contains the color conversion matrix and bias vectors + */ + context->states.csc.fs_const_buf.size 
= sizeof(struct VL_CSC_FS_CONSTS); + context->states.csc.fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.csc.fs_const_buf.size + ); + + /* + TODO: Refactor this into a separate function, + allow changing the csc matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, context->states.csc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &vl_csc_fs_consts_601, + sizeof(struct VL_CSC_FS_CONSTS) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitCSC(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + + assert(context); + + pipe = context->pipe; + + /* Delay creating the FB until vlPutSurface() so we know window size */ + context->states.csc.framebuffer.num_cbufs = 1; + context->states.csc.framebuffer.cbufs[0] = NULL; + context->states.csc.framebuffer.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.csc.sampler = pipe->create_sampler_state(pipe, &sampler); + + vlCreateVertexShaderCSC(context); + vlCreateFragmentShaderCSC(context); + vlCreateDataBufsCSC(context); + + return 0; +} + +static int vlDestroyCSC(struct VL_CONTEXT *context) +{ + assert(context); + + /* + Since we create the final FB when we display 
our first surface, + it may not be created if vlPutSurface() is never called + */ + if (context->states.csc.framebuffer.cbufs[0]) + context->pipe->winsys->surface_release(context->pipe->winsys, &context->states.csc.framebuffer.cbufs[0]); + context->pipe->delete_sampler_state(context->pipe, context->states.csc.sampler); + context->pipe->delete_vs_state(context->pipe, context->states.csc.vertex_shader); + context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer); + + return 0; +} + +static int vlInitCommon(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + struct pipe_rasterizer_state rast; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state dsa; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + rast.flatshade = 1; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + /* Don't need clipping, but viewport mapping done here */ + rast.bypass_clipping = 0; + rast.bypass_vs = 0; + rast.origin_lower_left = 0; + rast.line_width = 1; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + context->states.common.raster = pipe->create_rasterizer_state(pipe, &rast); + 
pipe->bind_rasterizer_state(pipe, context->states.common.raster); + + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + context->states.common.blend = pipe->create_blend_state(pipe, &blend); + pipe->bind_blend_state(pipe, context->states.common.blend); + + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth.occlusion_count = 0; + for (i = 0; i < 2; ++i) + { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].value_mask = 0; + dsa.stencil[i].write_mask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref = 0; + context->states.common.dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + pipe->bind_depth_stencil_alpha_state(pipe, context->states.common.dsa); + + return 0; +} + +static int vlDestroyCommon(struct VL_CONTEXT *context) +{ + assert(context); + + context->pipe->delete_blend_state(context->pipe, context->states.common.blend); + context->pipe->delete_rasterizer_state(context->pipe, context->states.common.raster); + context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->states.common.dsa); + + return 0; +} + +static int vlInit(struct VL_CONTEXT *context) +{ + assert(context); + + vlInitCommon(context); + vlInitCSC(context); + vlInitMC(context); + vlInitIDCT(context); + + return 0; +} + +static int vlDestroy(struct 
VL_CONTEXT *context) +{ + assert(context); + + /* Must unbind shaders before we can delete them for some reason */ + context->pipe->bind_vs_state(context->pipe, NULL); + context->pipe->bind_fs_state(context->pipe, NULL); + + vlDestroyCommon(context); + vlDestroyCSC(context); + vlDestroyMC(context); + vlDestroyIDCT(context); + + return 0; +} + +int vlCreateContext +( + Display *display, + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum VL_FORMAT video_format, + struct VL_CONTEXT **context +) +{ + struct VL_CONTEXT *ctx; + + assert(display); + assert(pipe); + assert(context); + + ctx = calloc(1, sizeof(struct VL_CONTEXT)); + + ctx->display = display; + ctx->pipe = pipe; + ctx->video_width = video_width; + ctx->video_height = video_height; + ctx->video_format = video_format; + + vlInit(ctx); + + /* Since we only change states in vlPutSurface() we need to start in render mode */ + vlBeginRender(ctx); + + *context = ctx; + + return 0; +} + +int vlDestroyContext(struct VL_CONTEXT *context) +{ + assert(context); + + vlDestroy(context); + + free(context); + + return 0; +} + +int vlBeginRender(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + /* Frame buffer set in vlRender*Macroblock() */ + /* Shaders, samplers, textures, VBs, VB elements set in vlRender*Macroblock() */ + pipe->set_viewport_state(pipe, &context->states.mc.viewport); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf); + + return 0; +} + +int vlEndRender(struct VL_CONTEXT *context) +{ + struct pipe_context *pipe; + + assert(context); + + pipe = context->pipe; + + pipe->set_framebuffer_state(pipe, &context->states.csc.framebuffer); + pipe->set_viewport_state(pipe, &context->states.csc.viewport); + pipe->bind_sampler_states(pipe, 1, (void**)&context->states.csc.sampler); + /* 
Source texture set in vlPutSurface() */ + pipe->bind_vs_state(pipe, context->states.csc.vertex_shader); + pipe->bind_fs_state(pipe, context->states.csc.fragment_shader); + pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs); + pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h new file mode 100644 index 0000000000..0aeba184cc --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -0,0 +1,73 @@ +#ifndef vl_context_h +#define vl_context_h + +#include +#include +#include "vl_types.h" + +struct pipe_context; + +struct VL_CONTEXT +{ + Display *display; + struct pipe_context *pipe; + unsigned int video_width; + unsigned int video_height; + enum VL_FORMAT video_format; + + struct + { + struct + { + struct pipe_rasterizer_state *raster; + struct pipe_depth_stencil_alpha_state *dsa; + struct pipe_blend_state *blend; + } common; + + struct + { + } idct; + + struct + { + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct pipe_sampler_state *samplers[5]; + struct pipe_texture *textures[5]; + struct pipe_shader_state *i_vs, *p_vs, *b_vs; + struct pipe_shader_state *i_fs, *p_fs, *b_fs; + struct pipe_vertex_buffer vertex_bufs[6]; + struct pipe_vertex_element vertex_buf_elems[6]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; + } mc; + + struct + { + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_sampler_state *sampler; + struct pipe_shader_state *vertex_shader, *fragment_shader; + struct pipe_vertex_buffer vertex_bufs[2]; + struct pipe_vertex_element vertex_buf_elems[2]; + struct pipe_constant_buffer fs_const_buf; + } csc; + } states; +}; + +int vlCreateContext +( + Display *display, + struct pipe_context *pipe, 
+ unsigned int video_width, + unsigned int video_height, + enum VL_FORMAT video_format, + struct VL_CONTEXT **context +); + +int vlDestroyContext(struct VL_CONTEXT *context); + +int vlBeginRender(struct VL_CONTEXT *context); +int vlEndRender(struct VL_CONTEXT *context); + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c new file mode 100644 index 0000000000..c04163276d --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -0,0 +1,188 @@ +#include "vl_data.h" + +/* + * Represents 8 triangles (4 quads, 1 per block) in normalized coords + * that render a macroblock. + * Need to be scaled to cover mbW*mbH macroblock pixels and translated into + * position on target surface. + */ +const struct VL_VERTEX2F vl_mb_vertex_positions[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, + {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, + + {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, + {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, + + {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 4 luma blocks arranged + * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. + */ +const struct VL_TEXCOORD2F vl_luma_texcoords[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, + + {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, + {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, + + {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, + {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 1 chroma block. + * Straightforward 0,0->1,1 mapping so we can reuse the MB pos vectors. 
+ */ +const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; + +/* + * Represents texcoords for the above for rendering 2 chroma blocks arranged + * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. + * Straightforward 0,0->1,1 mapping so we can reuse MB pos vectors. + */ +const struct VL_TEXCOORD2F *vl_chroma_422_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; + +/* + * Represents texcoords for the above for rendering 4 chroma blocks. + * Same case as 4 luma blocks. + */ +const struct VL_TEXCOORD2F *vl_chroma_444_texcoords = vl_luma_texcoords; + +/* + * Represents texcoords for the above for rendering a predicted macroblock. + * Straightforward 0,0->1,1 mapping so we can reuse MB pos vectors. + * Texcoords need to be translated to cover source macroblock on the + * past/future surface. + */ + const struct VL_TEXCOORD2F *vl_ref_surface_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. + */ +const struct VL_VERTEX2F vl_surface_vertex_positions[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + */ +const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)vl_surface_vertex_positions; + +/* + * Used when rendering P and B macroblocks, multiplier is applied to the A channel, + * which is then added to the L channel, then the bias is subtracted from that to + * get back the differential. The differential is then added to the samples from the + * reference surface(s). 
+ */ +const struct VL_MC_FS_CONSTS vl_mc_fs_consts = +{ + {256.0f, 256.0f, 256.0f, 0.0f}, + {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f} +}; + +/* + * Identity color conversion constants, for debugging + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity = +{ + { + 0.0f, 0.0f, 0.0f, 0.0f + }, + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; 
+ diff --git a/src/gallium/state_trackers/g3dvl/vl_data.h b/src/gallium/state_trackers/g3dvl/vl_data.h new file mode 100644 index 0000000000..67a0a74990 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_data.h @@ -0,0 +1,25 @@ +#ifndef vl_data_h +#define vl_data_h + +#include "vl_types.h" + +extern const struct VL_VERTEX2F vl_mb_vertex_positions[24]; +extern const struct VL_TEXCOORD2F vl_luma_texcoords[24]; +extern const struct VL_TEXCOORD2F *vl_chroma_420_texcoords; +extern const struct VL_TEXCOORD2F *vl_chroma_422_texcoords; +extern const struct VL_TEXCOORD2F *vl_chroma_444_texcoords; +extern const struct VL_TEXCOORD2F *vl_ref_surface_texcoords; + +extern const struct VL_VERTEX2F vl_surface_vertex_positions[4]; +extern const struct VL_TEXCOORD2F *vl_surface_texcoords; + +extern const struct VL_MC_FS_CONSTS vl_mc_fs_consts; + +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709; +extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full; + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_defs.h b/src/gallium/state_trackers/g3dvl/vl_defs.h new file mode 100644 index 0000000000..e668a7a10e --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_defs.h @@ -0,0 +1,12 @@ +#ifndef vl_defs_h +#define vl_defs_h + +#define VL_BLOCK_WIDTH 8 +#define VL_BLOCK_HEIGHT 8 +#define VL_BLOCK_SIZE (VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT) +#define VL_MACROBLOCK_WIDTH 16 +#define VL_MACROBLOCK_HEIGHT 16 +#define VL_MACROBLOCK_SIZE (VL_MACROBLOCK_WIDTH * VL_MACROBLOCK_HEIGHT) + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c new file mode 100644 index 0000000000..e58e434dab --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -0,0 +1,539 @@ +#include "vl_surface.h" +#include +#include +#include 
+#include +#include +#include +#include "vl_context.h" +#include "vl_defs.h" + +static int vlGrabBlocks +( + struct VL_CONTEXT *context, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + enum VL_SAMPLE_TYPE sample_type, + short *blocks +) +{ + struct pipe_surface *tex_surface; + short *texels; + unsigned int b, x, y, y2; + + assert(context); + assert(blocks); + + tex_surface = context->pipe->screen->get_tex_surface + ( + context->pipe->screen, + context->states.mc.textures[0], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, 0); + + for (b = 0; b < 4; ++b) + { + if ((coded_block_pattern >> b) & 1) + { + if (dct_type == VL_DCT_FRAME_CODED) + { + if (sample_type == VL_FULL_SAMPLE) + { + for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) + memcpy + ( + texels + y * tex_surface->pitch, + blocks + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + } + else + { + for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + texels[y * tex_surface->pitch + x] = + blocks[y * VL_BLOCK_WIDTH + x] + 0x100; + } + } + else + { + if (sample_type == VL_FULL_SAMPLE) + { + for + ( + y = VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b; + y < VL_BLOCK_HEIGHT * ((b % 2) + 1); + y += 2, ++y2 + ) + memcpy + ( + texels + y * tex_surface->pitch, + blocks + y2 * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + for + ( + y = VL_BLOCK_HEIGHT * ((b % 2) + 2); + y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1); + y += 2, ++y2 + ) + memcpy + ( + texels + y * tex_surface->pitch, + blocks + y2 * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + } + else + { + for + ( + y = VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b; + y < VL_BLOCK_HEIGHT * ((b % 2) + 1); + y += 2, ++y2 + ) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + texels[y * tex_surface->pitch + x] = + blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100; + for + ( + y = VL_BLOCK_HEIGHT * ((b % 2) + 2); + y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1); + y += 
2, ++y2 + ) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + texels[y * tex_surface->pitch + x] = + blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100; + } + } + } + else + { + for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) + { + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + texels[y * tex_surface->pitch + x] = 0x100; + } + } + } + + pipe_surface_unmap(tex_surface); + + /* TODO: Implement 422, 444 */ + for (b = 0; b < 2; ++b) + { + tex_surface = context->pipe->screen->get_tex_surface + ( + context->pipe->screen, + context->states.mc.textures[b + 1], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, 0); + + if ((coded_block_pattern >> (b + 4)) & 1) + { + if (sample_type == VL_FULL_SAMPLE) + { + for (y = 0; y < tex_surface->height; ++y) + memcpy + ( + texels + y * tex_surface->pitch, + blocks + VL_BLOCK_SIZE * (b + 4) + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + } + else + { + for (y = 0; y < tex_surface->height; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + texels[y * tex_surface->pitch + x] = + blocks[VL_BLOCK_SIZE * (b + 4) + y * VL_BLOCK_WIDTH + x] + 0x100; + } + } + else + { + for (y = 0; y < tex_surface->height; ++y) + { + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + texels[y * tex_surface->pitch + x] = 0x100; + } + } + + pipe_surface_unmap(tex_surface); + } + + return 0; +} + +int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) +{ + struct pipe_context *pipe; + struct pipe_texture template; + struct VL_SURFACE *sfc; + + assert(context); + assert(surface); + + pipe = context->pipe; + + sfc = calloc(1, sizeof(struct VL_SURFACE)); + + sfc->context = context; + sfc->width = context->video_width; + sfc->height = context->video_height; + sfc->format = context->video_format; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.last_level = 0; + template.width[0] = sfc->width; + template.height[0] = sfc->height; + 
template.depth[0] = 1; + template.compressed = 0; + template.cpp = 4; + + sfc->texture = pipe->screen->texture_create(pipe->screen, &template); + + *surface = sfc; + + return 0; +} + +int vlDestroySurface(struct VL_SURFACE *surface) +{ + assert(surface); + pipe_texture_release(&surface->texture); + free(surface); + + return 0; +} + +int vlRenderIMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *surface +) +{ + struct pipe_context *pipe; + struct VL_MC_VS_CONSTS *vscbdata; + + assert(blocks); + assert(surface); + + /* TODO: Implement interlaced rendering */ + /*assert(picture_type == VL_FRAME_PICTURE);*/ + if (picture_type != VL_FRAME_PICTURE) + { + /*fprintf(stderr, "field picture (I) unimplemented, ignoring\n");*/ + return 0; + } + + pipe = surface->context->pipe; + + vscbdata = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.mc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vscbdata->scale.z = 1.0f; + vscbdata->scale.w = 1.0f; + vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vscbdata->mb_pos_trans.z = 0.0f; + vscbdata->mb_pos_trans.w = 0.0f; + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); + + surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); + 
pipe->set_sampler_textures(pipe, 3, surface->context->states.mc.textures); + pipe->bind_sampler_states(pipe, 3, (void**)surface->context->states.mc.samplers); + pipe->set_vertex_buffers(pipe, 4, surface->context->states.mc.vertex_bufs); + pipe->set_vertex_elements(pipe, 4, surface->context->states.mc.vertex_buf_elems); + pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); + pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderPMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *ref_surface, + struct VL_SURFACE *surface +) +{ + struct pipe_context *pipe; + struct VL_MC_VS_CONSTS *vscbdata; + + assert(motion_vector); /* its top_field.x/y are read below for mb_tc_trans[0] */ + assert(blocks); + assert(ref_surface); + assert(surface); + + /* TODO: Implement interlaced rendering */ + /*assert(picture_type == VL_FRAME_PICTURE);*/ + if (picture_type != VL_FRAME_PICTURE) + { + /*fprintf(stderr, "field picture (P) unimplemented, ignoring\n");*/ + return 0; + } + /* TODO: Implement field based motion compensation */ + /*assert(mc_type == VL_FRAME_MC);*/ + if (mc_type != VL_FRAME_MC) + { + /*fprintf(stderr, "field MC (P) unimplemented, ignoring\n");*/ + return 0; + } + + pipe = surface->context->pipe; + + vscbdata = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.mc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vscbdata->scale.z = 1.0f; + vscbdata->scale.w = 1.0f; + vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / 
(float)surface->height; + vscbdata->mb_pos_trans.z = 0.0f; + vscbdata->mb_pos_trans.w = 0.0f; + vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; + vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; + vscbdata->mb_tc_trans[0].z = 0.0f; + vscbdata->mb_tc_trans[0].w = 0.0f; + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + + surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); + + surface->context->states.mc.textures[3] = ref_surface->texture; + pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); + pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); + pipe->set_vertex_buffers(pipe, 5, surface->context->states.mc.vertex_bufs); + pipe->set_vertex_elements(pipe, 5, surface->context->states.mc.vertex_buf_elems); + pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs); + pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderBMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *past_surface, + struct VL_SURFACE *future_surface, + struct VL_SURFACE *surface +) +{ + struct pipe_context *pipe; + struct VL_MC_VS_CONSTS *vscbdata; + + assert(motion_vector); /* motion_vector[0] and motion_vector[1] are both read below */ + assert(blocks); + 
assert(past_surface && future_surface); /* the ->texture member of each is read below */ + assert(surface); + + /* TODO: Implement interlaced rendering */ + /*assert(picture_type == VL_FRAME_PICTURE);*/ + if (picture_type != VL_FRAME_PICTURE) + { + /*fprintf(stderr, "field picture (B) unimplemented, ignoring\n");*/ + return 0; + } + /* TODO: Implement field based motion compensation */ + /*assert(mc_type == VL_FRAME_MC);*/ + if (mc_type != VL_FRAME_MC) + { + /*fprintf(stderr, "field MC (B) unimplemented, ignoring\n");*/ + return 0; + } + + pipe = surface->context->pipe; + + vscbdata = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.mc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vscbdata->scale.z = 1.0f; + vscbdata->scale.w = 1.0f; + vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vscbdata->mb_pos_trans.z = 0.0f; + vscbdata->mb_pos_trans.w = 0.0f; + vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; + vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height; + vscbdata->mb_tc_trans[0].z = 0.0f; + vscbdata->mb_tc_trans[0].w = 0.0f; + vscbdata->mb_tc_trans[1].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; + vscbdata->mb_tc_trans[1].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; + vscbdata->mb_tc_trans[1].z = 0.0f; + vscbdata->mb_tc_trans[1].w = 0.0f; + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + + surface->context->states.mc.render_target.cbufs[0] = 
pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); + + surface->context->states.mc.textures[3] = past_surface->texture; + surface->context->states.mc.textures[4] = future_surface->texture; + pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); + pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); + pipe->set_vertex_buffers(pipe, 6, surface->context->states.mc.vertex_bufs); + pipe->set_vertex_elements(pipe, 6, surface->context->states.mc.vertex_buf_elems); + pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs); + pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlPutSurface +( + struct VL_SURFACE *surface, + Drawable drawable, + unsigned int srcx, + unsigned int srcy, + unsigned int srcw, + unsigned int srch, + unsigned int destx, + unsigned int desty, + unsigned int destw, + unsigned int desth, + enum VL_PICTURE picture_type +) +{ + unsigned int create_fb = 0; + struct pipe_context *pipe; + + assert(surface); + + pipe = surface->context->pipe; + + if (!surface->context->states.csc.framebuffer.cbufs[0]) + create_fb = 1; + else if + ( + surface->context->states.csc.framebuffer.width != destw || + surface->context->states.csc.framebuffer.height != desth + ) + { + pipe->winsys->surface_release + ( + pipe->winsys, + &surface->context->states.csc.framebuffer.cbufs[0] + ); + + create_fb = 1; + } + + if (create_fb) + { + surface->context->states.csc.viewport.scale[0] = destw; + surface->context->states.csc.viewport.scale[1] = desth; + surface->context->states.csc.viewport.scale[2] = 1; + surface->context->states.csc.viewport.scale[3] = 1; + surface->context->states.csc.viewport.translate[0] = 0; + surface->context->states.csc.viewport.translate[1] = 0; + 
surface->context->states.csc.viewport.translate[2] = 0; + surface->context->states.csc.viewport.translate[3] = 0; + + surface->context->states.csc.framebuffer.width = destw; + surface->context->states.csc.framebuffer.height = desth; + surface->context->states.csc.framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys); + pipe->winsys->surface_alloc_storage + ( + pipe->winsys, + surface->context->states.csc.framebuffer.cbufs[0], + destw, + desth, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, + 0 + ); + } + + vlEndRender(surface->context); + + pipe->set_sampler_textures(pipe, 1, &surface->texture); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + pipe->winsys->flush_frontbuffer + ( + pipe->winsys, + surface->context->states.csc.framebuffer.cbufs[0], + &drawable + ); + + vlBeginRender(surface->context); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h new file mode 100644 index 0000000000..9f56b77e1e --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_surface.h @@ -0,0 +1,81 @@ +#ifndef vl_surface_h +#define vl_surface_h + +#include +#include "vl_types.h" + +struct pipe_texture; + +struct VL_SURFACE +{ + struct VL_CONTEXT *context; + unsigned int width; + unsigned int height; + enum VL_FORMAT format; + struct pipe_texture *texture; +}; + +int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface); + +int vlDestroySurface(struct VL_SURFACE *surface); + +int vlRenderIMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *surface +); + +int vlRenderPMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE 
mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *ref_surface, + struct VL_SURFACE *surface +); + +int vlRenderBMacroBlock +( + enum VL_PICTURE picture_type, + enum VL_FIELD_ORDER field_order, + unsigned int mbx, + unsigned int mby, + enum VL_MC_TYPE mc_type, + struct VL_MOTION_VECTOR *motion_vector, + unsigned int coded_block_pattern, + enum VL_DCT_TYPE dct_type, + short *blocks, + struct VL_SURFACE *past_surface, + struct VL_SURFACE *future_surface, + struct VL_SURFACE *surface +); + +int vlPutSurface +( + struct VL_SURFACE *surface, + Drawable drawable, + unsigned int srcx, + unsigned int srcy, + unsigned int srcw, + unsigned int srch, + unsigned int destx, + unsigned int desty, + unsigned int destw, + unsigned int desth, + enum VL_PICTURE picture_type +); + +#endif + diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h new file mode 100644 index 0000000000..7040b74503 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -0,0 +1,88 @@ +#ifndef vl_types_h +#define vl_types_h + +enum VL_FORMAT +{ + VL_FORMAT_YCBCR_420, + VL_FORMAT_YCBCR_422, + VL_FORMAT_YCBCR_444 +}; + +enum VL_PICTURE +{ + VL_TOP_FIELD, + VL_BOTTOM_FIELD, + VL_FRAME_PICTURE +}; + +enum VL_FIELD_ORDER +{ + VL_FIELD_FIRST, + VL_FIELD_SECOND +}; + +enum VL_DCT_TYPE +{ + VL_DCT_FIELD_CODED, + VL_DCT_FRAME_CODED +}; + +enum VL_SAMPLE_TYPE +{ + VL_FULL_SAMPLE, + VL_DIFFERENCE_SAMPLE +}; + +enum VL_MC_TYPE +{ + VL_FIELD_MC, + VL_FRAME_MC +}; + +struct VL_VERTEX4F +{ + float x, y, z, w; +}; + +struct VL_VERTEX2F +{ + float x, y; +}; + +struct VL_TEXCOORD2F +{ + float s, t; +}; + +struct VL_MC_VS_CONSTS +{ + struct VL_VERTEX4F scale; + struct VL_VERTEX4F mb_pos_trans; + struct VL_VERTEX4F mb_tc_trans[2]; +}; + +struct VL_MC_FS_CONSTS +{ + struct VL_VERTEX4F multiplier; + struct VL_VERTEX4F bias; +}; + +struct VL_CSC_FS_CONSTS +{ + struct 
VL_VERTEX4F bias; + float matrix[16]; +}; + +struct VL_MOTION_VECTOR +{ + struct + { + int x, y; + } top_field, bottom_field; +}; + +struct VL_CONTEXT; +struct VL_SURFACE; + +#endif + -- cgit v1.2.3 From f5a3768c4e7733a11ad0421e3e4b84e6994af0e0 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 8 Jun 2008 14:34:41 -0400 Subject: g3dvl: Reduce number of input vert streams by copying, reusing in shaders. Reduce number of input vertex streams by using same texcoord stream for chroma textures, reusing pos stream when calculating texcoords for P, B macroblocks. --- src/gallium/state_trackers/g3dvl/vl_context.c | 175 ++++++++++---------------- src/gallium/state_trackers/g3dvl/vl_context.h | 4 +- src/gallium/state_trackers/g3dvl/vl_data.c | 8 -- src/gallium/state_trackers/g3dvl/vl_data.h | 1 - src/gallium/state_trackers/g3dvl/vl_surface.c | 6 - 5 files changed, 66 insertions(+), 128 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 7193f7ccea..2d1d543495 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -32,15 +32,14 @@ static int vlDestroyIDCT(struct VL_CONTEXT *context) static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int num_attribs = 4; - const unsigned int semantic_names[4] = + const unsigned int num_attribs = 3; + const unsigned int semantic_names[3] = { TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC }; - const unsigned int semantic_indexes[4] = {0, 1, 2, 3}; + const unsigned int semantic_indexes[3] = {0, 1, 2}; const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; @@ -173,7 +172,6 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) /* mov o1, i1 ; Move texcoords to output mov o2, i2 - mov o3, i3 */ for (i = 1; i < 
num_attribs; ++i) { @@ -251,7 +249,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) ti = 3; /* Declare inputs (texcoords) */ - for (i = 0; i < 3; ++i) + for (i = 0; i < 2; ++i) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; @@ -306,7 +304,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) /* tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel - tex2d o0.z, i2, s2 ; Read texel from chroma Cr texture into .z channel + tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel */ for (i = 0; i < 3; ++i) { @@ -319,7 +317,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) inst.Instruction.NumSrcRegs = 2; inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 
1 : 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; inst.FullSrcRegisters[1].SrcRegister.Index = i; ti += tgsi_build_full_instruction @@ -354,16 +352,23 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_attribs = 5; - const unsigned int semantic_names[5] = + const unsigned int num_input_attribs = 3; + const unsigned int num_output_attribs = 4; + const unsigned int input_semantic_names[3] = { TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC + }; + const unsigned int output_semantic_names[4] = + { + TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC }; - const unsigned int semantic_indexes[5] = {0, 1, 2, 3, 4}; + const unsigned int input_semantic_indexes[3] = {0, 1, 2}; + const unsigned int output_semantic_indexes[4] = {0, 1, 2, 3}; const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; @@ -398,14 +403,14 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) ti = 3; /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + for (i = 0; i < num_input_attribs; i++) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.Semantic.SemanticName = input_semantic_names[i]; + decl.Semantic.SemanticIndex = input_semantic_indexes[i]; decl.u.DeclarationRange.First = i; decl.u.DeclarationRange.Last = i; @@ -438,13 +443,13 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) ); /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + for (i = 0; i < num_output_attribs; i++) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_OUTPUT; decl.Declaration.Semantic = 1; - 
decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.Semantic.SemanticName = output_semantic_names[i]; + decl.Semantic.SemanticIndex = output_semantic_indexes[i]; decl.u.DeclarationRange.First = i; decl.u.DeclarationRange.Last = i; ti += tgsi_build_full_declaration @@ -497,9 +502,8 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) /* mov o1, i1 ; Move luma & chroma texcoords to output mov o2, i2 - mov o3, i3 */ - for (i = 1; i < num_attribs - 1; ++i) + for (i = 1; i < num_output_attribs - 1; ++i) { inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MOV; @@ -517,32 +521,13 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) max_tokens - ti ); } - - /* mul t0, i4, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); - /* add o4, t0, c2 ; Translate texcoords into position */ + /* add o3, t0, c2 ; Translate texcoords into position */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 4; + inst.FullDstRegisters[0].DstRegister.Index = 3; inst.Instruction.NumSrcRegs = 2; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[0].SrcRegister.Index = 0; @@ -613,7 +598,7 @@ 
static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) ti = 3; /* Declare inputs (texcoords) */ - for (i = 0; i < 4; ++i) + for (i = 0; i < 3; ++i) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; @@ -688,7 +673,7 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) mov t1.x, t0.w ; Move high part from .w channel to .x tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i2, s2 ; Read texel from chroma Cr texture into .z and .w channels + tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels mov t1.z, t0.w ; Move high part from .w channel to .z */ for (i = 0; i < 3; ++i) @@ -702,7 +687,7 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) inst.Instruction.NumSrcRegs = 2; inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 
1 : 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; inst.FullSrcRegisters[1].SrcRegister.Index = i; ti += tgsi_build_full_instruction @@ -792,7 +777,7 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) max_tokens - ti ); - /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ + /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; @@ -801,7 +786,7 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) inst.Instruction.NumSrcRegs = 2; inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[0].SrcRegister.Index = 2; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; inst.FullSrcRegisters[1].SrcRegister.Index = 3; ti += tgsi_build_full_instruction @@ -854,17 +839,24 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_attribs = 6; - const unsigned int semantic_names[6] = + const unsigned int num_input_attribs = 3; + const unsigned int num_output_attribs = 5; + const unsigned int input_semantic_names[3] = { TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC + }; + const unsigned int output_semantic_names[5] = + { + TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC }; - const unsigned int semantic_indexes[6] = {0, 1, 2, 3, 4, 5}; + const unsigned int input_semantic_indexes[3] = {0, 1, 2}; + const unsigned int output_semantic_indexes[5] = {0, 1, 2, 3, 4}; const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; @@ -899,14 +891,14 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) ti = 3; /* Declare 
inputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + for (i = 0; i < num_input_attribs; i++) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.Semantic.SemanticName = input_semantic_names[i]; + decl.Semantic.SemanticIndex = input_semantic_indexes[i]; decl.u.DeclarationRange.First = i; decl.u.DeclarationRange.Last = i; @@ -940,13 +932,13 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) ); /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + for (i = 0; i < num_output_attribs; i++) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_OUTPUT; decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.Semantic.SemanticName = output_semantic_names[i]; + decl.Semantic.SemanticIndex = output_semantic_indexes[i]; decl.u.DeclarationRange.First = i; decl.u.DeclarationRange.Last = i; ti += tgsi_build_full_declaration @@ -999,9 +991,8 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) /* mov o1, i1 ; Move luma & chroma texcoords to output mov o2, i2 - mov o3, i3 */ - for (i = 1; i < num_attribs - 1; ++i) + for (i = 1; i < num_output_attribs - 2; ++i) { inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MOV; @@ -1020,38 +1011,18 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) ); } - /* mul t0, i4, c0 ; Scale normalized coords to window coords - add o4, t0, c2 ; Translate texcoords into position - mul t1, i5, c0 ; Repeat for the future surface - add o5, t1, c3 */ + /* add o3, t0, c2 ; Translate past surface texcoords into position + add o4, t0, c3 ; Repeat for future surface texcoords */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = 
TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); - inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 4; + inst.FullDstRegisters[0].DstRegister.Index = i + 3; inst.Instruction.NumSrcRegs = 2; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; inst.FullSrcRegisters[1].SrcRegister.Index = i + 2; ti += tgsi_build_full_instruction @@ -1120,7 +1091,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) ti = 3; /* Declare inputs (texcoords) */ - for (i = 0; i < 5; ++i) + for (i = 0; i < 4; ++i) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; @@ -1195,7 +1166,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) mov t1.x, t0.w ; Move high part from .w channel to .x tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i2, s2 ; Read texel from chroma Cr texture into .z and .w channels + tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels mov t1.z, t0.w ; Move high part from .w channel to .z */ for (i = 0; i < 3; ++i) @@ -1209,7 +1180,7 @@ static int 
vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) inst.Instruction.NumSrcRegs = 2; inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; + inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; inst.FullSrcRegisters[1].SrcRegister.Index = i; ti += tgsi_build_full_instruction @@ -1299,8 +1270,8 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); - /* tex2d t1, i3, s3 ; Read texel from past macroblock - tex2d t2, i4, s4 ; Read texel from future macroblock */ + /* tex2d t1, i2, s3 ; Read texel from past macroblock + tex2d t2, i3, s4 ; Read texel from future macroblock */ for (i = 0; i < 2; ++i) { inst = tgsi_default_full_instruction(); @@ -1311,7 +1282,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) inst.Instruction.NumSrcRegs = 2; inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 3; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; ti += tgsi_build_full_instruction @@ -1409,7 +1380,7 @@ int vlCreateDataBufsMC(struct VL_CONTEXT *context) /* Create our texcoord buffers and texcoord buffer elements */ /* TODO: Should be able to use 1 texcoord buf for chroma textures, 1 buf for ref surfaces */ - for (i = 1; i < 6; ++i) + for (i = 1; i < 3; ++i) { context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); context->states.mc.vertex_bufs[i].max_index = 23; @@ -1448,26 +1419,8 @@ int vlCreateDataBufsMC(struct VL_CONTEXT *context) vl_chroma_420_texcoords, sizeof(struct VL_TEXCOORD2F) * 24 ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[3].buffer, 
PIPE_BUFFER_USAGE_CPU_WRITE), - vl_chroma_420_texcoords, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[4].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_ref_surface_texcoords, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[5].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_ref_surface_texcoords, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - for (i = 0; i < 6; ++i) + for (i = 0; i < 3; ++i) pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer); /* Create our constant buffer */ @@ -1599,12 +1552,10 @@ static int vlDestroyMC(struct VL_CONTEXT *context) assert(context); for (i = 0; i < 5; ++i) - { context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer); - } - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[5].buffer); + for (i = 0; i < 3; ++i) + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer); /* Textures 3 & 4 are not created directly, no need to release them here */ for (i = 0; i < 3; ++i) @@ -2262,7 +2213,9 @@ int vlBeginRender(struct VL_CONTEXT *context) pipe = context->pipe; /* Frame buffer set in vlRender*Macroblock() */ - /* Shaders, samplers, textures, VBs, VB elements set in vlRender*Macroblock() */ + /* Shaders, samplers, textures set in vlRender*Macroblock() */ + pipe->set_vertex_buffers(pipe, 3, context->states.mc.vertex_bufs); + pipe->set_vertex_elements(pipe, 3, context->states.mc.vertex_buf_elems); pipe->set_viewport_state(pipe, &context->states.mc.viewport); pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf); pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf); diff --git 
a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index 0aeba184cc..f26a4c5b6a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -36,8 +36,8 @@ struct VL_CONTEXT struct pipe_texture *textures[5]; struct pipe_shader_state *i_vs, *p_vs, *b_vs; struct pipe_shader_state *i_fs, *p_fs, *b_fs; - struct pipe_vertex_buffer vertex_bufs[6]; - struct pipe_vertex_element vertex_buf_elems[6]; + struct pipe_vertex_buffer vertex_bufs[3]; + struct pipe_vertex_element vertex_buf_elems[3]; struct pipe_constant_buffer vs_const_buf, fs_const_buf; } mc; diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c index c04163276d..27893aee95 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -61,14 +61,6 @@ const struct VL_TEXCOORD2F *vl_chroma_422_texcoords = (const struct VL_TEXCOORD2 */ const struct VL_TEXCOORD2F *vl_chroma_444_texcoords = vl_luma_texcoords; -/* - * Represents texcoords for the above for rendering a predicted macroblock. - * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. - * Texcoords need to be translated to cover source macroblock on the - * past/future surface. - */ - const struct VL_TEXCOORD2F *vl_ref_surface_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; - /* * Represents 2 triangles in a strip in normalized coords. * Used to render the surface onto the frame buffer. 
diff --git a/src/gallium/state_trackers/g3dvl/vl_data.h b/src/gallium/state_trackers/g3dvl/vl_data.h index 67a0a74990..8f347273ad 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.h +++ b/src/gallium/state_trackers/g3dvl/vl_data.h @@ -8,7 +8,6 @@ extern const struct VL_TEXCOORD2F vl_luma_texcoords[24]; extern const struct VL_TEXCOORD2F *vl_chroma_420_texcoords; extern const struct VL_TEXCOORD2F *vl_chroma_422_texcoords; extern const struct VL_TEXCOORD2F *vl_chroma_444_texcoords; -extern const struct VL_TEXCOORD2F *vl_ref_surface_texcoords; extern const struct VL_VERTEX2F vl_surface_vertex_positions[4]; extern const struct VL_TEXCOORD2F *vl_surface_texcoords; diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index e58e434dab..0e1adea472 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -269,8 +269,6 @@ int vlRenderIMacroBlock pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); pipe->set_sampler_textures(pipe, 3, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 3, (void**)surface->context->states.mc.samplers); - pipe->set_vertex_buffers(pipe, 4, surface->context->states.mc.vertex_bufs); - pipe->set_vertex_elements(pipe, 4, surface->context->states.mc.vertex_buf_elems); pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); @@ -354,8 +352,6 @@ int vlRenderPMacroBlock surface->context->states.mc.textures[3] = ref_surface->texture; pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); - pipe->set_vertex_buffers(pipe, 5, surface->context->states.mc.vertex_bufs); - pipe->set_vertex_elements(pipe, 5, surface->context->states.mc.vertex_buf_elems); pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs); pipe->bind_fs_state(pipe, 
surface->context->states.mc.p_fs); @@ -445,8 +441,6 @@ int vlRenderBMacroBlock surface->context->states.mc.textures[4] = future_surface->texture; pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); - pipe->set_vertex_buffers(pipe, 6, surface->context->states.mc.vertex_bufs); - pipe->set_vertex_elements(pipe, 6, surface->context->states.mc.vertex_buf_elems); pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs); pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs); -- cgit v1.2.3 From 14d4f9e44e55e2b427579ed6788e579d70b289e7 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 16 Jun 2008 23:18:20 -0400 Subject: g3dvl: Get rid of some Valgrind errors. Get rid of some Valgrind memory leak and uninitialized var errors. --- src/gallium/state_trackers/g3dvl/vl_context.c | 24 +++++++++++++++++++++++- src/gallium/state_trackers/g3dvl/vl_surface.c | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 2d1d543495..59a1ccd152 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -209,6 +209,8 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + return 0; } @@ -346,6 +348,8 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + return 0; } @@ -558,6 +562,8 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) context->states.mc.p_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + return 0; } @@ -833,6 +839,8 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) context->states.mc.p_fs = pipe->create_fs_state(pipe, &fs); + 
free(tokens); + return 0; } @@ -1051,6 +1059,8 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) context->states.mc.b_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + return 0; } @@ -1349,6 +1359,8 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) context->states.mc.b_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + return 0; } @@ -1502,7 +1514,7 @@ static int vlInitMC(struct VL_CONTEXT *context) /*sampler.prefilter = ;*/ /*sampler.shadow_ambient = ;*/ /*sampler.lod_bias = ;*/ - /*sampler.min_lod = ;*/ + sampler.min_lod = 0; /*sampler.max_lod = ;*/ /*sampler.border_color[i] = ;*/ /*sampler.max_anisotropy = ;*/ @@ -1692,6 +1704,8 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); + free(tokens); + return 0; } @@ -1880,6 +1894,8 @@ static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); + free(tokens); + return 0; } @@ -2052,6 +2068,7 @@ static int vlInitCommon(struct VL_CONTEXT *context) pipe = context->pipe; rast.flatshade = 1; + rast.flatshade_first = 0; rast.light_twoside = 0; rast.front_winding = PIPE_WINDING_CCW; rast.cull_mode = PIPE_WINDING_CW; @@ -2061,6 +2078,7 @@ static int vlInitCommon(struct VL_CONTEXT *context) rast.offset_ccw = 0; rast.scissor = 0; rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; rast.point_sprite = 0; rast.point_size_per_vertex = 0; rast.multisample = 0; @@ -2074,6 +2092,7 @@ static int vlInitCommon(struct VL_CONTEXT *context) rast.bypass_vs = 0; rast.origin_lower_left = 0; rast.line_width = 1; + rast.point_smooth = 0; rast.point_size = 1; rast.offset_units = 1; rast.offset_scale = 1; @@ -2199,6 +2218,9 @@ int vlDestroyContext(struct VL_CONTEXT *context) vlDestroy(context); + context->pipe->screen->destroy(context->pipe->screen); + context->pipe->destroy(context->pipe); + free(context); return 0; diff --git 
a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 0e1adea472..3f59d0f155 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -195,6 +195,7 @@ int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) template.depth[0] = 1; template.compressed = 0; template.cpp = 4; + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; sfc->texture = pipe->screen->texture_create(pipe->screen, &template); -- cgit v1.2.3 From 0a6aec8c0f2173cfb95ce95d12b66f090ea0ba1f Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 18 Jun 2008 22:21:11 -0400 Subject: g3dvl: Work around SP tex cache bug, specify resource usage flags. --- src/gallium/state_trackers/g3dvl/vl_context.c | 1 - src/gallium/state_trackers/g3dvl/vl_surface.c | 14 +++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 59a1ccd152..3b9afabbb8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -2218,7 +2218,6 @@ int vlDestroyContext(struct VL_CONTEXT *context) vlDestroy(context); - context->pipe->screen->destroy(context->pipe->screen); context->pipe->destroy(context->pipe); free(context); diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 3f59d0f155..6451e54953 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -31,7 +31,7 @@ static int vlGrabBlocks 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); - texels = pipe_surface_map(tex_surface, 0); + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); for (b = 0; b < 4; ++b) { @@ -131,7 +131,7 @@ static int vlGrabBlocks 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); - 
texels = pipe_surface_map(tex_surface, 0); + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); if ((coded_block_pattern >> (b + 4)) & 1) { @@ -165,6 +165,9 @@ static int vlGrabBlocks pipe_surface_unmap(tex_surface); } + /* XXX: Texture cache is not invalidated when texture contents change */ + context->pipe->flush(context->pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL); + return 0; } @@ -265,7 +268,7 @@ int vlRenderIMacroBlock ( pipe->screen, surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ); pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); pipe->set_sampler_textures(pipe, 3, surface->context->states.mc.textures); @@ -346,7 +349,7 @@ int vlRenderPMacroBlock ( pipe->screen, surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ); pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); @@ -434,7 +437,7 @@ int vlRenderBMacroBlock ( pipe->screen, surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ); pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); @@ -510,6 +513,7 @@ int vlPutSurface destw, desth, PIPE_FORMAT_A8R8G8B8_UNORM, + /*XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, 0 ); -- cgit v1.2.3 From 3933fec6bd62285506fecdc3a254306648cfefb2 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 28 Jun 2008 20:16:01 -0400 Subject: g3dvl: Support for field and frame based MC for progressive pictures. MC support for frame and field based motion prediction. Also various bug fixes, clean up. 
--- src/gallium/state_trackers/g3dvl/tests/.gitignore | 2 +- src/gallium/state_trackers/g3dvl/tests/Makefile | 7 +- .../state_trackers/g3dvl/tests/test_pf_rendering.c | 214 +++ src/gallium/state_trackers/g3dvl/vl_context.c | 1688 ++++++++++++++++++-- src/gallium/state_trackers/g3dvl/vl_context.h | 4 +- src/gallium/state_trackers/g3dvl/vl_data.c | 3 +- src/gallium/state_trackers/g3dvl/vl_surface.c | 404 ++--- src/gallium/state_trackers/g3dvl/vl_types.h | 12 +- 8 files changed, 2026 insertions(+), 308 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/tests/.gitignore b/src/gallium/state_trackers/g3dvl/tests/.gitignore index 939666da9a..9b1ec4e212 100644 --- a/src/gallium/state_trackers/g3dvl/tests/.gitignore +++ b/src/gallium/state_trackers/g3dvl/tests/.gitignore @@ -2,5 +2,5 @@ test_context test_surface test_i_rendering test_p_rendering +test_pf_rendering test_b_rendering - diff --git a/src/gallium/state_trackers/g3dvl/tests/Makefile b/src/gallium/state_trackers/g3dvl/tests/Makefile index 8f983593c3..45cefa2e57 100644 --- a/src/gallium/state_trackers/g3dvl/tests/Makefile +++ b/src/gallium/state_trackers/g3dvl/tests/Makefile @@ -20,7 +20,7 @@ LIBS += -lg3dvl -lsoftpipe -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lutil .PHONY = all clean -all: test_context test_surface test_i_rendering test_p_rendering test_b_rendering +all: test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering test_context: test_context.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} @@ -34,9 +34,12 @@ test_i_rendering: test_i_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o test_p_rendering: test_p_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} +test_pf_rendering: test_pf_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o + $(CC) ${LDFLAGS} -o $@ $^ 
${LIBS} + test_b_rendering: test_b_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} clean: - rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_b_rendering + rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering diff --git a/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c new file mode 100644 index 0000000000..43586fc553 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include + +static const unsigned short ycbcr16x16_420[8*8*6] = +{ + 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, + 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 
0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, + 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, + + 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, + 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, + + 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, + 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E +}; + +static const signed short ycbcr16x16_420_2[8*8*6] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +int main(int argc, char **argv) +{ + const unsigned int video_width = 32, video_height = 32; + const unsigned int window_width = video_width * 2, window_height = video_height * 2; + int quit = 0; + Display *display; + Window root, window; + Pixmap framebuffer; + XEvent event; + struct pipe_context *pipe; + struct VL_CONTEXT *ctx; + struct VL_SURFACE *sfc, *ref_sfc; + struct VL_MOTION_VECTOR motion_vector = + { + {0, 0}, {32, 32} + }; + + display = XOpenDisplay(NULL); + root = XDefaultRootWindow(display); + window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); + framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); + + XSelectInput(display, window, ExposureMask | KeyPressMask); + XMapWindow(display, window); + XSync(display, 0); + + pipe = create_pipe_context(display); + vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); + vlCreateSurface(ctx, &sfc); + vlCreateSurface(ctx, &ref_sfc); + + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, 
VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); + vlRenderPMacroBlock + ( + VL_FRAME_PICTURE, + VL_FIELD_FIRST, + 0, + 0, + VL_FIELD_MC, + &motion_vector, + 0x3F, + VL_DCT_FRAME_CODED, + (short*)ycbcr16x16_420_2, + ref_sfc, + sfc + ); + vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); + + puts("Press any key to continue..."); + + while (!quit) + { + XNextEvent(display, &event); + switch (event.type) + { + case Expose: + { + XCopyArea + ( + display, + framebuffer, + window, + XDefaultGC(display, XDefaultScreen(display)), + 0, + 0, + window_width, + window_height, + 0, + 0 + ); + break; + } + case KeyPress: + { + quit = 1; + break; + } + } + } + + vlDestroySurface(sfc); + vlDestroySurface(ref_sfc); + vlDestroyContext(ctx); + + XFreePixmap(display, framebuffer); + XDestroyWindow(display, window); + XCloseDisplay(display); + + return 0; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 3b9afabbb8..d2b1ad7948 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -36,8 +36,8 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) const unsigned int semantic_names[3] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ }; const unsigned int semantic_indexes[3] = {0, 1, 2}; const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; @@ -353,7 +353,7 @@ static int 
vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) return 0; } -static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) +static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; const unsigned int num_input_attribs = 3; @@ -361,15 +361,15 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) const unsigned int input_semantic_names[3] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ }; const unsigned int output_semantic_names[4] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ + TGSI_SEMANTIC_GENERIC /* Ref surface texcoords */ }; const unsigned int input_semantic_indexes[3] = {0, 1, 2}; const unsigned int output_semantic_indexes[4] = {0, 1, 2, 3}; @@ -430,14 +430,15 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) /* Declare constant inputs */ /* C[0] scales the normalized MB to cover 16x16 pixels, C[1] translates the macroblock into position on the surface - C[2] translates the ref surface texcoords to the ref macroblock */ + C[2] unused + C[3] translates the ref surface texcoords to the ref macroblock */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = 0; decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; + decl.u.DeclarationRange.Last = 3; ti += tgsi_build_full_declaration ( &decl, @@ -526,7 +527,7 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) ); } - /* add o3, t0, c2 ; Translate texcoords into position */ + /* add o3, t0, c3 ; Translate texcoords into position */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode 
= TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; @@ -536,6 +537,264 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); + + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int num_input_attribs = 3; + const unsigned int num_output_attribs = 6; + const unsigned int input_semantic_names[3] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ + }; + const unsigned int output_semantic_names[6] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Top field surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Bottom field surface texcoords */ + TGSI_SEMANTIC_POSITION /* Pos */ + }; + const unsigned int input_semantic_indexes[3] = {0, 1, 2}; + const unsigned int output_semantic_indexes[6] = {0, 1, 2, 3, 4, 5}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned 
int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_input_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = input_semantic_names[i]; + decl.Semantic.SemanticIndex = input_semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface + C[2] denormalizes pos components + C[3] translates the ref surface top field texcoords to the ref macroblock + C[4] translates the ref surface bottom field texcoords to the ref macroblock */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 4; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_output_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = output_semantic_names[i]; + decl.Semantic.SemanticIndex = 
output_semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t1, t0, c1 ; Translate vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + ti += 
tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move luma & chroma texcoords to output + mov o2, i2 + */ + for (i = 1; i < num_output_attribs - 1; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* add o3, t0, c3 ; Translate top field texcoords into position + add o4, t0, c4 ; Translate bottom field texcoords into position */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i + 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul o5, t1, c2 ; Denorm pos for fragment shader */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 5; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; 
inst.FullSrcRegisters[1].SrcRegister.Index = 2; ti += tgsi_build_full_instruction ( @@ -560,14 +819,14 @@ static int vlCreateVertexShaderPMC(struct VL_CONTEXT *context) vs.tokens = tokens; - context->states.mc.p_vs = pipe->create_vs_state(pipe, &vs); + context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); free(tokens); return 0; } -static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) +static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; @@ -837,42 +1096,928 @@ static int vlCreateFragmentShaderPMC(struct VL_CONTEXT *context) fs.tokens = tokens; - context->states.mc.p_fs = pipe->create_fs_state(pipe, &fs); + context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); free(tokens); return 0; } -static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) +static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) { - const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 5; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC - }; - const unsigned int output_semantic_names[5] = - { - TGSI_SEMANTIC_POSITION, + const unsigned int max_tokens = 200; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ 
+ processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (texcoords) + I[0] Luma texcoords + I[1] Chroma texcoords + I[2] Ref top field surface texcoords + I[3] Ref bottom field surface texcoords + I[4] Denormalized texel pos */ + for (i = 0; i < 5; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = i + 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant input */ + /* C[0] is a multiplier to use when concatenating differential into a single channel + C[1] is a bias to get differential back to -1,1 range + C[2] is constants 2 and 1/2 for Y%2 field selector */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 2; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare samplers */ + for (i = 0; i < 4; ++i) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + 
decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + mov t1.x, t0.w ; Move high part from .w channel to .x + tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + mov t1.y, t0.w ; Move high part from .w channel to .y + tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + mov t1.z, t0.w ; Move high part from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 
1 : 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t1, t1, c0 ; Multiply high part by multiplier to get back its full value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = 
TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field + tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i + 1; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* XXX: Pos values off by 0.5 for rounding? 
*/ + /* sub t4, i4.y, c2.x ; Sub 0.5 from position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 4; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + 
inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = 
TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* lerp t1, t3, t1, t2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + fs.tokens = 
tokens; + + context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); + + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int num_input_attribs = 3; + const unsigned int num_output_attribs = 5; + const unsigned int input_semantic_names[3] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, + TGSI_SEMANTIC_GENERIC + }; + const unsigned int output_semantic_names[5] = + { + TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC, TGSI_SEMANTIC_GENERIC }; const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[5] = {0, 1, 2, 3, 4}; + const unsigned int output_semantic_indexes[5] = {0, 1, 2, 3, 4}; + const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_input_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = input_semantic_names[i]; + decl.Semantic.SemanticIndex = input_semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += 
tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface + C[2] unused + C[3] translates the past surface texcoords to the ref macroblock + C[4] unused + C[5] translates the future surface texcoords to the ref macroblock */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 5; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_output_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = output_semantic_names[i]; + decl.Semantic.SemanticIndex = output_semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, c1 ; Translate 
vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move luma & chroma texcoords to output + mov o2, i2 + */ + for (i = 1; i < num_output_attribs - 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* add o3, t0, c3 ; Translate past surface texcoords into position + add o4, t0, c5 ; Repeat for future surface texcoords */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i + 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = i * 2 + 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + 
max_tokens - ti + ); + } + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); + + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int num_input_attribs = 3; + const unsigned int num_output_attribs = 8; + const unsigned int input_semantic_names[3] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ + }; + const unsigned int output_semantic_names[8] = + { + TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ + TGSI_SEMANTIC_GENERIC, /* Top field past surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Bottom field past surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Top field future surface texcoords */ + TGSI_SEMANTIC_GENERIC, /* Bottom field future surface texcoords */ + TGSI_SEMANTIC_POSITION /* Pos */ + }; + const unsigned int input_semantic_indexes[3] = {0, 1, 2}; + const unsigned int output_semantic_indexes[8] = {0, 1, 2, 3, 4, 5, 6, 7}; const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; - struct pipe_shader_state vs; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + + /* Header */ + header = 
(struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + + /* Processor */ + processor = (struct tgsi_processor*)&tokens[2]; + *processor = tgsi_build_processor(proc_type, header); + + ti = 3; + + /* Declare inputs (pos, texcoords) */ + for (i = 0; i < num_input_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = input_semantic_names[i]; + decl.Semantic.SemanticIndex = input_semantic_indexes[i]; + + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* Declare constant inputs */ + /* C[0] scales the normalized MB to cover 16x16 pixels, + C[1] translates the macroblock into position on the surface + C[2] denormalizes pos components + C[3] translates the past surface top field texcoords to the ref macroblock + C[4] translates the past surface bottom field texcoords to the ref macroblock + C[5] translates the future surface top field texcoords to the ref macroblock + C[6] translates the future surface bottom field texcoords to the ref macroblock */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 6; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare outputs (pos, texcoords) */ + for (i = 0; i < num_output_attribs; i++) + { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = output_semantic_names[i]; + decl.Semantic.SemanticIndex = output_semantic_indexes[i]; + decl.u.DeclarationRange.First = i; + decl.u.DeclarationRange.Last = i; + ti += 
tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add t1, t0, c1 ; Translate vertex into position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* + mov o1, i1 ; Move luma & chroma 
texcoords to output + mov o2, i2 + */ + for (i = 1; i < num_output_attribs - 1; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* add o3, t0, c3 ; Translate top field past texcoords into position + add o4, t0, c4 ; Translate bottom field past texcoords into position + add o5, t0, c5 ; Translate top field future texcoords into position + add o6, t0, c6 ; Translate bottom field future texcoords into position */ + for (i = 0; i < 4; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i + 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul o7, t1, c2 ; Denorm pos for fragment shader */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 7; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + inst.FullSrcRegisters[1].SrcRegister.File = 
TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* END */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + vs.tokens = tokens; + + context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); + + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 100; + const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; + + struct pipe_context *pipe; + struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; struct tgsi_processor *processor; - + struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -882,12 +2027,12 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - + /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); @@ -898,16 +2043,16 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* Declare inputs (texcoords) */ + for (i = 0; i < 4; ++i) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = i + 1; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; 
decl.u.DeclarationRange.First = i; decl.u.DeclarationRange.Last = i; ti += tgsi_build_full_declaration @@ -919,18 +2064,17 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) ); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] translates the past surface texcoords to the ref macroblock - C[3] translates the future surface texcoords to the ref macroblock */ + /* Declare constant input */ + /* C[0] is a multiplier to use when concatenating differential into a single channel + C[1] is a bias to get differential back to -1,1 range + C[2] contains 0.5 in channel X for use as a weight to blend past and future samples */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = 0; decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 3; + decl.u.DeclarationRange.Last = 2; ti += tgsi_build_full_declaration ( &decl, @@ -939,14 +2083,27 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* Declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = 0; + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + + /* Declare samplers */ + for (i = 0; i < 5; ++i) { decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; + decl.Declaration.File 
= TGSI_FILE_SAMPLER; decl.u.DeclarationRange.First = i; decl.u.DeclarationRange.Last = i; ti += tgsi_build_full_declaration @@ -958,15 +2115,67 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) ); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ + /* + tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + mov t1.x, t0.w ; Move high part from .w channel to .x + tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + mov t1.y, t0.w ; Move high part from .w channel to .y + tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + mov t1.z, t0.w ; Move high part from .w channel to .z + */ + for (i = 0; i < 3; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 
1 : 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* mul t1, t1, c0 ; Multiply high part by multiplier to get back its full value */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_MUL; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.FullDstRegisters[0].DstRegister.Index = 1; inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; inst.FullSrcRegisters[1].SrcRegister.Index = 0; ti += tgsi_build_full_instruction @@ -976,17 +2185,17 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) header, max_tokens - ti ); - - /* add o0, t0, c1 ; Translate vertex into position */ + 
+ /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; inst.FullDstRegisters[0].DstRegister.Index = 0; inst.Instruction.NumSrcRegs = 2; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[1].SrcRegister.Index = 1; ti += tgsi_build_full_instruction ( @@ -996,43 +2205,40 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); - /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 - */ - for (i = 1; i < num_output_attribs - 2; ++i) - { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); - } + /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + 
inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); - /* add o3, t0, c2 ; Translate past surface texcoords into position - add o4, t0, c3 ; Repeat for future surface texcoords */ + /* tex2d t1, i2, s3 ; Read texel from past macroblock + tex2d t2, i3, s4 ; Read texel from future macroblock */ for (i = 0; i < 2; ++i) { inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 3; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i + 1; inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; ti += tgsi_build_full_instruction ( &inst, @@ -1041,6 +2247,50 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); } + + /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[0].SrcRegister.Index = 2; + 
inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); /* END */ inst = tgsi_default_full_instruction(); @@ -1054,19 +2304,19 @@ static int vlCreateVertexShaderBMC(struct VL_CONTEXT *context) header, max_tokens - ti ); + + fs.tokens = tokens; - vs.tokens = tokens; - - context->states.mc.b_vs = pipe->create_vs_state(pipe, &vs); + context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); free(tokens); return 0; } -static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) +static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) { - const unsigned int max_tokens = 100; + const unsigned int max_tokens = 200; const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; @@ -1100,8 +2350,15 @@ static int vlCreateFragmentShaderBMC(struct 
VL_CONTEXT *context) ti = 3; - /* Declare inputs (texcoords) */ - for (i = 0; i < 4; ++i) + /* Declare inputs (texcoords) + I[0] Luma texcoords + I[1] Chroma texcoords + I[2] Past top field surface texcoords + I[3] Past bottom field surface texcoords + I[4] Future top field surface texcoords + I[5] Future bottom field surface texcoords + I[6] Denormalized texel pos */ + for (i = 0; i < 7; ++i) { decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; @@ -1123,14 +2380,15 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) /* Declare constant input */ /* C[0] is a multiplier to use when concatenating differential into a single channel - C[0] is a bias to get differential back to -1,1 range*/ + C[1] is a bias to get differential back to -1,1 range + C[2] is constants 2 and 1/2 for Y%2 field selector */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = 0; decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 1; + decl.u.DeclarationRange.Last = 2; ti += tgsi_build_full_declaration ( &decl, @@ -1280,8 +2538,118 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); - /* tex2d t1, i2, s3 ; Read texel from past macroblock - tex2d t2, i3, s4 ; Read texel from future macroblock */ + /* XXX: Pos values off by 0.5 for rounding? 
*/ + /* sub t4, i6.y, c2.x ; Sub 0.5 from position */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 4; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 6; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + 
inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MUL; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; + inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_SUB; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 3; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = 
TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* tex2d t1, i2, s3 ; Read texel from past macroblock top field + tex2d t2, i3, s3 ; Read texel from past macroblock bottom field */ for (i = 0; i < 2; ++i) { inst = tgsi_default_full_instruction(); @@ -1294,7 +2662,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; + inst.FullSrcRegisters[1].SrcRegister.Index = 3; ti += tgsi_build_full_instruction ( &inst, @@ -1304,17 +2672,89 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) ); } - /* add t0, t0, t1 ; Add past and differential to form partial output */ + /* lerp t1, t3, t1, t2 */ inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; + inst.Instruction.Opcode = TGSI_OPCODE_LERP; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* 
tex2d t4, i4, s4 ; Read texel from future macroblock top field + tex2d t5, i5, s4 ; Read texel from future macroblock bottom field */ + for (i = 0; i < 2; ++i) + { + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = i + 4; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i + 4; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 4; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* lerp t2, t3, t4, t5 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 2; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[0].SrcRegister.Index = 3; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[1].SrcRegister.Index = 4; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 5; + ti += tgsi_build_full_instruction + ( + &inst, + &tokens[ti], + header, + max_tokens - ti + ); + + /* lerp t1, c2.x, t1, t2 */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_LERP; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; + 
inst.FullSrcRegisters[0].SrcRegister.Index = 2; + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[1].SrcRegister.Index = 1; + inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + inst.FullSrcRegisters[2].SrcRegister.Index = 2; ti += tgsi_build_full_instruction ( &inst, @@ -1323,7 +2763,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) max_tokens - ti ); - /* add o0, t0, t2 ; Add future and differential to form final output */ + /* add o0, t0, t1 ; Add blended ref and differential to form final output */ inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = TGSI_OPCODE_ADD; inst.Instruction.NumDstRegs = 1; @@ -1333,7 +2773,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + inst.FullSrcRegisters[1].SrcRegister.Index = 1; ti += tgsi_build_full_instruction ( &inst, @@ -1357,7 +2797,7 @@ static int vlCreateFragmentShaderBMC(struct VL_CONTEXT *context) fs.tokens = tokens; - context->states.mc.b_fs = pipe->create_fs_state(pipe, &fs); + context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); free(tokens); @@ -1491,7 +2931,6 @@ static int vlInitMC(struct VL_CONTEXT *context) context->states.mc.render_target.height = context->video_height; context->states.mc.render_target.num_cbufs = 1; /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ - /*context->states.mc.render_target.cbufs[0] = ;*/ context->states.mc.render_target.zsbuf = NULL; filters[0] = PIPE_TEX_FILTER_NEAREST; 
@@ -1530,6 +2969,7 @@ static int vlInitMC(struct VL_CONTEXT *context) template.depth[0] = 1; template.compressed = 0; template.cpp = 2; + context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); if (context->video_format == VL_FORMAT_YCBCR_420) @@ -1548,10 +2988,14 @@ static int vlInitMC(struct VL_CONTEXT *context) vlCreateVertexShaderIMC(context); vlCreateFragmentShaderIMC(context); - vlCreateVertexShaderPMC(context); - vlCreateFragmentShaderPMC(context); - vlCreateVertexShaderBMC(context); - vlCreateFragmentShaderBMC(context); + vlCreateVertexShaderFramePMC(context); + vlCreateVertexShaderFieldPMC(context); + vlCreateFragmentShaderFramePMC(context); + vlCreateFragmentShaderFieldPMC(context); + vlCreateVertexShaderFrameBMC(context); + vlCreateVertexShaderFieldBMC(context); + vlCreateFragmentShaderFrameBMC(context); + vlCreateFragmentShaderFieldBMC(context); vlCreateDataBufsMC(context); return 0; @@ -1575,10 +3019,14 @@ static int vlDestroyMC(struct VL_CONTEXT *context) context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs); context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs); - context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs); - context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs); - context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs); - context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs); + + for (i = 0; i < 2; ++i) + { + context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]); + context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]); + context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]); + } context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer); context->pipe->winsys->buffer_destroy(context->pipe->winsys, 
context->states.mc.fs_const_buf.buffer); @@ -1982,7 +3430,7 @@ static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) /* TODO: Refactor this into a seperate function, - allow changing the csc matrix at runtime to switch between regular & full versions + allow changing the CSC matrix at runtime to switch between regular & full versions */ memcpy ( @@ -2166,7 +3614,7 @@ static int vlDestroy(struct VL_CONTEXT *context) { assert(context); - /* Must unbind shaders before we can delete them for some reason */ + /* XXX: Must unbind shaders before we can delete them for some reason */ context->pipe->bind_vs_state(context->pipe, NULL); context->pipe->bind_fs_state(context->pipe, NULL); diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index f26a4c5b6a..8a12318073 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -34,8 +34,8 @@ struct VL_CONTEXT struct pipe_framebuffer_state render_target; struct pipe_sampler_state *samplers[5]; struct pipe_texture *textures[5]; - struct pipe_shader_state *i_vs, *p_vs, *b_vs; - struct pipe_shader_state *i_fs, *p_fs, *b_fs; + struct pipe_shader_state *i_vs, *p_vs[2], *b_vs[2]; + struct pipe_shader_state *i_fs, *p_fs[2], *b_fs[2]; struct pipe_vertex_buffer vertex_bufs[3]; struct pipe_vertex_element vertex_buf_elems[3]; struct pipe_constant_buffer vs_const_buf, fs_const_buf; diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c index 27893aee95..7e6ee8ac12 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -87,7 +87,8 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*) const struct VL_MC_FS_CONSTS vl_mc_fs_consts = { {256.0f, 256.0f, 256.0f, 0.0f}, - {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f} + {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 
0.0f} }; /* diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 6451e54953..d2220d7abf 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -8,6 +8,85 @@ #include "vl_context.h" #include "vl_defs.h" +static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + return 0; +} + +static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFieldCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; + + return 0; +} + +static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +{ + unsigned int x, y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + dst[y * dst_pitch + x] = 0x100; + + return 0; +} + static int vlGrabBlocks 
( struct VL_CONTEXT *context, @@ -19,7 +98,7 @@ static int vlGrabBlocks { struct pipe_surface *tex_surface; short *texels; - unsigned int b, x, y, y2; + unsigned int tb, sb = 0; assert(context); assert(blocks); @@ -33,134 +112,81 @@ static int vlGrabBlocks texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - for (b = 0; b < 4; ++b) + for (tb = 0; tb < 4; ++tb) { - if ((coded_block_pattern >> b) & 1) + if ((coded_block_pattern >> (5 - tb)) & 1) { if (dct_type == VL_DCT_FRAME_CODED) - { if (sample_type == VL_FULL_SAMPLE) - { - for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - } + vlGrabFrameCodedFullBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, + tex_surface->pitch + ); else - { - for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[y * VL_BLOCK_WIDTH + x] + 0x100; - } - } + vlGrabFrameCodedDiffBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, + tex_surface->pitch + ); else - { if (sample_type == VL_FULL_SAMPLE) - { - for + vlGrabFieldCodedFullBlock ( - y = VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b; - y < VL_BLOCK_HEIGHT * ((b % 2) + 1); - y += 2, ++y2 - ) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + y2 * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - for - ( - y = VL_BLOCK_HEIGHT * ((b % 2) + 2); - y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1); - y += 2, ++y2 - ) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + y2 * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - } + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch, + tex_surface->pitch + ); else - { - for + vlGrabFieldCodedDiffBlock ( - y 
= VL_BLOCK_HEIGHT * (b % 2), y2 = VL_BLOCK_HEIGHT * b; - y < VL_BLOCK_HEIGHT * ((b % 2) + 1); - y += 2, ++y2 - ) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100; - for - ( - y = VL_BLOCK_HEIGHT * ((b % 2) + 2); - y < VL_BLOCK_HEIGHT * (((b % 2) + 2) + 1); - y += 2, ++y2 - ) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[y2 * VL_BLOCK_WIDTH + x] + 0x100; - } - } + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch, + tex_surface->pitch + ); + ++sb; } else - { - for (y = VL_BLOCK_HEIGHT * b; y < VL_BLOCK_HEIGHT * (b + 1); ++y) - { - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = 0x100; - } - } + vlGrabNoBlock(texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, tex_surface->pitch); } pipe_surface_unmap(tex_surface); /* TODO: Implement 422, 444 */ - for (b = 0; b < 2; ++b) + for (tb = 0; tb < 2; ++tb) { tex_surface = context->pipe->screen->get_tex_surface - ( - context->pipe->screen, - context->states.mc.textures[b + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); + ( + context->pipe->screen, + context->states.mc.textures[tb + 1], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - if ((coded_block_pattern >> (b + 4)) & 1) - { + if ((coded_block_pattern >> (1 - tb)) & 1) + { if (sample_type == VL_FULL_SAMPLE) - { - for (y = 0; y < tex_surface->height; ++y) - memcpy - ( - texels + y * tex_surface->pitch, - blocks + VL_BLOCK_SIZE * (b + 4) + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - } + vlGrabFrameCodedFullBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels, + tex_surface->pitch + ); else - { - for (y = 0; y < tex_surface->height; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = - blocks[VL_BLOCK_SIZE * (b + 4) + y * 
VL_BLOCK_WIDTH + x] + 0x100; - } + vlGrabFrameCodedDiffBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels, + tex_surface->pitch + ); + + ++sb; } else - { - for (y = 0; y < tex_surface->height; ++y) - { - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - texels[y * tex_surface->pitch + x] = 0x100; - } - } + vlGrabNoBlock(texels, tex_surface->pitch); pipe_surface_unmap(tex_surface); } @@ -229,41 +255,35 @@ int vlRenderIMacroBlock ) { struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vscbdata; + struct VL_MC_VS_CONSTS *vs_consts; assert(blocks); assert(surface); /* TODO: Implement interlaced rendering */ - /*assert(picture_type == VL_FRAME_PICTURE);*/ if (picture_type != VL_FRAME_PICTURE) - { - /*fprintf(stderr, "field picture (I) unimplemented, ignoring\n");*/ return 0; - } pipe = surface->context->pipe; - vscbdata = pipe->winsys->buffer_map + vs_consts = pipe->winsys->buffer_map ( pipe->winsys, surface->context->states.mc.vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE ); - vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vscbdata->scale.z = 1.0f; - vscbdata->scale.w = 1.0f; - vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vscbdata->mb_pos_trans.z = 0.0f; - vscbdata->mb_pos_trans.w = 0.0f; + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - vlGrabBlocks(surface->context, 
coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface ( pipe->screen, @@ -276,6 +296,8 @@ int vlRenderIMacroBlock pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -297,7 +319,7 @@ int vlRenderPMacroBlock ) { struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vscbdata; + struct VL_MC_VS_CONSTS *vs_consts; assert(motion_vectors); assert(blocks); @@ -305,46 +327,55 @@ int vlRenderPMacroBlock assert(surface); /* TODO: Implement interlaced rendering */ - /*assert(picture_type == VL_FRAME_PICTURE);*/ if (picture_type != VL_FRAME_PICTURE) - { - /*fprintf(stderr, "field picture (P) unimplemented, ignoring\n");*/ return 0; - } - /* TODO: Implement field based motion compensation */ - /*assert(mc_type == VL_FRAME_MC);*/ - if (mc_type != VL_FRAME_MC) - { - /*fprintf(stderr, "field MC (P) unimplemented, ignoring\n");*/ + /* TODO: Implement other MC types */ + if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; - } pipe = surface->context->pipe; - vscbdata = pipe->winsys->buffer_map + vs_consts = pipe->winsys->buffer_map ( pipe->winsys, surface->context->states.mc.vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE ); - vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vscbdata->scale.z = 1.0f; - vscbdata->scale.w = 1.0f; - vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vscbdata->mb_pos_trans.z = 0.0f; - vscbdata->mb_pos_trans.w = 0.0f; - vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; 
- vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; - vscbdata->mb_tc_trans[0].z = 0.0f; - vscbdata->mb_tc_trans[0].w = 0.0f; + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + + if (mc_type == VL_FIELD_MC) + { + vs_consts->denorm.x = (float)surface->width; + vs_consts->denorm.y = (float)surface->height; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[1]); + pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[0]); + pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[0]); + } pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - 
surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface ( pipe->screen, @@ -356,8 +387,8 @@ int vlRenderPMacroBlock surface->context->states.mc.textures[3] = ref_surface->texture; pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); - pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs); - pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs); + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); @@ -381,7 +412,7 @@ int vlRenderBMacroBlock ) { struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vscbdata; + struct VL_MC_VS_CONSTS *vs_consts; assert(motion_vectors); assert(blocks); @@ -389,50 +420,63 @@ int vlRenderBMacroBlock assert(surface); /* TODO: Implement interlaced rendering */ - /*assert(picture_type == VL_FRAME_PICTURE);*/ if (picture_type != VL_FRAME_PICTURE) - { - /*fprintf(stderr, "field picture (B) unimplemented, ignoring\n");*/ return 0; - } - /* TODO: Implement field based motion compensation */ - /*assert(mc_type == VL_FRAME_MC);*/ - if (mc_type != VL_FRAME_MC) - { - /*fprintf(stderr, "field MC (B) unimplemented, ignoring\n");*/ + /* TODO: Implement other MC types */ + if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; - } pipe = surface->context->pipe; - vscbdata = pipe->winsys->buffer_map + vs_consts = pipe->winsys->buffer_map ( pipe->winsys, surface->context->states.mc.vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE ); - vscbdata->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vscbdata->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vscbdata->scale.z = 1.0f; - vscbdata->scale.w = 1.0f; - vscbdata->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vscbdata->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vscbdata->mb_pos_trans.z 
= 0.0f; - vscbdata->mb_pos_trans.w = 0.0f; - vscbdata->mb_tc_trans[0].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; - vscbdata->mb_tc_trans[0].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height; - vscbdata->mb_tc_trans[0].z = 0.0f; - vscbdata->mb_tc_trans[0].w = 0.0f; - vscbdata->mb_tc_trans[1].x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; - vscbdata->mb_tc_trans[1].y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; - vscbdata->mb_tc_trans[1].z = 0.0f; - vscbdata->mb_tc_trans[1].w = 0.0f; + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[1].top_field.z = 0.0f; + vs_consts->mb_tc_trans[1].top_field.w = 0.0f; + + if (mc_type == VL_FIELD_MC) + { + vs_consts->denorm.x = (float)surface->width; + vs_consts->denorm.y = (float)surface->height; + + 
vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].bottom_field.x * 0.5f) / (float)surface->width; + vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].bottom_field.y * 0.5f) / (float)surface->height; + vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[1]); + pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[0]); + pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[0]); + } pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface ( pipe->screen, @@ -445,8 +489,8 @@ int vlRenderBMacroBlock surface->context->states.mc.textures[4] = future_surface->texture; pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); - pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs); - pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs); + + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); @@ -513,7 +557,7 @@ int vlPutSurface destw, desth, PIPE_FORMAT_A8R8G8B8_UNORM, - /*XXX: SoftPipe doesn't change GPU usage 
to CPU like it does for textures */ + /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, 0 ); diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index 7040b74503..97753699db 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -36,7 +36,9 @@ enum VL_SAMPLE_TYPE enum VL_MC_TYPE { VL_FIELD_MC, - VL_FRAME_MC + VL_FRAME_MC, + VL_DUAL_PRIME_MC, + VL_16x8_MC = VL_FRAME_MC }; struct VL_VERTEX4F @@ -58,13 +60,19 @@ struct VL_MC_VS_CONSTS { struct VL_VERTEX4F scale; struct VL_VERTEX4F mb_pos_trans; - struct VL_VERTEX4F mb_tc_trans[2]; + struct VL_VERTEX4F denorm; + struct + { + struct VL_VERTEX4F top_field; + struct VL_VERTEX4F bottom_field; + } mb_tc_trans[2]; }; struct VL_MC_FS_CONSTS { struct VL_VERTEX4F multiplier; struct VL_VERTEX4F bias; + struct VL_VERTEX4F y_divider; }; struct VL_CSC_FS_CONSTS -- cgit v1.2.3 From 1c893fd513f5335a81dd72db70d64763634ea856 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 29 Jun 2008 20:52:58 -0400 Subject: g3dvl: Simplify shader code. 
--- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 2968 ++++---------------- src/gallium/state_trackers/g3dvl/vl_shader_build.c | 205 ++ src/gallium/state_trackers/g3dvl/vl_shader_build.h | 61 + 4 files changed, 890 insertions(+), 2346 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_shader_build.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_shader_build.h (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index a0d85fbcc8..bd46d004e2 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,5 +1,5 @@ TARGET = libg3dvl.a -OBJECTS = vl_context.o vl_data.o vl_surface.o +OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index d2b1ad7948..88da47c06a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -9,6 +9,7 @@ #include #include #include +#include "vl_shader_build.h" #include "vl_data.h" static int vlInitIDCT(struct VL_CONTEXT *context) @@ -32,21 +33,11 @@ static int vlDestroyIDCT(struct VL_CONTEXT *context) static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int num_attribs = 3; - const unsigned int semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - }; - const unsigned int semantic_indexes[3] = {0, 1, 2}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; 
struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -57,158 +48,71 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 1; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move texcoords to output - 
mov o2, i2 - */ - for (i = 1; i < num_attribs; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -217,13 +121,11 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; - + struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -234,120 +136,61 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* 
Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + */ for (i = 0; i < 2; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare output (color) */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ for (i = 0; i < 3; ++i) { - decl = tgsi_default_full_declaration(); - 
decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); } /* - tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel - tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel - tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel + * tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel + * tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel + * tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 
1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -356,30 +199,11 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 4; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ - }; - const unsigned int output_semantic_names[4] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - TGSI_SEMANTIC_GENERIC /* Ref surface texcoords */ - }; - const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[4] = {0, 1, 2, 3}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -390,179 +214,78 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) assert(context); pipe = 
context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] unused - C[3] translates the ref surface texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 3; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords 
to output - mov o2, i2 - */ - for (i = 1; i < num_output_attribs - 1; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate texcoords into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - 
&tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -571,32 +294,11 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 6; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ - }; - const unsigned int output_semantic_names[6] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Top field surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Bottom field surface texcoords */ - TGSI_SEMANTIC_POSITION /* Pos */ - }; - const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[6] = {0, 1, 2, 3, 4, 5}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -607,34 +309,26 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = 
tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration ( &decl, @@ -644,183 +338,70 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) ); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] denormalizes pos components - C[3] translates the ref surface top field texcoords to the ref macroblock - C[4] translates the ref surface bottom field texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 4; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, 
&tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field ref macroblock texcoords + * decl o4 ; Bottom field ref macroblock texcoords + * decl o5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t1, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - 
inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mov o0, t1 ; Move vertex pos to output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 + mov o1, i1 ; Move input luma texcoords to output + mov o2, i2 ; Move input chroma texcoords to output */ - for (i = 1; i < num_output_attribs - 1; ++i) + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - 
inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate top field texcoords into position - add o4, t0, c4 ; Translate bottom field texcoords into position */ + /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock + add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul o5, t1, c2 ; Denorm pos for fragment shader */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 5; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - 
inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul o5, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -829,13 +410,11 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -846,258 +425,101 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 
3; - /* Declare inputs (texcoords) */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + */ for (i = 0; i < 3; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[0] is a bias to get differential back to -1,1 range*/ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 1; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += 
tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ for (i = 0; i < 4; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = 
vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - 
inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Multiply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; -
inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - 
inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -1106,13 +528,11 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 200; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -1123,399 +543,150 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct 
tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) - I[0] Luma texcoords - I[1] Chroma texcoords - I[2] Ref top field surface texcoords - I[3] Ref bottom field surface texcoords - I[4] Denormalized texel pos */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3, ref surface top field + * decl i3 ; Texcoords for s3, ref surface bottom field + * decl i4 ; Denormalized vertex pos + */ for (i = 0; i < 5; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[1] is a bias to get differential back to -1,1 range - C[2] is constants 2 and 1/2 for Y%2 field selector */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl =
vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ for (i = 0; i < 4; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to 
.y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - 
header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Multiply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti +=
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field - tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ + /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field + tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, 
TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* XXX: Pos values off by 0.5 for rounding? */ - /* sub t4, i4.y, c2.x ; Sub 0.5 from position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 4; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; + /* XXX: Pos values off by 0.5? */ + /* sub t4, i4.y, c2.x ; Sub 0.5 (constant c2.x, not an input) from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; -
inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* floor t3, t3 ; Get rid of fractional part */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - 
inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* lerp t1, t3, t1, t2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - 
inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - 
ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -1524,31 +695,11 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 5; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC - }; - const unsigned int output_semantic_names[5] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC, - TGSI_SEMANTIC_GENERIC - }; - const unsigned int input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[5] = {0, 1, 2, 3, 4}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -1559,185 +710,85 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma 
texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] unused - C[3] translates the past surface texcoords to the ref macroblock - C[4] unused - C[5] translates the future surface texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 5; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move past ref macroblock texcoords into position + * decl c4 ; Unused + * decl c5 ; Translation vector to move future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * 
decl o3 ; Past ref macroblock texcoords + * decl o4 ; Future ref macroblock texcoords + */ + for (i = 0; i < 5; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); - - /* add o0, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - 
header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 - */ - for (i = 1; i < num_output_attribs - 2; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate past surface texcoords into position - add o4, t0, c5 ; Repeat for future surface texcoords */ + /* add o3, t0, c3 ; Translate rect into position on past ref macroblock + add o4, t0, c5 ; Translate rect into position on future ref macroblock */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 3; - inst.Instruction.NumSrcRegs 
= 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i * 2 + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -1746,34 +797,11 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int num_input_attribs = 3; - const unsigned int num_output_attribs = 8; - const unsigned int input_semantic_names[3] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC /* Chroma texcoords */ - }; - const unsigned int output_semantic_names[8] = - { - TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_GENERIC, /* Luma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Chroma texcoords */ - TGSI_SEMANTIC_GENERIC, /* Top field past surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Bottom field past surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Top field future surface texcoords */ - TGSI_SEMANTIC_GENERIC, /* Bottom field future surface texcoords */ - TGSI_SEMANTIC_POSITION /* Pos */ - }; - const unsigned int 
input_semantic_indexes[3] = {0, 1, 2}; - const unsigned int output_semantic_indexes[8] = {0, 1, 2, 3, 4, 5, 6, 7}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -1783,225 +811,102 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) assert(context); - pipe = context->pipe; - + pipe = context->pipe; tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_input_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = input_semantic_names[i]; - decl.Semantic.SemanticIndex = input_semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant inputs */ - /* C[0] scales the normalized MB to cover 16x16 pixels, - C[1] translates the macroblock into position on the surface - C[2] denormalizes pos components - C[3] translates the past surface top field texcoords to the ref macroblock - C[4] translates the past surface bottom field texcoords to the ref macroblock - C[5] translates the future surface top field texcoords to the ref macroblock - C[6] translates the future surface bottom field texcoords to the ref macroblock */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 6; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position + * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position + * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_output_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field past ref macroblock texcoords + * decl o4 ; Bottom field past 
ref macroblock texcoords + * decl o5 ; Top field future ref macroblock texcoords + * decl o6 ; Bottom field future ref macroblock texcoords + * decl o7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = output_semantic_names[i]; - decl.Semantic.SemanticIndex = output_semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* mul t0, i0, c0 ; Scale normalized coords to window coords */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t1, t0, c1 ; Translate vertex into position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - 
inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* mov o0, t1 ; Move vertex pos to output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - mov o1, i1 ; Move luma & chroma texcoords to output - mov o2, i2 - */ - for (i = 1; i < num_output_attribs - 1; ++i) + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - 
ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o3, t0, c3 ; Translate top field past texcoords into position - add o4, t0, c4 ; Translate bottom field past texcoords into position - add o5, t0, c5 ; Translate top field past texcoords into position - add o6, t0, c6 ; Translate bottom field past texcoords into position */ + /* + * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock + * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock + * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock + * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock + */ for (i = 0; i < 4; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i + 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul o7, t1, c2 ; Denorm pos for fragment shader */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - 
inst.FullDstRegisters[0].DstRegister.Index = 7; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul o7, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -2010,13 +915,11 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 100; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -2027,288 +930,118 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - 
processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + */ for (i = 0; i < 4; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[1] is a bias to get differential back to -1,1 range - C[2] contains 0.5 in channel X for use as a weight to blend past and future samples */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); 
+ ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ for (i = 0; i < 5; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w 
channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - 
&tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Multiply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti +=
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from past macroblock - tex2d t2, i3, s4 ; Read texel from future macroblock */ + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock + * tex2d t2, i3, s4 ; Read texel from future ref macroblock + */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, 
TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[0].SrcRegister.Index = 2; + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - 
( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -2317,13 +1050,11 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 200; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -2334,471 +1065,179 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare inputs (texcoords) - I[0] Luma texcoords - I[1] Chroma texcoords - I[2] Past top field 
surface texcoords - I[3] Past bottom field surface texcoords - I[4] Future top field surface texcoords - I[5] Future bottom field surface texcoords - I[6] Denormalized texel pos */ + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Texcoords for s4 + * decl i5 ; Texcoords for s4 + * decl i6 ; Denormalized vertex pos + */ for (i = 0; i < 7; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = i + 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare constant input */ - /* C[0] is a multiplier to use when concatenating differential into a single channel - C[1] is a bias to get differential back to -1,1 range - C[2] is constants 2 and 1/2 for Y%2 field selector */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 2; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); - - /* Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - 
decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Multiplier to shift 9th bit of differential into place + * decl c1 ; Bias to get differential back to a signed value + * decl c2 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare samplers */ + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ for (i = 0; i < 5; ++i) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* - tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - mov t1.x, t0.w ; Move high part from .w channel to .x - tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - mov t1.y, t0.w ; Move high part from .w channel to .y - tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - mov t1.z, t0.w ; Move high part from .w channel to .z - */ + * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels + * mov t1.x, t0.w ; Move 9th bit from .w channel to .x + * tex2d t0.yw, i1, s1 ; Read texel from 
chroma Cb texture into .y and .w channels + * mov t1.y, t0.w ; Move 9th bit from .w channel to .y + * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels + * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + */ for (i = 0; i < 3; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i > 0 ? 1 : 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; 
inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul t1, t1, c0 ; Muliply high part by multiplier to get back its full value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* mul t1, t1, c0 ; Multiply 9th bit by multiplier to shift it into place */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get a single value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ +
inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* sub t0, t0, c1 ; Subtract bias to get back the signed value */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c1 ; Subtract bias to get back signed values */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* XXX: Pos values off by 0.5 for rounding? */ - /* sub t4, i6.y, c2.x ; Sub 0.5 from position */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 4; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 6; + /* XXX: Pos values off by 0.5? 
*/ + /* sub t4, i6.y, c2.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Divide pos y coord by 2 (mul by 0.5) */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction - 
( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* floor t3, t3 ; Get rid of fractional part */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_FLOOR; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MUL; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 2; + /* mul t3, t3, c2.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, 
&tokens[ti], header, max_tokens - ti); - /* sub t3, t4, t3 ; Subtract from y to get y % 2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 3; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from past macroblock top field - tex2d t2, i3, s3 ; Read texel from past macroblock bottom field */ + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field + */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 1; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i + 2; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 3; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, 
TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* lerp t1, t3, t1, t2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t4, i4, s4 ; Read texel from future macroblock top field - tex2d t5, i5, s4 ; Read texel from future macroblock bottom field */ + /* + * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field + */ for (i = 0; i < 2; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = i + 4; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - 
inst.FullSrcRegisters[0].SrcRegister.Index = i + 4; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 4; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* lerp t2, t3, t4, t5 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 2; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 3; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 4; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 5; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* lerp t1, c2.x, t1, t2 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_LERP; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 3; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[0].SrcRegister.Index = 2; + /* 
lerp t1, c2.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - inst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[2].SrcRegister.Index = 2; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* add o0, t0, t1 ; Add future and differential to form final output */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_ADD; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[1].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* END */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += 
tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; @@ -2936,8 +1375,8 @@ static int vlInitMC(struct VL_CONTEXT *context) filters[0] = PIPE_TEX_FILTER_NEAREST; filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[3] = PIPE_TEX_FILTER_NEAREST; - filters[4] = PIPE_TEX_FILTER_NEAREST; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; for (i = 0; i < 5; ++i) { @@ -3037,16 +1476,11 @@ static int vlDestroyMC(struct VL_CONTEXT *context) static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int num_attribs = 2; - const unsigned int semantic_names[2] = {TGSI_SEMANTIC_POSITION, TGSI_SEMANTIC_GENERIC}; - const unsigned int semantic_indexes[2] = {0, 1}; - const unsigned int proc_type = TGSI_PROCESSOR_VERTEX; struct pipe_context *pipe; struct pipe_shader_state vs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -3057,101 +1491,54 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = 
tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); ti = 3; - /* Declare inputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* Declare outputs (pos, texcoords) */ - for (i = 0; i < num_attribs; i++) + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) { - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = semantic_names[i]; - decl.Semantic.SemanticIndex = semantic_indexes[i]; - decl.u.DeclarationRange.First = i; - decl.u.DeclarationRange.Last = i; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* MOV instructions */ - /* mov o0, i0 - mov o1, i1 */ - for (i = 0; i < num_attribs; i++) + /* + * mov o0, i0 ; Move pos in to pos out + * mov o1, i1 ; Move input texcoords to output + */ + for (i = 0; i < 2; i++) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = i; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = i; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* END instruction */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); vs.tokens = tokens; - context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); - free(tokens); return 0; @@ -3160,13 +1547,11 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) { const unsigned int max_tokens = 50; - const unsigned int proc_type = TGSI_PROCESSOR_FRAGMENT; struct pipe_context *pipe; struct pipe_shader_state fs; struct tgsi_token *tokens; struct tgsi_header *header; - struct tgsi_processor *processor; struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; @@ -3177,171 +1562,64 
@@ static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) assert(context); pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ header = (struct tgsi_header*)&tokens[1]; *header = tgsi_build_header(); - /* Processor */ - processor = (struct tgsi_processor*)&tokens[2]; - *processor = tgsi_build_processor(proc_type, header); + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); ti = 3; - /* Declare TEX[0] input */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 1; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare constant input */ - /* Constants include bias vector, 4x4 csc matrix, total 5 vectors */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_CONSTANT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 4; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* 
Declare output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* Declare sampler */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = 0; - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* TEX instruction */ - /* tex2d t0, i0, s0 */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* SUB instruction */ - /* sub t0, t0, c0 */ 
- inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_SUB; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* DP4 instruction */ - /* dp4 o0.x, t0, c1 - dp4 o0.y, t0, c2 - dp4 o0.z, t0, c3 - dp4 o0.w, t0, c4 */ + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ for (i = 0; i < 4; ++i) { - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_DP4; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.Instruction.NumSrcRegs = 2; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_CONSTANT; - inst.FullSrcRegisters[1].SrcRegister.Index = i + 1; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], 
header, max_tokens - ti); } - /* END instruction */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction - ( - &inst, - &tokens[ti], - header, - max_tokens - ti - ); + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); fs.tokens = tokens; - context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); - free(tokens); return 0; diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c new file mode 100644 index 0000000000..fd9de7dab8 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -0,0 +1,205 @@ +#include "vl_shader_build.h" +#include +#include +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + assert + ( + interpolation == TGSI_INTERPOLATE_CONSTANT || + interpolation == TGSI_INTERPOLATE_LINEAR || + interpolation == TGSI_INTERPOLATE_PERSPECTIVE + ); + + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = interpolation; + decl.u.DeclarationRange.First = first; + 
decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_CONSTANT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = name; + decl.Semantic.SemanticIndex = index; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = first; + decl.u.DeclarationRange.Last = last; + + return decl; +} + +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = src_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum 
tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = tex; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + + return inst; +} + +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; 
+ inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.FullSrcRegisters[2].SrcRegister.File = src3_file; + inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + + return inst; +} + +struct tgsi_full_instruction vl_end(void) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + + return inst; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h new file mode 100644 index 0000000000..9e64bbeeae --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -0,0 +1,61 @@ +#ifndef vl_shader_build_h +#define vl_shader_build_h + +#include + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +); +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +); +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + 
unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +); +struct tgsi_full_instruction vl_end(void); + +#endif + -- cgit v1.2.3 From 9833aec6cbd113d24277aa5da8625c1427d831ca Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 30 Jun 2008 19:26:22 -0400 Subject: g3dvl: Use block and stride instead of cpp and pitch. --- src/gallium/state_trackers/g3dvl/vl_context.c | 2 +- src/gallium/state_trackers/g3dvl/vl_shader_build.c | 23 ++++++++--------- src/gallium/state_trackers/g3dvl/vl_surface.c | 29 ++++++++++++---------- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 88da47c06a..bd8743dc9a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -1407,7 +1407,7 @@ static int vlInitMC(struct VL_CONTEXT *context) template.height[0] = 8 * 4; template.depth[0] = 1; template.compressed = 0; - template.cpp = 2; + pf_get_block(template.format, &template.block); context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c index fd9de7dab8..365ad69725 100644 --- 
a/src/gallium/state_trackers/g3dvl/vl_shader_build.c +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -11,8 +11,8 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = name; decl.Semantic.SemanticIndex = index; - decl.u.DeclarationRange.First = first; - decl.u.DeclarationRange.Last = last; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; return decl; } @@ -39,10 +39,9 @@ struct tgsi_full_declaration vl_decl_interpolated_input decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = name; decl.Semantic.SemanticIndex = index; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = interpolation; - decl.u.DeclarationRange.First = first; - decl.u.DeclarationRange.Last = last; + decl.Declaration.Interpolate = interpolation;; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; return decl; } @@ -55,8 +54,8 @@ struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int i decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = name; decl.Semantic.SemanticIndex = index; - decl.u.DeclarationRange.First = first; - decl.u.DeclarationRange.Last = last; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; return decl; } @@ -69,8 +68,8 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = name; decl.Semantic.SemanticIndex = index; - decl.u.DeclarationRange.First = first; - decl.u.DeclarationRange.Last = last; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; return decl; } @@ -81,8 +80,8 @@ struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int l decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = first; - decl.u.DeclarationRange.Last = last; + 
decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; return decl; } diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index d2220d7abf..68313cc750 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -98,6 +98,7 @@ static int vlGrabBlocks { struct pipe_surface *tex_surface; short *texels; + unsigned int tex_pitch; unsigned int tb, sb = 0; assert(context); @@ -111,6 +112,7 @@ static int vlGrabBlocks ); texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; for (tb = 0; tb < 4; ++tb) { @@ -121,35 +123,35 @@ static int vlGrabBlocks vlGrabFrameCodedFullBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, - tex_surface->pitch + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch ); else vlGrabFrameCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, - tex_surface->pitch + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch ); else if (sample_type == VL_FULL_SAMPLE) vlGrabFieldCodedFullBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch, - tex_surface->pitch + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch ); else vlGrabFieldCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + (tb % 2) * tex_surface->pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_surface->pitch, - tex_surface->pitch + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch ); ++sb; } else - vlGrabNoBlock(texels + tb * tex_surface->pitch * VL_BLOCK_HEIGHT, tex_surface->pitch); + vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); } pipe_surface_unmap(tex_surface); @@ -165,6 
+167,7 @@ static int vlGrabBlocks ); texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; if ((coded_block_pattern >> (1 - tb)) & 1) { @@ -173,20 +176,20 @@ static int vlGrabBlocks ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, texels, - tex_surface->pitch + tex_pitch ); else vlGrabFrameCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, texels, - tex_surface->pitch + tex_pitch ); ++sb; } else - vlGrabNoBlock(texels, tex_surface->pitch); + vlGrabNoBlock(texels, tex_pitch); pipe_surface_unmap(tex_surface); } @@ -223,7 +226,7 @@ int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) template.height[0] = sfc->height; template.depth[0] = 1; template.compressed = 0; - template.cpp = 4; + pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; sfc->texture = pipe->screen->texture_create(pipe->screen, &template); -- cgit v1.2.3 From 49937b99855984dd01a431c026f9308b6c0dac4f Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 3 Jul 2008 20:05:32 -0400 Subject: g3dvl: Round surfaces up to POT, use src rect when outputting surfaces. 
--- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 48 +++++++++++++++++++++------ src/gallium/state_trackers/g3dvl/vl_context.h | 2 +- src/gallium/state_trackers/g3dvl/vl_surface.c | 29 +++++++++++++--- src/gallium/state_trackers/g3dvl/vl_types.h | 6 ++++ src/gallium/state_trackers/g3dvl/vl_util.c | 17 ++++++++++ src/gallium/state_trackers/g3dvl/vl_util.h | 7 ++++ 7 files changed, 93 insertions(+), 18 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_util.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_util.h (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index bd46d004e2..50e3c843b5 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,5 +1,5 @@ TARGET = libg3dvl.a -OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o +OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o GALLIUMDIR = ../.. 
CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index bd8743dc9a..58971bd7c7 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -11,6 +11,7 @@ #include #include "vl_shader_build.h" #include "vl_data.h" +#include "vl_util.h" static int vlInitIDCT(struct VL_CONTEXT *context) { @@ -1357,8 +1358,9 @@ static int vlInitMC(struct VL_CONTEXT *context) pipe = context->pipe; - context->states.mc.viewport.scale[0] = context->video_width; - context->states.mc.viewport.scale[1] = context->video_height; + /* For MC we render to textures, which are rounded up to nearest POT */ + context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width); + context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height); context->states.mc.viewport.scale[2] = 1; context->states.mc.viewport.scale[3] = 1; context->states.mc.viewport.translate[0] = 0; @@ -1512,6 +1514,13 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + + /* + * decl c0 ; Scaling vector to scale texcoord rect to source size + * decl c1 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* * decl o0 ; Vertex pos @@ -1522,16 +1531,18 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + + /* mov o0, i0 ; Move pos in to pos out */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* - * mov o0, i0 ; Move pos in to pos out - * mov o1, i1 ; Move input texcoords to output - */ - for (i = 0; i < 2; i++) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } + /* add o1, t0, c1 ; Translate texcoord rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ inst = vl_end(); @@ -1693,6 +1704,19 @@ static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) context->states.csc.vertex_buf_elems[1].nr_components = 2; context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + /* + Create our vertex shader's constant buffer + Const buffer contains scaling and translation vectors + */ + context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS); + context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + context->states.csc.vs_const_buf.size + ); + /* Create our fragment shader's constant buffer Const buffer contains the color conversion matrix and bias vectors @@ -1776,6 +1800,7 @@ static int vlDestroyCSC(struct VL_CONTEXT *context) context->pipe->delete_fs_state(context->pipe, 
context->states.csc.fragment_shader); context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer); context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer); + context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer); context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer); return 0; @@ -1986,6 +2011,7 @@ int vlEndRender(struct VL_CONTEXT *context) pipe->bind_fs_state(pipe, context->states.csc.fragment_shader); pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs); pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf); pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf); return 0; diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index 8a12318073..9ebda21a1c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -49,7 +49,7 @@ struct VL_CONTEXT struct pipe_shader_state *vertex_shader, *fragment_shader; struct pipe_vertex_buffer vertex_bufs[2]; struct pipe_vertex_element vertex_buf_elems[2]; - struct pipe_constant_buffer fs_const_buf; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; } csc; } states; }; diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 68313cc750..13f7301f07 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -7,6 +7,7 @@ #include #include "vl_context.h" #include "vl_defs.h" +#include "vl_util.h" static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) { @@ -194,9 +195,6 @@ static int vlGrabBlocks pipe_surface_unmap(tex_surface); } - /* 
XXX: Texture cache is not invalidated when texture contents change */ - context->pipe->flush(context->pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL); - return 0; } @@ -214,8 +212,8 @@ int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) sfc = calloc(1, sizeof(struct VL_SURFACE)); sfc->context = context; - sfc->width = context->video_width; - sfc->height = context->video_height; + sfc->width = vlRoundUpPOT(context->video_width); + sfc->height = vlRoundUpPOT(context->video_height); sfc->format = context->video_format; memset(&template, 0, sizeof(struct pipe_texture)); @@ -227,6 +225,7 @@ int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) template.depth[0] = 1; template.compressed = 0; pf_get_block(template.format, &template.block); + /* XXX: Needed? */ template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; sfc->texture = pipe->screen->texture_create(pipe->screen, &template); @@ -517,6 +516,7 @@ int vlPutSurface { unsigned int create_fb = 0; struct pipe_context *pipe; + struct VL_CSC_VS_CONSTS *vs_consts; assert(surface); @@ -568,9 +568,28 @@ int vlPutSurface vlEndRender(surface->context); + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + surface->context->states.csc.vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->src_scale.x = srcw / (float)surface->width; + vs_consts->src_scale.y = srch / (float)surface->height; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = srcx / (float)surface->width; + vs_consts->src_trans.y = srcy / (float)surface->height; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.csc.vs_const_buf.buffer); + pipe->set_sampler_textures(pipe, 1, &surface->texture); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + /* XXX: Need to take destx, desty into consideration */ 
pipe->winsys->flush_frontbuffer ( pipe->winsys, diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index 97753699db..4d210c9e0a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -75,6 +75,12 @@ struct VL_MC_FS_CONSTS struct VL_VERTEX4F y_divider; }; +struct VL_CSC_VS_CONSTS +{ + struct VL_VERTEX4F src_scale; + struct VL_VERTEX4F src_trans; +}; + struct VL_CSC_FS_CONSTS { struct VL_VERTEX4F bias; diff --git a/src/gallium/state_trackers/g3dvl/vl_util.c b/src/gallium/state_trackers/g3dvl/vl_util.c new file mode 100644 index 0000000000..2421ae2210 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.c @@ -0,0 +1,17 @@ +#include "vl_util.h" +#include <assert.h> + +unsigned int vlRoundUpPOT(unsigned int x) +{ + unsigned int i; + + assert(x > 0); + + --x; + + for (i = 1; i < sizeof(unsigned int) * 8; i <<= 1) + x |= x >> i; + + return x + 1; +} + diff --git a/src/gallium/state_trackers/g3dvl/vl_util.h b/src/gallium/state_trackers/g3dvl/vl_util.h new file mode 100644 index 0000000000..e4b72c4f87 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.h @@ -0,0 +1,7 @@ +#ifndef vl_util_h +#define vl_util_h + +unsigned int vlRoundUpPOT(unsigned int x); + +#endif + -- cgit v1.2.3 From 6235141fd2c7af21c2b41ca66f06abc3cb0bbc24 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 6 Jul 2008 22:04:29 -0400 Subject: g3dvl: IDCT part 1. Very basic IDCT support is in, performed CPU-side for now. 
--- src/gallium/state_trackers/g3dvl/vl_context.c | 241 +++++++++++++++++++++++++- src/gallium/state_trackers/g3dvl/vl_context.h | 10 ++ src/gallium/state_trackers/g3dvl/vl_surface.c | 136 ++++++++++++++- 3 files changed, 379 insertions(+), 8 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 58971bd7c7..1668ad1651 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -11,22 +11,262 @@ #include #include "vl_shader_build.h" #include "vl_data.h" +#include "vl_defs.h" #include "vl_util.h" +static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move pos in to pos out + * mov o1, i1 ; Move texcoord in to texcoord out */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = context->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, 
max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + static int vlInitIDCT(struct VL_CONTEXT *context) { + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int i; + assert(context); + pipe = context->pipe; + + context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH; + context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT; + context->states.idct.viewport.scale[2] = 1; + context->states.idct.viewport.scale[3] = 1; + context->states.idct.viewport.translate[0] = 0; + context->states.idct.viewport.translate[1] = 0; + context->states.idct.viewport.translate[2] = 0; + context->states.idct.viewport.translate[3] = 0; + + context->states.idct.render_target.width = VL_BLOCK_WIDTH; + context->states.idct.render_target.height = VL_BLOCK_HEIGHT; + 
context->states.idct.render_target.num_cbufs = 1; + context->states.idct.render_target.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler); + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8L8_UNORM; + template.last_level = 0; + template.width[0] = 8; + template.height[0] = 8; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template); + + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.width[0] = 16; + template.height[0] = 1; + context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template); + + for (i = 0; i < 2; ++i) + { + context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i]; + context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i]; + /* + context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F); + context->states.idct.vertex_bufs[i].max_index = 3; + context->states.idct.vertex_bufs[i].buffer_offset = 0; + context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct VL_VERTEX2F) * 4 + ); + + 
context->states.idct.vertex_buf_elems[i].src_offset = 0; + context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i; + context->states.idct.vertex_buf_elems[i].nr_components = 2; + context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; + */ + } + + vlCreateVertexShaderFrameIDCT(context); + vlCreateFragmentShaderFrameIDCT(context); return 0; } static int vlDestroyIDCT(struct VL_CONTEXT *context) { + //unsigned int i; + assert(context); + context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler); + + //for (i = 0; i < 2; ++i) + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer); + + pipe_texture_release(&context->states.idct.texture); + pipe_texture_release(&context->states.idct.basis); + + //context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs); + //context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs); + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer); + //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer); return 0; } @@ -1271,7 +1511,6 @@ int vlCreateDataBufsMC(struct VL_CONTEXT *context) context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Create our texcoord buffers and texcoord buffer elements */ - /* TODO: Should be able to use 1 texcoord buf for chroma textures, 1 buf for ref surfaces */ for (i = 1; i < 3; ++i) { context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index 9ebda21a1c..bff318854a 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -26,6 +26,16 @@ struct VL_CONTEXT struct { + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct 
pipe_sampler_state *sampler; + struct pipe_texture *texture; + struct pipe_texture *basis; + struct pipe_shader_state *frame_vs; + struct pipe_shader_state *frame_fs; + struct pipe_vertex_buffer *vertex_bufs[2]; + struct pipe_vertex_element *vertex_buf_elems[2]; + //struct pipe_constant_buffer vs_const_buf, fs_const_buf; } idct; struct diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 13f7301f07..145ea32892 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -9,6 +9,59 @@ #include "vl_defs.h" #include "vl_util.h" +static int vlTransformBlock(short *src, short *dst, short bias) +{ + static const float basis[8][8] = + { + {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, + {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, + {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, + {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, + {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, + {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, + {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, + {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} + }; + + unsigned int x, y; + short tmp[64]; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + tmp[y * VL_BLOCK_WIDTH + x] = (short) + ( + src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + + src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + + src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + + src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + + src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + + src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + + src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + + src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] + ); + + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + { + dst[y * VL_BLOCK_WIDTH + x] = bias + (short) + ( 
+ tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + + tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + + tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + + tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + + tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + + tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + + tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + + tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] + ); + if (dst[y * VL_BLOCK_WIDTH + x] > 255) + dst[y * VL_BLOCK_WIDTH + x] = 255; + else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) + dst[y * VL_BLOCK_WIDTH + x] = 0; + } + return 0; +} + static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -102,6 +155,9 @@ static int vlGrabBlocks unsigned int tex_pitch; unsigned int tb, sb = 0; + const int do_idct = 1; + short temp_block[64]; + assert(context); assert(blocks); @@ -121,6 +177,17 @@ static int vlGrabBlocks { if (dct_type == VL_DCT_FRAME_CODED) if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFrameCodedFullBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + } + else vlGrabFrameCodedFullBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -128,6 +195,17 @@ static int vlGrabBlocks tex_pitch ); else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFrameCodedDiffBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + } + else vlGrabFrameCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -136,6 +214,17 @@ static int vlGrabBlocks ); else if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFieldCodedFullBlock + ( + temp_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + } + else vlGrabFieldCodedFullBlock ( blocks + sb * 
VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -143,6 +232,17 @@ static int vlGrabBlocks tex_pitch ); else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFieldCodedDiffBlock + ( + temp_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + } + else vlGrabFieldCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -173,6 +273,17 @@ static int vlGrabBlocks if ((coded_block_pattern >> (1 - tb)) & 1) { if (sample_type == VL_FULL_SAMPLE) + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + vlGrabFrameCodedFullBlock + ( + temp_block, + texels, + tex_pitch + ); + } + else vlGrabFrameCodedFullBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -180,6 +291,17 @@ static int vlGrabBlocks tex_pitch ); else + if (do_idct) + { + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + vlGrabFrameCodedDiffBlock + ( + temp_block, + texels, + tex_pitch + ); + } + else vlGrabFrameCodedDiffBlock ( blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, @@ -266,6 +388,8 @@ int vlRenderIMacroBlock if (picture_type != VL_FRAME_PICTURE) return 0; + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); + pipe = surface->context->pipe; vs_consts = pipe->winsys->buffer_map @@ -298,8 +422,6 @@ int vlRenderIMacroBlock pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -335,6 +457,8 @@ int vlRenderPMacroBlock if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + pipe = surface->context->pipe; vs_consts = pipe->winsys->buffer_map @@ -390,8 +514,6 
@@ int vlRenderPMacroBlock pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -428,6 +550,8 @@ int vlRenderBMacroBlock if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) return 0; + vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); + pipe = surface->context->pipe; vs_consts = pipe->winsys->buffer_map @@ -492,8 +616,6 @@ int vlRenderBMacroBlock pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); return 0; @@ -589,7 +711,7 @@ int vlPutSurface pipe->set_sampler_textures(pipe, 1, &surface->texture); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - /* XXX: Need to take destx, desty into consideration */ + /* TODO: Need to take destx, desty into consideration */ pipe->winsys->flush_frontbuffer ( pipe->winsys, -- cgit v1.2.3 From c243573fafe8e83d4964535b201c499164d7c172 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 19 Jul 2008 16:22:56 -0400 Subject: g3dvl: Fix some memory leaks in the winsys. 
--- src/gallium/state_trackers/g3dvl/vl_context.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 1668ad1651..3d4ca7cf4e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -2208,8 +2208,6 @@ int vlDestroyContext(struct VL_CONTEXT *context) vlDestroy(context); - context->pipe->destroy(context->pipe); - free(context); return 0; -- cgit v1.2.3 From 0c25ac52425e6d6eb037b99ab90f41b47e3f4491 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Tue, 22 Jul 2008 22:26:26 -0400 Subject: g3dvl: Add Nouveau winsys, libdriclient. Nouveau winsys is based on Mesa's Nouveau winsys and soft-links to most of it. The 'nouveau_context' and 'nouveau_screen' code contains most of the changes, 'nouveau_winsys_pipe', 'nouveau_swapbuffers' and 'nouveau_lock' contain some minor changes. The driclient library contains the DRI userland stuff, most of which was based on Mesa's DRI code. --- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 68 ++++++++++++++++++---- src/gallium/state_trackers/g3dvl/vl_shader_build.c | 12 ++++ src/gallium/state_trackers/g3dvl/vl_shader_build.h | 1 + src/gallium/state_trackers/g3dvl/vl_surface.c | 4 +- 5 files changed, 73 insertions(+), 14 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 50e3c843b5..c6a22cad4e 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -2,7 +2,7 @@ TARGET = libg3dvl.a OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o GALLIUMDIR = ../.. 
-CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary +CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl ############################################# diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 3d4ca7cf4e..850a769376 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -330,6 +330,10 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -354,7 +358,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -432,7 +436,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -499,6 +503,10 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ 
-527,7 +535,7 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -603,6 +611,10 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -643,7 +655,7 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -700,6 +712,10 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -761,7 +777,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -821,6 +837,10 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * 
decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -928,7 +948,7 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -998,6 +1018,10 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -1030,7 +1054,7 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -1104,6 +1128,10 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -1148,7 +1176,7 @@ static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -1207,6 +1235,10 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += 
tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -1283,7 +1315,7 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -1346,6 +1378,10 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -1479,7 +1515,7 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } @@ -1771,6 +1807,10 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* mov o0, i0 ; Move pos in to pos out */ inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -1789,7 +1829,7 @@ static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); - free(tokens); + //free(tokens); return 0; } @@ -1839,6 +1879,10 @@ static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) decl = 
vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl s0 ; Sampler for tex containing picture to display */ decl = vl_decl_samplers(0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1870,7 +1914,7 @@ static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); - free(tokens); + //free(tokens); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c index 365ad69725..1dc5be6fdb 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.c +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -74,6 +74,18 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde return decl; } +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last) +{ + struct tgsi_full_declaration decl = tgsi_default_full_declaration(); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + return decl; +} + struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last) { struct tgsi_full_declaration decl = tgsi_default_full_declaration(); diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h index 9e64bbeeae..878d7e2c45 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.h +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -14,6 +14,7 @@ struct tgsi_full_declaration vl_decl_interpolated_input ); struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned 
int last); struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last); struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); struct tgsi_full_instruction vl_inst2 ( diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 145ea32892..9b91ab4e22 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "vl_context.h" #include "vl_defs.h" #include "vl_util.h" @@ -711,12 +712,13 @@ int vlPutSurface pipe->set_sampler_textures(pipe, 1, &surface->texture); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + bind_pipe_drawable(pipe, drawable); /* TODO: Need to take destx, desty into consideration */ pipe->winsys->flush_frontbuffer ( pipe->winsys, surface->context->states.csc.framebuffer.cbufs[0], - &drawable + pipe->priv ); vlBeginRender(surface->context); -- cgit v1.2.3 From a8da04cb861b8f9caf3acd33f52f64621f0c15e2 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 23 Jul 2008 23:35:23 -0400 Subject: nv all: Copy shader tokens on create, free on delete. Must copy token stream on shader create, client is allowed to free their copy after creating the state object. 
--- src/gallium/state_trackers/g3dvl/vl_context.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 850a769376..638900b3f4 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -358,7 +358,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.i_vs = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -436,7 +436,7 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -535,7 +535,7 @@ static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -655,7 +655,7 @@ static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -777,7 +777,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -948,7 +948,7 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -1054,7 +1054,7 @@ static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -1176,7 +1176,7 @@ static int 
vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) vs.tokens = tokens; context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - //free(tokens); + free(tokens); return 0; } @@ -1315,7 +1315,7 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } @@ -1515,7 +1515,7 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) fs.tokens = tokens; context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - //free(tokens); + free(tokens); return 0; } -- cgit v1.2.3 From 1d27b4bc0f291ec955e59b1ead943100d8a15505 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 30 Jul 2008 16:47:10 -0400 Subject: g3dvl: Use R16_SNORM instead of A8L8_UNORM for block rendering. --- src/gallium/state_trackers/g3dvl/vl_context.c | 248 ++++++++++++-------------- src/gallium/state_trackers/g3dvl/vl_data.c | 3 +- src/gallium/state_trackers/g3dvl/vl_surface.c | 178 ++++-------------- 3 files changed, 153 insertions(+), 276 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 638900b3f4..542ba996a7 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -365,7 +365,7 @@ static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) { - const unsigned int max_tokens = 50; + const unsigned int max_tokens = 100; struct pipe_context *pipe; struct pipe_shader_state fs; @@ -402,11 +402,19 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm 
to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture @@ -419,16 +427,30 @@ static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) } /* - * tex2d o0.x, i0, s0 ; Read texel from luma texture into .x channel - * tex2d o0.y, i1, s1 ; Read texel from chroma Cb texture into .y channel - * tex2d o0.z, i1, s2 ; Read texel from chroma Cr texture into .z channel - */ + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 
1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ inst = vl_end(); @@ -701,10 +723,7 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - */ + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -729,38 +748,29 @@ static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel 
from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract 
bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ @@ -826,11 +836,10 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) } /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - * decl c2 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ @@ -854,42 +863,35 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for 
(i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ 
+ inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i2, s3 ; Read texel from ref macroblock top field - tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field */ + /* + * tex2d t1, i2, s3 ; Read texel from ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field + */ for (i = 0; i < 2; ++i) { inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); @@ -897,8 +899,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) } /* XXX: Pos values off by 0.5? */ - /* sub t4, i4.y, c2.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_INPUT, 2); + /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -909,8 +911,8 @@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -921,8 +923,8 
@@ static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -1224,11 +1226,10 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) } /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - * decl c2 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* decl o0 ; Fragment color */ @@ -1253,38 +1254,29 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma 
texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += 
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* @@ -1297,8 +1289,8 @@ static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1366,12 +1358,11 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) } /* - * decl c0 ; Multiplier to shift 9th bit of differential into place - * decl c1 ; Bias to get differential back to a signed value - * decl c2 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels * ; and for Y-mod-2 top/bottom field selection */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 2); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* 
decl o0 ; Fragment color */ @@ -1396,43 +1387,34 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) } /* - * tex2d t0.xw, i0, s0 ; Read texel from luma texture into .x and .w channels - * mov t1.x, t0.w ; Move 9th bit from .w channel to .x - * tex2d t0.yw, i1, s1 ; Read texel from chroma Cb texture into .y and .w channels - * mov t1.y, t0.w ; Move 9th bit from .w channel to .y - * tex2d t0.zw, i1, s2 ; Read texel from chroma Cr texture into .z and .w channels - * mov t1.z, t0.w ; Move 9th bit from .w channel to .z + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - inst.FullDstRegisters[0].DstRegister.WriteMask = (TGSI_WRITEMASK_X << i) | TGSI_WRITEMASK_W; + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 
1 : 0, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } - /* mul t1, t1, c0 ; Muliply 9th bit by multiplier to shift it into place */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t0, t0, t1 ; Add luma and chroma low and high parts to get normalized unsigned 9-bit values */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c1 ; Subtract bias to get back signed values */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* XXX: Pos values off by 0.5? 
*/ - /* sub t4, i6.y, c2.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 2); + /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -1443,8 +1425,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t4, c2.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 2); + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1455,8 +1437,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* mul t3, t3, c2.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 2); + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[1].SrcRegister.SwizzleZ 
= TGSI_SWIZZLE_Y; @@ -1497,8 +1479,8 @@ static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* lerp t1, c2.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; @@ -1678,7 +1660,7 @@ static int vlInitMC(struct VL_CONTEXT *context) memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_A8L8_UNORM; + template.format = PIPE_FORMAT_R16_SNORM; template.last_level = 0; template.width[0] = 8; template.height[0] = 8 * 4; diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c index 7e6ee8ac12..0e5c8c77f9 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -86,8 +86,7 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*) */ const struct VL_MC_FS_CONSTS vl_mc_fs_consts = { - {256.0f, 256.0f, 256.0f, 0.0f}, - {256.0f / 255.0f, 256.0f / 255.0f, 256.0f / 255.0f, 0.0f}, + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, {0.5f, 2.0f, 0.0f, 0.0f} }; diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 9b91ab4e22..6d4e14b95c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ 
b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -63,7 +63,7 @@ static int vlTransformBlock(short *src, short *dst, short bias) return 0; } -static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -78,18 +78,7 @@ static int vlGrabFrameCodedFullBlock(short *src, short *dst, unsigned int dst_pi return 0; } -static int vlGrabFrameCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int x, y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; - - return 0; -} - -static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pitch) +static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -114,30 +103,17 @@ static int vlGrabFieldCodedFullBlock(short *src, short *dst, unsigned int dst_pi return 0; } -static int vlGrabFieldCodedDiffBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int x, y; - - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch * 2 + x] = src[y * VL_BLOCK_WIDTH + x] + 0x100; - - return 0; -} - static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) { - unsigned int x, y; + unsigned int y; for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - dst[y * dst_pitch + x] = 0x100; + memset + ( + dst + y * dst_pitch, + 0, + VL_BLOCK_WIDTH * 2 + ); return 0; } @@ -156,7 +132,6 @@ static int vlGrabBlocks unsigned int tex_pitch; unsigned int tb, sb = 0; - const int do_idct = 1; short temp_block[64]; assert(context); @@ -176,80 +151,26 @@ static int vlGrabBlocks { if 
((coded_block_pattern >> (5 - tb)) & 1) { + if (sample_type == VL_FULL_SAMPLE) + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); + else + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + if (dct_type == VL_DCT_FRAME_CODED) - if (sample_type == VL_FULL_SAMPLE) - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - vlGrabFrameCodedFullBlock - ( - temp_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - } - else - vlGrabFrameCodedFullBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - else - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); - vlGrabFrameCodedDiffBlock - ( - temp_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - } - else - vlGrabFrameCodedDiffBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); + vlGrabFrameCodedBlock + ( + temp_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); else - if (sample_type == VL_FULL_SAMPLE) - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - vlGrabFieldCodedFullBlock - ( - temp_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - } - else - vlGrabFieldCodedFullBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - else - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); - vlGrabFieldCodedDiffBlock - ( - temp_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - } - else - vlGrabFieldCodedDiffBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels + (tb % 2) * 
tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); + vlGrabFieldCodedBlock + ( + blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + ++sb; } else @@ -272,43 +193,18 @@ static int vlGrabBlocks tex_pitch = tex_surface->stride / tex_surface->block.size; if ((coded_block_pattern >> (1 - tb)) & 1) - { + { if (sample_type == VL_FULL_SAMPLE) - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - vlGrabFrameCodedFullBlock - ( - temp_block, - texels, - tex_pitch - ); - } - else - vlGrabFrameCodedFullBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels, - tex_pitch - ); + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); else - if (do_idct) - { - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); - vlGrabFrameCodedDiffBlock - ( - temp_block, - texels, - tex_pitch - ); - } - else - vlGrabFrameCodedDiffBlock - ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, - texels, - tex_pitch - ); + vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + + vlGrabFrameCodedBlock + ( + temp_block, + texels, + tex_pitch + ); ++sb; } -- cgit v1.2.3 From 7fde9febd6f212494730ebef916fe25c95d30be9 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 3 Aug 2008 18:21:13 -0400 Subject: g3dvl: Temporarily disable IDCT. 
--- src/gallium/state_trackers/g3dvl/vl_context.c | 4 ++-- src/gallium/state_trackers/g3dvl/vl_surface.c | 30 +++++++++++++++------------ 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 542ba996a7..d446d218a4 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -1625,8 +1625,8 @@ static int vlInitMC(struct VL_CONTEXT *context) context->states.mc.viewport.translate[2] = 0; context->states.mc.viewport.translate[3] = 0; - context->states.mc.render_target.width = context->video_width; - context->states.mc.render_target.height = context->video_height; + context->states.mc.render_target.width = vlRoundUpPOT(context->video_width); + context->states.mc.render_target.height = vlRoundUpPOT(context->video_height); context->states.mc.render_target.num_cbufs = 1; /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ context->states.mc.render_target.zsbuf = NULL; diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 6d4e14b95c..1386b1107c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -10,6 +10,9 @@ #include "vl_defs.h" #include "vl_util.h" +/*#define DO_IDCT*/ + +#ifdef DO_IDCT static int vlTransformBlock(short *src, short *dst, short bias) { static const float basis[8][8] = @@ -62,6 +65,7 @@ static int vlTransformBlock(short *src, short *dst, short bias) } return 0; } +#endif static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) { @@ -132,8 +136,6 @@ static int vlGrabBlocks unsigned int tex_pitch; unsigned int tb, sb = 0; - short temp_block[64]; - assert(context); assert(blocks); @@ -151,22 +153,23 @@ static int vlGrabBlocks { if ((coded_block_pattern >> (5 - tb)) & 1) { - if (sample_type == 
VL_FULL_SAMPLE) - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - else - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + +#ifdef DO_IDCT + vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 128 : 0); +#endif if (dct_type == VL_DCT_FRAME_CODED) vlGrabFrameCodedBlock ( - temp_block, + cur_block, texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch ); else vlGrabFieldCodedBlock ( - blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, + cur_block, texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, tex_pitch ); @@ -194,14 +197,15 @@ static int vlGrabBlocks if ((coded_block_pattern >> (1 - tb)) & 1) { - if (sample_type == VL_FULL_SAMPLE) - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 128); - else - vlTransformBlock(blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT, temp_block, 0); + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + +#ifdef DO_IDCT + vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 128 : 0); +#endif vlGrabFrameCodedBlock ( - temp_block, + cur_block, texels, tex_pitch ); -- cgit v1.2.3 From fb2732ff056a83418a379b81c1e5da50675c41ac Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 9 Aug 2008 20:52:36 -0400 Subject: g3dvl: Merged tgsi/util and tgsi/exec moved some headers around. 
--- src/gallium/state_trackers/g3dvl/vl_context.c | 4 ++-- src/gallium/state_trackers/g3dvl/vl_shader_build.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index d446d218a4..5616de0ba4 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -7,8 +7,8 @@ #include #include #include -#include -#include +#include +#include #include "vl_shader_build.h" #include "vl_data.h" #include "vl_defs.h" diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c index 1dc5be6fdb..5f30e23ff8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.c +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -1,7 +1,7 @@ #include "vl_shader_build.h" #include -#include -#include +#include +#include struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last) { -- cgit v1.2.3 From 6858dd50c9b696c1c6044f5a403000f9d20b286b Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 16 Aug 2008 13:04:23 -0400 Subject: g3dvl: Modularized rendering, refactored to accommodate VAAPI, other APIs. 
--- src/gallium/state_trackers/g3dvl/Makefile | 4 +- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 694 ++++++ src/gallium/state_trackers/g3dvl/vl_basic_csc.h | 13 + src/gallium/state_trackers/g3dvl/vl_context.c | 2272 +------------------ src/gallium/state_trackers/g3dvl/vl_context.h | 118 +- src/gallium/state_trackers/g3dvl/vl_csc.h | 53 + src/gallium/state_trackers/g3dvl/vl_data.c | 130 +- src/gallium/state_trackers/g3dvl/vl_data.h | 19 +- src/gallium/state_trackers/g3dvl/vl_defs.h | 1 - src/gallium/state_trackers/g3dvl/vl_display.c | 48 + src/gallium/state_trackers/g3dvl/vl_display.h | 29 + src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 2315 ++++++++++++++++++++ src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h | 18 + src/gallium/state_trackers/g3dvl/vl_render.h | 33 + src/gallium/state_trackers/g3dvl/vl_screen.c | 115 + src/gallium/state_trackers/g3dvl/vl_screen.h | 63 + src/gallium/state_trackers/g3dvl/vl_shader_build.c | 37 +- src/gallium/state_trackers/g3dvl/vl_shader_build.h | 1 - src/gallium/state_trackers/g3dvl/vl_surface.c | 671 +----- src/gallium/state_trackers/g3dvl/vl_surface.h | 91 +- src/gallium/state_trackers/g3dvl/vl_types.h | 124 +- src/gallium/state_trackers/g3dvl/vl_util.c | 9 +- src/gallium/state_trackers/g3dvl/vl_util.h | 1 - 23 files changed, 3794 insertions(+), 3065 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_basic_csc.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_basic_csc.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_csc.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_display.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_display.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_render.h create mode 100644 src/gallium/state_trackers/g3dvl/vl_screen.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_screen.h (limited to 
'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index c6a22cad4e..9995c554ab 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,5 +1,6 @@ TARGET = libg3dvl.a -OBJECTS = vl_context.o vl_data.o vl_surface.o vl_shader_build.o vl_util.o +OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_data.o vl_shader_build.o vl_util.o vl_basic_csc.o \ + vl_r16snorm_mc.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl @@ -15,4 +16,3 @@ ${TARGET}: ${OBJECTS} clean: rm -rf ${OBJECTS} ${TARGET} - diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c new file mode 100644 index 0000000000..ea003a31d1 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -0,0 +1,694 @@ +#define VL_INTERNAL +#include "vl_basic_csc.h" +#include +#include +#include +#include +#include +#include +#include +#include "vl_csc.h" +#include "vl_surface.h" +#include "vl_shader_build.h" +#include "vl_types.h" + +struct vlVertexShaderConsts +{ + struct vlVertex4f src_scale; + struct vlVertex4f src_trans; +}; + +struct vlFragmentShaderConsts +{ + struct vlVertex4f bias; + float matrix[16]; +}; + +struct vlBasicCSC +{ + struct vlCSC base; + + struct pipe_context *pipe; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + void *sampler; + void *vertex_shader, *fragment_shader; + struct pipe_vertex_buffer vertex_bufs[2]; + struct pipe_vertex_element vertex_elems[2]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +static int vlResizeFrameBuffer +( + struct vlCSC *csc, + unsigned int width, + unsigned int height +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + 
if (basic_csc->framebuffer.width == width && basic_csc->framebuffer.height == height) + return 0; + + if (basic_csc->framebuffer.cbufs[0]) + pipe->winsys->surface_release + ( + pipe->winsys, + &basic_csc->framebuffer.cbufs[0] + ); + + basic_csc->viewport.scale[0] = width; + basic_csc->viewport.scale[1] = height; + basic_csc->viewport.scale[2] = 1; + basic_csc->viewport.scale[3] = 1; + basic_csc->viewport.translate[0] = 0; + basic_csc->viewport.translate[1] = 0; + basic_csc->viewport.translate[2] = 0; + basic_csc->viewport.translate[3] = 0; + + basic_csc->framebuffer.width = width; + basic_csc->framebuffer.height = height; + basic_csc->framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys); + pipe->winsys->surface_alloc_storage + ( + pipe->winsys, + basic_csc->framebuffer.cbufs[0], + width, + height, + PIPE_FORMAT_A8R8G8B8_UNORM, + /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ + PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, + 0 + ); + + return 0; +} + +static int vlBegin +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + pipe->set_framebuffer_state(pipe, &basic_csc->framebuffer); + pipe->set_viewport_state(pipe, &basic_csc->viewport); + pipe->bind_sampler_states(pipe, 1, (void**)&basic_csc->sampler); + /* Source texture set in vlPutSurface() */ + pipe->bind_vs_state(pipe, basic_csc->vertex_shader); + pipe->bind_fs_state(pipe, basic_csc->fragment_shader); + pipe->set_vertex_buffers(pipe, 2, basic_csc->vertex_bufs); + pipe->set_vertex_elements(pipe, 2, basic_csc->vertex_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &basic_csc->vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &basic_csc->fs_const_buf); + + return 0; +} + +static int vlPutPictureCSC +( + struct vlCSC *csc, + struct vlSurface *surface, + int srcx, + int srcy, + int srcw, + int srch, 
+ int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(csc); + assert(surface); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + basic_csc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->src_scale.x = srcw / (float)surface->texture->width[0]; + vs_consts->src_scale.y = srch / (float)surface->texture->height[0]; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = srcx / (float)surface->texture->width[0]; + vs_consts->src_trans.y = srcy / (float)surface->texture->height[0]; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe->winsys->buffer_unmap(pipe->winsys, basic_csc->vs_const_buf.buffer); + + pipe->set_sampler_textures(pipe, 1, &surface->texture); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + + return 0; +} + +static int vlEnd +( + struct vlCSC *csc +) +{ + assert(csc); + + return 0; +} + +static struct pipe_surface* vlGetFrameBuffer +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + + return basic_csc->framebuffer.cbufs[0]; +} + +static int vlDestroy +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + unsigned int i; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + if (basic_csc->framebuffer.cbufs[0]) + pipe->winsys->surface_release + ( + pipe->winsys, + &basic_csc->framebuffer.cbufs[0] + ); + + pipe->delete_sampler_state(pipe, basic_csc->sampler); + pipe->delete_vs_state(pipe, basic_csc->vertex_shader); + pipe->delete_fs_state(pipe, basic_csc->fragment_shader); + + for (i = 0; i < 2; ++i) + pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vertex_bufs[i].buffer); + + 
pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vs_const_buf.buffer); + pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->fs_const_buf.buffer); + + free(basic_csc); + + return 0; +} + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. + */ +static const struct vlVertex2f surface_verts[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + * TODO: Duplicate these in the shader, no need to create a buffer. + */ +static const struct vlVertex2f *surface_texcoords = surface_verts; + +/* + * Identity color conversion constants, for debugging + */ +static const struct vlFragmentShaderConsts identity = +{ + { + 0.0f, 0.0f, 0.0f, 0.0f + }, + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct vlFragmentShaderConsts bt_601 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const struct vlFragmentShaderConsts bt_601_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct vlFragmentShaderConsts bt_709 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 
1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct vlFragmentShaderConsts bt_709_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +static int vlCreateVertexShader +( + struct vlBasicCSC *csc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = csc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale texcoord rect to source size + * decl c1 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mov o0, i0 ; Move pos in to pos out */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o1, t0, c1 ; Translate texcoord rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + csc->vertex_shader = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShader +( + struct vlBasicCSC *csc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = csc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords 
for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + 
csc->fragment_shader = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateDataBufs +( + struct vlBasicCSC *csc +) +{ + struct pipe_context *pipe; + + assert(csc); + + pipe = csc->pipe; + + /* + Create our vertex buffer and vertex buffer element + VB contains 4 vertices that render a quad covering the entire window + to display a rendered surface + Quad is rendered as a tri strip + */ + csc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); + csc->vertex_bufs[0].max_index = 3; + csc->vertex_bufs[0].buffer_offset = 0; + csc->vertex_bufs[0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_verts, + sizeof(struct vlVertex2f) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[0].buffer); + + csc->vertex_elems[0].src_offset = 0; + csc->vertex_elems[0].vertex_buffer_index = 0; + csc->vertex_elems[0].nr_components = 2; + csc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our texcoord buffer and texcoord buffer element + Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + csc->vertex_bufs[1].pitch = sizeof(struct vlVertex2f); + csc->vertex_bufs[1].max_index = 3; + csc->vertex_bufs[1].buffer_offset = 0; + csc->vertex_bufs[1].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_texcoords, + sizeof(struct vlVertex2f) * 4 + ); + + pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[1].buffer); + + csc->vertex_elems[1].src_offset = 0; + csc->vertex_elems[1].vertex_buffer_index = 1; + csc->vertex_elems[1].nr_components = 2; + csc->vertex_elems[1].src_format = 
PIPE_FORMAT_R32G32_FLOAT; + + /* + Create our vertex shader's constant buffer + Const buffer contains scaling and translation vectors + */ + csc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); + csc->vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + csc->vs_const_buf.size + ); + + /* + Create our fragment shader's constant buffer + Const buffer contains the color conversion matrix and bias vectors + */ + csc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); + csc->fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + csc->fs_const_buf.size + ); + + /* + TODO: Refactor this into a seperate function, + allow changing the CSC matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &bt_601, + sizeof(struct vlFragmentShaderConsts) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, csc->fs_const_buf.buffer); + + return 0; +} + +static int vlInit +( + struct vlBasicCSC *csc +) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + + assert(csc); + + pipe = csc->pipe; + + /* Delay creating the FB until vlPutSurface() so we know window size */ + csc->framebuffer.num_cbufs = 1; + csc->framebuffer.cbufs[0] = NULL; + csc->framebuffer.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + 
/*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + csc->sampler = pipe->create_sampler_state(pipe, &sampler); + + vlCreateVertexShader(csc); + vlCreateFragmentShader(csc); + vlCreateDataBufs(csc); + + return 0; +} + +int vlCreateBasicCSC +( + struct pipe_context *pipe, + struct vlCSC **csc +) +{ + struct vlBasicCSC *basic_csc; + + assert(pipe); + assert(csc); + + basic_csc = calloc(1, sizeof(struct vlBasicCSC)); + + if (!basic_csc) + return 1; + + basic_csc->base.vlResizeFrameBuffer = &vlResizeFrameBuffer; + basic_csc->base.vlBegin = &vlBegin; + basic_csc->base.vlPutPicture = &vlPutPictureCSC; + basic_csc->base.vlEnd = &vlEnd; + basic_csc->base.vlGetFrameBuffer = &vlGetFrameBuffer; + basic_csc->base.vlDestroy = &vlDestroy; + basic_csc->pipe = pipe; + + vlInit(basic_csc); + + *csc = &basic_csc->base; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.h b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h new file mode 100644 index 0000000000..2e17f1d814 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h @@ -0,0 +1,13 @@ +#ifndef vl_basic_csc_h +#define vl_basic_csc_h + +struct pipe_context; +struct vlCSC; + +int vlCreateBasicCSC +( + struct pipe_context *pipe, + struct vlCSC **csc +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 5616de0ba4..56d360c05b 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -1,2088 +1,26 @@ +#define VL_INTERNAL #include "vl_context.h" #include #include #include -#include -#include #include -#include -#include -#include -#include -#include "vl_shader_build.h" -#include "vl_data.h" -#include "vl_defs.h" -#include "vl_util.h" +#include "vl_render.h" +#include "vl_r16snorm_mc.h" +#include "vl_csc.h" +#include "vl_basic_csc.h" -static int vlCreateVertexShaderFrameIDCT(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct 
pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move pos in to pos out - * mov o1, i1 ; Move texcoord in to texcoord out */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - //context->states.idct.frame_vs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameIDCT(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* decl i0 ; Texcoords for s0 */ - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl s0 ; Sampler for tex containing picture to display */ - decl = vl_decl_samplers(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, 
max_tokens - ti); - - /* tex2d t0, i0, s0 ; Read src pixel */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c0 ; Subtract bias vector from pixel */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix - * dp4 o0.y, t0, c2 - * dp4 o0.z, t0, c3 - * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - //context->states.idct.frame_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlInitIDCT(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - struct pipe_texture template; - unsigned int i; - - assert(context); - - pipe = context->pipe; - - context->states.idct.viewport.scale[0] = VL_BLOCK_WIDTH; - context->states.idct.viewport.scale[1] = VL_BLOCK_HEIGHT; - context->states.idct.viewport.scale[2] = 1; - context->states.idct.viewport.scale[3] = 1; - context->states.idct.viewport.translate[0] = 0; - context->states.idct.viewport.translate[1] = 0; - context->states.idct.viewport.translate[2] = 0; - context->states.idct.viewport.translate[3] = 0; - - context->states.idct.render_target.width = VL_BLOCK_WIDTH; - context->states.idct.render_target.height = VL_BLOCK_HEIGHT; - 
context->states.idct.render_target.num_cbufs = 1; - context->states.idct.render_target.zsbuf = NULL; - - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - sampler.min_lod = 0; - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - context->states.idct.sampler = pipe->create_sampler_state(pipe, &sampler); - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_A8L8_UNORM; - template.last_level = 0; - template.width[0] = 8; - template.height[0] = 8; - template.depth[0] = 1; - template.compressed = 0; - pf_get_block(template.format, &template.block); - - context->states.idct.texture = pipe->screen->texture_create(pipe->screen, &template); - - template.format = PIPE_FORMAT_A8R8G8B8_UNORM; - template.width[0] = 16; - template.height[0] = 1; - - context->states.idct.basis = pipe->screen->texture_create(pipe->screen, &template); - - for (i = 0; i < 2; ++i) - { - context->states.idct.vertex_bufs[i] = &context->states.csc.vertex_bufs[i]; - context->states.idct.vertex_buf_elems[i] = &context->states.csc.vertex_buf_elems[i]; - /* - context->states.idct.vertex_bufs[i].pitch = sizeof(struct VL_VERTEX2F); - context->states.idct.vertex_bufs[i].max_index = 3; - context->states.idct.vertex_bufs[i].buffer_offset = 0; - context->states.idct.vertex_bufs[i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_VERTEX2F) * 4 - ); - - 
context->states.idct.vertex_buf_elems[i].src_offset = 0; - context->states.idct.vertex_buf_elems[i].vertex_buffer_index = i; - context->states.idct.vertex_buf_elems[i].nr_components = 2; - context->states.idct.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - */ - } - - vlCreateVertexShaderFrameIDCT(context); - vlCreateFragmentShaderFrameIDCT(context); - - return 0; -} - -static int vlDestroyIDCT(struct VL_CONTEXT *context) -{ - //unsigned int i; - - assert(context); - - context->pipe->delete_sampler_state(context->pipe, context->states.idct.sampler); - - //for (i = 0; i < 2; ++i) - //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vertex_bufs[i].buffer); - - pipe_texture_release(&context->states.idct.texture); - pipe_texture_release(&context->states.idct.basis); - - //context->pipe->delete_vs_state(context->pipe, context->states.idct.frame_vs); - //context->pipe->delete_fs_state(context->pipe, context->states.idct.frame_fs); - - //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.vs_const_buf.buffer); - //context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.idct.fs_const_buf.buffer); - - return 0; -} - -static int vlCreateVertexShaderIMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl 
i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.i_vs = 
pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderIMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - */ - for (i = 0; i < 2; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * 
mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul o0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFramePMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - 
header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Ref macroblock texcoords - */ - for (i = 0; i < 4; i++) - { - decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldPMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct 
tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field ref macroblock texcoords - * decl o4 ; Bottom field ref macroblock texcoords - * decl o5 ; Denormalized vertex pos - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_output((i == 0 || i == 5) ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - mov o1, i1 ; Move input luma texcoords to output - mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock - add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o5, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, 
&tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFramePMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl 
= vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - 
fs.tokens = tokens; - context->states.mc.p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldPMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Denormalized vertex pos - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t4 */ - decl = vl_decl_temps(0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 
; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* XXX: Pos values off by 0.5? 
*/ - /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFrameBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 
0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move past ref macroblock texcoords into position - * decl c4 ; Unused - * decl c5 ; Translation vector to move future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Past ref macroblock texcoords - * decl o4 ; Future ref macroblock texcoords - */ - for (i = 0; i < 5; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], 
header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on past ref macroblock - add o4, t0, c5 ; Translate rect into position on future ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position - * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position - * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field past ref macroblock texcoords - * decl o4 ; Bottom field past ref macroblock texcoords - * decl o5 ; Top field future ref macroblock texcoords - * decl o6 ; Bottom field future ref macroblock texcoords - * decl o7 ; Denormalized vertex pos - */ - for (i = 0; i < 8; i++) - { - decl = vl_decl_output((i == 0 || i == 7) ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock - * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock - * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock - * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o7, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, 
TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.mc.b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s4 - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t2 */ - 
decl = vl_decl_temps(0, 2); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock - * tex2d t2, i3, s4 ; Read texel from future ref macroblock - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, 
TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldBMC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - 
* decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Texcoords for s4 - * decl i5 ; Texcoords for s4 - * decl i6 ; Denormalized vertex pos - */ - for (i = 0; i < 7; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels - * ; and for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t5 */ - decl = vl_decl_temps(0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 
1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - 
inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field - * tex2d t5, i5, s4 ; Read texel from future ref macroblock 
bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - context->states.mc.b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -int vlCreateDataBufsMC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - unsigned int i; - - assert(context); - - pipe = context->pipe; - - /* Create our vertex buffer and vertex buffer element */ - context->states.mc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); - context->states.mc.vertex_bufs[0].max_index = 23; - 
context->states.mc.vertex_bufs[0].buffer_offset = 0; - context->states.mc.vertex_bufs[0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_VERTEX2F) * 24 - ); - - context->states.mc.vertex_buf_elems[0].src_offset = 0; - context->states.mc.vertex_buf_elems[0].vertex_buffer_index = 0; - context->states.mc.vertex_buf_elems[0].nr_components = 2; - context->states.mc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Create our texcoord buffers and texcoord buffer elements */ - for (i = 1; i < 3; ++i) - { - context->states.mc.vertex_bufs[i].pitch = sizeof(struct VL_TEXCOORD2F); - context->states.mc.vertex_bufs[i].max_index = 23; - context->states.mc.vertex_bufs[i].buffer_offset = 0; - context->states.mc.vertex_bufs[i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - - context->states.mc.vertex_buf_elems[i].src_offset = 0; - context->states.mc.vertex_buf_elems[i].vertex_buffer_index = i; - context->states.mc.vertex_buf_elems[i].nr_components = 2; - context->states.mc.vertex_buf_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - } - - /* Fill buffers */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_chroma_420_texcoords, - sizeof(struct VL_VERTEX2F) * 24 - ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_luma_texcoords, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - /* TODO: Accomodate 422, 444 */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_chroma_420_texcoords, - sizeof(struct VL_TEXCOORD2F) * 24 - ); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.vertex_bufs[i].buffer); - - /* Create our constant buffer */ - 
context->states.mc.vs_const_buf.size = sizeof(struct VL_MC_VS_CONSTS); - context->states.mc.vs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.mc.vs_const_buf.size - ); - - context->states.mc.fs_const_buf.size = sizeof(struct VL_MC_FS_CONSTS); - context->states.mc.fs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.mc.fs_const_buf.size - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.mc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &vl_mc_fs_consts, - sizeof(struct VL_MC_FS_CONSTS) - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.mc.fs_const_buf.buffer); - - return 0; -} - -static int vlInitMC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - struct pipe_texture template; - unsigned int filters[5]; - unsigned int i; - - assert(context); - - pipe = context->pipe; - - /* For MC we render to textures, which are rounded up to nearest POT */ - context->states.mc.viewport.scale[0] = vlRoundUpPOT(context->video_width); - context->states.mc.viewport.scale[1] = vlRoundUpPOT(context->video_height); - context->states.mc.viewport.scale[2] = 1; - context->states.mc.viewport.scale[3] = 1; - context->states.mc.viewport.translate[0] = 0; - context->states.mc.viewport.translate[1] = 0; - context->states.mc.viewport.translate[2] = 0; - context->states.mc.viewport.translate[3] = 0; - - context->states.mc.render_target.width = vlRoundUpPOT(context->video_width); - context->states.mc.render_target.height = vlRoundUpPOT(context->video_height); - context->states.mc.render_target.num_cbufs = 1; - /* FB for MC stage is a VL_SURFACE, set in vlSetRenderSurface() */ - context->states.mc.render_target.zsbuf = NULL; - - filters[0] = PIPE_TEX_FILTER_NEAREST; - filters[1] = context->video_format == VL_FORMAT_YCBCR_444 ? 
PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[2] = context->video_format == VL_FORMAT_YCBCR_444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[3] = PIPE_TEX_FILTER_LINEAR; - filters[4] = PIPE_TEX_FILTER_LINEAR; - - for (i = 0; i < 5; ++i) - { - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = filters[i]; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = filters[i]; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - sampler.min_lod = 0; - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - context->states.mc.samplers[i] = pipe->create_sampler_state(pipe, &sampler); - } - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_R16_SNORM; - template.last_level = 0; - template.width[0] = 8; - template.height[0] = 8 * 4; - template.depth[0] = 1; - template.compressed = 0; - pf_get_block(template.format, &template.block); - - context->states.mc.textures[0] = pipe->screen->texture_create(pipe->screen, &template); - - if (context->video_format == VL_FORMAT_YCBCR_420) - template.height[0] = 8; - else if (context->video_format == VL_FORMAT_YCBCR_422) - template.height[0] = 8 * 2; - else if (context->video_format == VL_FORMAT_YCBCR_444) - template.height[0] = 8 * 4; - else - assert(0); - - context->states.mc.textures[1] = pipe->screen->texture_create(pipe->screen, &template); - context->states.mc.textures[2] = pipe->screen->texture_create(pipe->screen, &template); - - /* textures[3] & textures[4] are assigned from VL_SURFACEs for P and B macroblocks at render time */ - - vlCreateVertexShaderIMC(context); - 
vlCreateFragmentShaderIMC(context); - vlCreateVertexShaderFramePMC(context); - vlCreateVertexShaderFieldPMC(context); - vlCreateFragmentShaderFramePMC(context); - vlCreateFragmentShaderFieldPMC(context); - vlCreateVertexShaderFrameBMC(context); - vlCreateVertexShaderFieldBMC(context); - vlCreateFragmentShaderFrameBMC(context); - vlCreateFragmentShaderFieldBMC(context); - vlCreateDataBufsMC(context); - - return 0; -} - -static int vlDestroyMC(struct VL_CONTEXT *context) -{ - unsigned int i; - - assert(context); - - for (i = 0; i < 5; ++i) - context->pipe->delete_sampler_state(context->pipe, context->states.mc.samplers[i]); - - for (i = 0; i < 3; ++i) - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vertex_bufs[i].buffer); - - /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < 3; ++i) - pipe_texture_release(&context->states.mc.textures[i]); - - context->pipe->delete_vs_state(context->pipe, context->states.mc.i_vs); - context->pipe->delete_fs_state(context->pipe, context->states.mc.i_fs); - - for (i = 0; i < 2; ++i) - { - context->pipe->delete_vs_state(context->pipe, context->states.mc.p_vs[i]); - context->pipe->delete_fs_state(context->pipe, context->states.mc.p_fs[i]); - context->pipe->delete_vs_state(context->pipe, context->states.mc.b_vs[i]); - context->pipe->delete_fs_state(context->pipe, context->states.mc.b_fs[i]); - } - - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.vs_const_buf.buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.mc.fs_const_buf.buffer); - - return 0; -} - -static int vlCreateVertexShaderCSC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - 
assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale texcoord rect to source size - * decl c1 ; Translation vector to move texcoord rect into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Vertex texcoords - */ - for (i = 0; i < 2; i++) - { - decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mov o0, i0 ; Move pos in to pos out */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o1, t0, c1 ; Translate texcoord rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - context->states.csc.vertex_shader = pipe->create_vs_state(pipe, &vs); - //free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderCSC(struct VL_CONTEXT *context) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(context); - - pipe = context->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - 
/* decl i0 ; Texcoords for s0 */ - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl c0 ; Bias vector for CSC - * decl c1-c4 ; CSC matrix c1-c4 - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl s0 ; Sampler for tex containing picture to display */ - decl = vl_decl_samplers(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* tex2d t0, i0, s0 ; Read src pixel */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t0, t0, c0 ; Subtract bias vector from pixel */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix - * dp4 o0.y, t0, c2 - * dp4 o0.z, t0, c3 - * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = 
tokens; - context->states.csc.fragment_shader = pipe->create_fs_state(pipe, &fs); - //free(tokens); - - return 0; -} - -static int vlCreateDataBufsCSC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - - assert(context); - - pipe = context->pipe; - - /* - Create our vertex buffer and vertex buffer element - VB contains 4 vertices that render a quad covering the entire window - to display a rendered surface - Quad is rendered as a tri strip - */ - context->states.csc.vertex_bufs[0].pitch = sizeof(struct VL_VERTEX2F); - context->states.csc.vertex_bufs[0].max_index = 3; - context->states.csc.vertex_bufs[0].buffer_offset = 0; - context->states.csc.vertex_bufs[0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_VERTEX2F) * 4 - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - vl_surface_vertex_positions, - sizeof(struct VL_VERTEX2F) * 4 - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[0].buffer); - - context->states.csc.vertex_buf_elems[0].src_offset = 0; - context->states.csc.vertex_buf_elems[0].vertex_buffer_index = 0; - context->states.csc.vertex_buf_elems[0].nr_components = 2; - context->states.csc.vertex_buf_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* - Create our texcoord buffer and texcoord buffer element - Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices - */ - context->states.csc.vertex_bufs[1].pitch = sizeof(struct VL_TEXCOORD2F); - context->states.csc.vertex_bufs[1].max_index = 3; - context->states.csc.vertex_bufs[1].buffer_offset = 0; - context->states.csc.vertex_bufs[1].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct VL_TEXCOORD2F) * 4 - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.csc.vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - 
vl_surface_texcoords, - sizeof(struct VL_TEXCOORD2F) * 4 - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.vertex_bufs[1].buffer); - - context->states.csc.vertex_buf_elems[1].src_offset = 0; - context->states.csc.vertex_buf_elems[1].vertex_buffer_index = 1; - context->states.csc.vertex_buf_elems[1].nr_components = 2; - context->states.csc.vertex_buf_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* - Create our vertex shader's constant buffer - Const buffer contains scaling and translation vectors - */ - context->states.csc.vs_const_buf.size = sizeof(struct VL_CSC_VS_CONSTS); - context->states.csc.vs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.csc.vs_const_buf.size - ); - - /* - Create our fragment shader's constant buffer - Const buffer contains the color conversion matrix and bias vectors - */ - context->states.csc.fs_const_buf.size = sizeof(struct VL_CSC_FS_CONSTS); - context->states.csc.fs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - context->states.csc.fs_const_buf.size - ); - - /* - TODO: Refactor this into a seperate function, - allow changing the CSC matrix at runtime to switch between regular & full versions - */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, context->states.csc.fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &vl_csc_fs_consts_601, - sizeof(struct VL_CSC_FS_CONSTS) - ); - - pipe->winsys->buffer_unmap(pipe->winsys, context->states.csc.fs_const_buf.buffer); - - return 0; -} - -static int vlInitCSC(struct VL_CONTEXT *context) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - - assert(context); - - pipe = context->pipe; - - /* Delay creating the FB until vlPutSurface() so we know window size */ - context->states.csc.framebuffer.num_cbufs = 1; - context->states.csc.framebuffer.cbufs[0] = NULL; - context->states.csc.framebuffer.zsbuf = NULL; - - 
sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - /*sampler.min_lod = ;*/ - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - context->states.csc.sampler = pipe->create_sampler_state(pipe, &sampler); - - vlCreateVertexShaderCSC(context); - vlCreateFragmentShaderCSC(context); - vlCreateDataBufsCSC(context); - - return 0; -} - -static int vlDestroyCSC(struct VL_CONTEXT *context) -{ - assert(context); - - /* - Since we create the final FB when we display our first surface, - it may not be created if vlPutSurface() is never called - */ - if (context->states.csc.framebuffer.cbufs[0]) - context->pipe->winsys->surface_release(context->pipe->winsys, &context->states.csc.framebuffer.cbufs[0]); - context->pipe->delete_sampler_state(context->pipe, context->states.csc.sampler); - context->pipe->delete_vs_state(context->pipe, context->states.csc.vertex_shader); - context->pipe->delete_fs_state(context->pipe, context->states.csc.fragment_shader); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[0].buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vertex_bufs[1].buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.vs_const_buf.buffer); - context->pipe->winsys->buffer_destroy(context->pipe->winsys, context->states.csc.fs_const_buf.buffer); - - return 0; -} - -static int vlInitCommon(struct VL_CONTEXT *context) +static int vlInitCommon(struct vlContext *context) { 
struct pipe_context *pipe; struct pipe_rasterizer_state rast; struct pipe_blend_state blend; struct pipe_depth_stencil_alpha_state dsa; unsigned int i; - + assert(context); - + pipe = context->pipe; - + rast.flatshade = 1; rast.flatshade_first = 0; rast.light_twoside = 0; @@ -2113,9 +51,9 @@ static int vlInitCommon(struct VL_CONTEXT *context) rast.offset_units = 1; rast.offset_scale = 1; /*rast.sprite_coord_mode[i] = ;*/ - context->states.common.raster = pipe->create_rasterizer_state(pipe, &rast); - pipe->bind_rasterizer_state(pipe, context->states.common.raster); - + context->raster = pipe->create_rasterizer_state(pipe, &rast); + pipe->bind_rasterizer_state(pipe, context->raster); + blend.blend_enable = 0; blend.rgb_func = PIPE_BLEND_ADD; blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; @@ -2128,9 +66,9 @@ static int vlInitCommon(struct VL_CONTEXT *context) /* Needed to allow color writes to FB, even if blending disabled */ blend.colormask = PIPE_MASK_RGBA; blend.dither = 0; - context->states.common.blend = pipe->create_blend_state(pipe, &blend); - pipe->bind_blend_state(pipe, context->states.common.blend); - + context->blend = pipe->create_blend_state(pipe, &blend); + pipe->bind_blend_state(pipe, context->blend); + dsa.depth.enabled = 0; dsa.depth.writemask = 0; dsa.depth.func = PIPE_FUNC_ALWAYS; @@ -2149,134 +87,122 @@ static int vlInitCommon(struct VL_CONTEXT *context) dsa.alpha.enabled = 0; dsa.alpha.func = PIPE_FUNC_ALWAYS; dsa.alpha.ref = 0; - context->states.common.dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); - pipe->bind_depth_stencil_alpha_state(pipe, context->states.common.dsa); - - return 0; -} + context->dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + pipe->bind_depth_stencil_alpha_state(pipe, context->dsa); -static int vlDestroyCommon(struct VL_CONTEXT *context) -{ - assert(context); - - context->pipe->delete_blend_state(context->pipe, context->states.common.blend); - context->pipe->delete_rasterizer_state(context->pipe, 
context->states.common.raster); - context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->states.common.dsa); - return 0; } -static int vlInit(struct VL_CONTEXT *context) +int vlCreateContext +( + struct vlScreen *screen, + struct pipe_context *pipe, + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + enum vlProfile profile, + enum vlEntryPoint entry_point, + struct vlContext **context +) { + struct vlContext *ctx; + + assert(screen); assert(context); - - vlInitCommon(context); - vlInitCSC(context); - vlInitMC(context); - vlInitIDCT(context); - + assert(pipe); + + ctx = calloc(1, sizeof(struct vlContext)); + + if (!ctx) + return 1; + + ctx->screen = screen; + ctx->pipe = pipe; + ctx->picture_width = picture_width; + ctx->picture_height = picture_height; + ctx->picture_format = picture_format; + ctx->profile = profile; + ctx->entry_point = entry_point; + + vlInitCommon(ctx); + + vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render); + vlCreateBasicCSC(pipe, &ctx->csc); + + *context = ctx; + return 0; } -static int vlDestroy(struct VL_CONTEXT *context) +int vlDestroyContext +( + struct vlContext *context +) { assert(context); - + /* XXX: Must unbind shaders before we can delete them for some reason */ context->pipe->bind_vs_state(context->pipe, NULL); context->pipe->bind_fs_state(context->pipe, NULL); - - vlDestroyCommon(context); - vlDestroyCSC(context); - vlDestroyMC(context); - vlDestroyIDCT(context); - + + context->render->vlDestroy(context->render); + context->csc->vlDestroy(context->csc); + + context->pipe->delete_blend_state(context->pipe, context->blend); + context->pipe->delete_rasterizer_state(context->pipe, context->raster); + context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->dsa); + + free(context); + return 0; } -int vlCreateContext +struct vlScreen* vlContextGetScreen ( - Display *display, - struct pipe_context *pipe, - unsigned int 
video_width, - unsigned int video_height, - enum VL_FORMAT video_format, - struct VL_CONTEXT **context + struct vlContext *context ) { - struct VL_CONTEXT *ctx; - - assert(display); - assert(pipe); assert(context); - - ctx = calloc(1, sizeof(struct VL_CONTEXT)); - - ctx->display = display; - ctx->pipe = pipe; - ctx->video_width = video_width; - ctx->video_height = video_height; - ctx->video_format = video_format; - - vlInit(ctx); - - /* Since we only change states in vlPutSurface() we need to start in render mode */ - vlBeginRender(ctx); - - *context = ctx; - - return 0; + + return context->screen; } -int vlDestroyContext(struct VL_CONTEXT *context) +struct pipe_context* vlGetPipeContext +( + struct vlContext *context +) { assert(context); - - vlDestroy(context); - - free(context); - - return 0; + + return context->pipe; } -int vlBeginRender(struct VL_CONTEXT *context) +unsigned int vlGetPictureWidth +( + struct vlContext *context +) { - struct pipe_context *pipe; - assert(context); - - pipe = context->pipe; - - /* Frame buffer set in vlRender*Macroblock() */ - /* Shaders, samplers, textures set in vlRender*Macroblock() */ - pipe->set_vertex_buffers(pipe, 3, context->states.mc.vertex_bufs); - pipe->set_vertex_elements(pipe, 3, context->states.mc.vertex_buf_elems); - pipe->set_viewport_state(pipe, &context->states.mc.viewport); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.mc.vs_const_buf); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.mc.fs_const_buf); - - return 0; + + return context->picture_width; } -int vlEndRender(struct VL_CONTEXT *context) +unsigned int vlGetPictureHeight +( + struct vlContext *context +) { - struct pipe_context *pipe; - assert(context); - - pipe = context->pipe; - - pipe->set_framebuffer_state(pipe, &context->states.csc.framebuffer); - pipe->set_viewport_state(pipe, &context->states.csc.viewport); - pipe->bind_sampler_states(pipe, 1, (void**)&context->states.csc.sampler); - /* Source 
texture set in vlPutSurface() */ - pipe->bind_vs_state(pipe, context->states.csc.vertex_shader); - pipe->bind_fs_state(pipe, context->states.csc.fragment_shader); - pipe->set_vertex_buffers(pipe, 2, context->states.csc.vertex_bufs); - pipe->set_vertex_elements(pipe, 2, context->states.csc.vertex_buf_elems); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &context->states.csc.vs_const_buf); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &context->states.csc.fs_const_buf); - - return 0; + + return context->picture_height; } +enum vlFormat vlGetPictureFormat +( + struct vlContext *context +) +{ + assert(context); + + return context->picture_format; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h index bff318854a..3d14634c44 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.h +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -1,83 +1,73 @@ #ifndef vl_context_h #define vl_context_h -#include -#include #include "vl_types.h" struct pipe_context; -struct VL_CONTEXT +#ifdef VL_INTERNAL +struct vlRender; +struct vlCSC; + +struct vlContext { - Display *display; + struct vlScreen *screen; struct pipe_context *pipe; - unsigned int video_width; - unsigned int video_height; - enum VL_FORMAT video_format; - - struct - { - struct - { - struct pipe_rasterizer_state *raster; - struct pipe_depth_stencil_alpha_state *dsa; - struct pipe_blend_state *blend; - } common; - - struct - { - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *sampler; - struct pipe_texture *texture; - struct pipe_texture *basis; - struct pipe_shader_state *frame_vs; - struct pipe_shader_state *frame_fs; - struct pipe_vertex_buffer *vertex_bufs[2]; - struct pipe_vertex_element *vertex_buf_elems[2]; - //struct pipe_constant_buffer vs_const_buf, fs_const_buf; - } idct; - - struct - { - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state 
render_target; - struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[5]; - struct pipe_shader_state *i_vs, *p_vs[2], *b_vs[2]; - struct pipe_shader_state *i_fs, *p_fs[2], *b_fs[2]; - struct pipe_vertex_buffer vertex_bufs[3]; - struct pipe_vertex_element vertex_buf_elems[3]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; - } mc; - - struct - { - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state framebuffer; - struct pipe_sampler_state *sampler; - struct pipe_shader_state *vertex_shader, *fragment_shader; - struct pipe_vertex_buffer vertex_bufs[2]; - struct pipe_vertex_element vertex_buf_elems[2]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; - } csc; - } states; + unsigned int picture_width; + unsigned int picture_height; + enum vlFormat picture_format; + enum vlProfile profile; + enum vlEntryPoint entry_point; + + void *raster; + void *dsa; + void *blend; + + struct vlRender *render; + struct vlCSC *csc; }; +#endif int vlCreateContext ( - Display *display, + struct vlScreen *screen, struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum VL_FORMAT video_format, - struct VL_CONTEXT **context + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + enum vlProfile profile, + enum vlEntryPoint entry_point, + struct vlContext **context ); -int vlDestroyContext(struct VL_CONTEXT *context); +int vlDestroyContext +( + struct vlContext *context +); -int vlBeginRender(struct VL_CONTEXT *context); -int vlEndRender(struct VL_CONTEXT *context); +struct vlScreen* vlContextGetScreen +( + struct vlContext *context +); -#endif +struct pipe_context* vlGetPipeContext +( + struct vlContext *context +); +unsigned int vlGetPictureWidth +( + struct vlContext *context +); + +unsigned int vlGetPictureHeight +( + struct vlContext *context +); + +enum vlFormat vlGetPictureFormat +( + struct vlContext *context +); + +#endif diff --git 
a/src/gallium/state_trackers/g3dvl/vl_csc.h b/src/gallium/state_trackers/g3dvl/vl_csc.h new file mode 100644 index 0000000000..36417a2792 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_csc.h @@ -0,0 +1,53 @@ +#ifndef vl_csc_h +#define vl_csc_h + +#include "vl_types.h" + +struct pipe_surface; + +struct vlCSC +{ + int (*vlResizeFrameBuffer) + ( + struct vlCSC *csc, + unsigned int width, + unsigned int height + ); + + int (*vlBegin) + ( + struct vlCSC *csc + ); + + int (*vlPutPicture) + ( + struct vlCSC *csc, + struct vlSurface *surface, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type + ); + + int (*vlEnd) + ( + struct vlCSC *csc + ); + + struct pipe_surface* (*vlGetFrameBuffer) + ( + struct vlCSC *csc + ); + + int (*vlDestroy) + ( + struct vlCSC *csc + ); +}; + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c index 0e5c8c77f9..f2476dbf1e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ b/src/gallium/state_trackers/g3dvl/vl_data.c @@ -6,17 +6,17 @@ * Need to be scaled to cover mbW*mbH macroblock pixels and translated into * position on target surface. */ -const struct VL_VERTEX2F vl_mb_vertex_positions[24] = +const struct vlVertex2f macroblock_verts[24] = { {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, - + {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, - + {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, - + {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} }; @@ -26,17 +26,17 @@ const struct VL_VERTEX2F vl_mb_vertex_positions[24] = * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. 
*/ -const struct VL_TEXCOORD2F vl_luma_texcoords[24] = +const struct vlVertex2f macroblock_luma_texcoords[24] = { {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, - + {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, - + {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, - + {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} }; @@ -45,7 +45,7 @@ const struct VL_TEXCOORD2F vl_luma_texcoords[24] = * Represents texcoords for the above for rendering 1 chroma block. * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. */ -const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; +const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 2 chroma blocks arranged @@ -53,30 +53,13 @@ const struct VL_TEXCOORD2F *vl_chroma_420_texcoords = (const struct VL_TEXCOORD2 * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. */ -const struct VL_TEXCOORD2F *vl_chroma_422_texcoords = (const struct VL_TEXCOORD2F*)vl_mb_vertex_positions; +const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 4 chroma blocks. * Same case as 4 luma blocks. */ -const struct VL_TEXCOORD2F *vl_chroma_444_texcoords = vl_luma_texcoords; - -/* - * Represents 2 triangles in a strip in normalized coords. - * Used to render the surface onto the frame buffer. - */ -const struct VL_VERTEX2F vl_surface_vertex_positions[4] = -{ - {0.0f, 0.0f}, - {0.0f, 1.0f}, - {1.0f, 0.0f}, - {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above. We can use the position values directly. 
- */ -const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*)vl_surface_vertex_positions; +const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; /* * Used when rendering P and B macroblocks, multiplier is applied to the A channel, @@ -84,97 +67,10 @@ const struct VL_TEXCOORD2F *vl_surface_texcoords = (const struct VL_TEXCOORD2F*) * get back the differential. The differential is then added to the samples from the * reference surface(s). */ +#if 0 const struct VL_MC_FS_CONSTS vl_mc_fs_consts = { {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, {0.5f, 2.0f, 0.0f, 0.0f} }; - -/* - * Identity color conversion constants, for debugging - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity = -{ - { - 0.0f, 0.0f, 0.0f, 0.0f - }, - { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601 = -{ - { - 0.0f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.0f, 0.0f, 1.371f, 0.0f, - 1.0f, -0.336f, -0.698f, 0.0f, - 1.0f, 1.732f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full = -{ - { - 0.062745098f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.164f, 0.0f, 1.596f, 0.0f, - 1.164f, -0.391f, -0.813f, 0.0f, - 1.164f, 2.018f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [16,235] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709 = -{ - { - 0.0f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.0f, 0.0f, 1.540f, 0.0f, 
- 1.0f, -0.183f, -0.459f, 0.0f, - 1.0f, 1.816f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - -/* - * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: - * Y is in [16,235], Cb and Cr are in [16,240] - * R, G, and B are in [0,255] - */ -const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full = -{ - { - 0.062745098f, 0.501960784f, 0.501960784f, 0.0f - }, - { - 1.164f, 0.0f, 1.793f, 0.0f, - 1.164f, -0.213f, -0.534f, 0.0f, - 1.164f, 2.115f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - } -}; - +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_data.h b/src/gallium/state_trackers/g3dvl/vl_data.h index 8f347273ad..f0de2e976c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_data.h +++ b/src/gallium/state_trackers/g3dvl/vl_data.h @@ -3,15 +3,18 @@ #include "vl_types.h" -extern const struct VL_VERTEX2F vl_mb_vertex_positions[24]; -extern const struct VL_TEXCOORD2F vl_luma_texcoords[24]; -extern const struct VL_TEXCOORD2F *vl_chroma_420_texcoords; -extern const struct VL_TEXCOORD2F *vl_chroma_422_texcoords; -extern const struct VL_TEXCOORD2F *vl_chroma_444_texcoords; +/* TODO: Needs to be rolled into the appropriate stage */ -extern const struct VL_VERTEX2F vl_surface_vertex_positions[4]; -extern const struct VL_TEXCOORD2F *vl_surface_texcoords; +extern const struct vlVertex2f macroblock_verts[24]; +extern const struct vlVertex2f macroblock_luma_texcoords[24]; +extern const struct vlVertex2f *macroblock_chroma_420_texcoords; +extern const struct vlVertex2f *macroblock_chroma_422_texcoords; +extern const struct vlVertex2f *macroblock_chroma_444_texcoords; +extern const struct vlVertex2f surface_verts[4]; +extern const struct vlVertex2f *surface_texcoords; + +/* extern const struct VL_MC_FS_CONSTS vl_mc_fs_consts; extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_identity; @@ -19,6 +22,6 @@ extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601; extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full; extern const struct VL_CSC_FS_CONSTS 
vl_csc_fs_consts_709; extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full; +*/ #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_defs.h b/src/gallium/state_trackers/g3dvl/vl_defs.h index e668a7a10e..d612d02502 100644 --- a/src/gallium/state_trackers/g3dvl/vl_defs.h +++ b/src/gallium/state_trackers/g3dvl/vl_defs.h @@ -9,4 +9,3 @@ #define VL_MACROBLOCK_SIZE (VL_MACROBLOCK_WIDTH * VL_MACROBLOCK_HEIGHT) #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c b/src/gallium/state_trackers/g3dvl/vl_display.c new file mode 100644 index 0000000000..af80faa7f5 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_display.c @@ -0,0 +1,48 @@ +#define VL_INTERNAL +#include "vl_display.h" +#include +#include + +int vlCreateDisplay +( + vlNativeDisplay native_display, + struct vlDisplay **display +) +{ + struct vlDisplay *dpy; + + assert(native_display); + assert(display); + + dpy = calloc(1, sizeof(struct vlDisplay)); + + if (!dpy) + return 1; + + dpy->native = native_display; + *display = dpy; + + return 0; +} + +int vlDestroyDisplay +( + struct vlDisplay *display +) +{ + assert(display); + + free(display); + + return 0; +} + +vlNativeDisplay vlGetNativeDisplay +( + struct vlDisplay *display +) +{ + assert(display); + + return display->native; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_display.h b/src/gallium/state_trackers/g3dvl/vl_display.h new file mode 100644 index 0000000000..e11fd40799 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_display.h @@ -0,0 +1,29 @@ +#ifndef vl_display_h +#define vl_display_h + +#include "vl_types.h" + +#ifdef VL_INTERNAL +struct vlDisplay +{ + vlNativeDisplay native; +}; +#endif + +int vlCreateDisplay +( + vlNativeDisplay native_display, + struct vlDisplay **display +); + +int vlDestroyDisplay +( + struct vlDisplay *display +); + +vlNativeDisplay vlGetNativeDisplay +( + struct vlDisplay *display +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c 
b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c new file mode 100644 index 0000000000..4fae224431 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c @@ -0,0 +1,2315 @@ +#define VL_INTERNAL +#include "vl_r16snorm_mc.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "vl_render.h" +#include "vl_shader_build.h" +#include "vl_surface.h" +#include "vl_util.h" +#include "vl_types.h" +#include "vl_defs.h" + +struct vlVertexShaderConsts +{ + /*struct vlVertex4f scale; + struct vlVertex4f denorm;*/ + struct vlVertex4f scale; + struct vlVertex4f mb_pos_trans; + struct vlVertex4f denorm; + struct + { + struct vlVertex4f top_field; + struct vlVertex4f bottom_field; + } mb_tc_trans[2]; +}; + +struct vlFragmentShaderConsts +{ + struct vlVertex4f multiplier; + struct vlVertex4f div; +}; + +struct vlR16SnormMC +{ + struct vlRender base; + + unsigned int video_width, video_height; + enum vlFormat video_format; + + struct pipe_context *pipe; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + struct pipe_sampler_state *samplers[5]; + struct pipe_texture *textures[5]; + void *i_vs, *p_vs[2], *b_vs[2]; + void *i_fs, *p_fs[2], *b_fs[2]; + struct pipe_vertex_buffer vertex_bufs[3]; + struct pipe_vertex_element vertex_elems[3]; + struct pipe_constant_buffer vs_const_buf, fs_const_buf; +}; + +int vlBegin +( + struct vlRender *render +) +{ + struct vlR16SnormMC *mc; + struct pipe_context *pipe; + + assert(render); + + mc = (struct vlR16SnormMC*)render; + pipe = mc->pipe; + + /* Frame buffer set in vlRender*Macroblock() */ + /* Shaders, samplers, textures set in vlRender*Macroblock() */ + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs); + pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + pipe->set_viewport_state(pipe, &mc->viewport); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, 
&mc->fs_const_buf); + + return 0; +} + +/*static int vlGrabMacroBlock +( + struct vlR16SnormMC *mc, + struct vlMpeg2MacroBlock *macroblock +) +{ + assert(mc); + assert(macroblock); + + + + return 0; +}*/ + +/*#define DO_IDCT*/ + +#ifdef DO_IDCT +static int vlTransformBlock(short *src, short *dst, short bias) +{ + static const float basis[8][8] = + { + {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, + {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, + {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, + {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, + {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, + {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, + {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, + {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} + }; + + unsigned int x, y; + short tmp[64]; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + tmp[y * VL_BLOCK_WIDTH + x] = (short) + ( + src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + + src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + + src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + + src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + + src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + + src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + + src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + + src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] + ); + + for (x = 0; x < VL_BLOCK_WIDTH; ++x) + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + { + dst[y * VL_BLOCK_WIDTH + x] = bias + (short) + ( + tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + + tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + + tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + + tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + + tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + + tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + + tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + + tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] + ); + if (dst[y * VL_BLOCK_WIDTH 
+ x] > 255) + dst[y * VL_BLOCK_WIDTH + x] = 255; + else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) + dst[y * VL_BLOCK_WIDTH + x] = 0; + } + return 0; +} +#endif + +static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + dst += VL_BLOCK_HEIGHT * dst_pitch; + + for (; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memset + ( + dst + y * dst_pitch, + 0, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +enum vlSampleType +{ + vlSampleTypeFull, + vlSampleTypeDiff +}; + +static int vlGrabBlocks +( + struct vlR16SnormMC *mc, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + enum vlSampleType sample_type, + short *blocks +) +{ + struct pipe_surface *tex_surface; + short *texels; + unsigned int tex_pitch; + unsigned int tb, sb = 0; + + assert(mc); + assert(blocks); + + tex_surface = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[0], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + for (tb = 0; tb < 4; ++tb) + { + if ((coded_block_pattern >> (5 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + +#ifdef DO_IDCT + vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 
128 : 0); +#endif + + if (dct_type == vlDCTTypeFrameCoded) + vlGrabFrameCodedBlock + ( + cur_block, + texels + tb * tex_pitch * VL_BLOCK_HEIGHT, + tex_pitch + ); + else + vlGrabFieldCodedBlock + ( + cur_block, + texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, + tex_pitch + ); + + ++sb; + } + else + vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); + } + + pipe_surface_unmap(tex_surface); + + /* TODO: Implement 422, 444 */ + for (tb = 0; tb < 2; ++tb) + { + tex_surface = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[tb + 1], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); + tex_pitch = tex_surface->stride / tex_surface->block.size; + + if ((coded_block_pattern >> (1 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + +#ifdef DO_IDCT + vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 128 : 0); +#endif + + vlGrabFrameCodedBlock + ( + cur_block, + texels, + tex_pitch + ); + + ++sb; + } + else + vlGrabNoBlock(texels, tex_pitch); + + pipe_surface_unmap(tex_surface); + } + + return 0; +} + +int vlRenderIMacroBlock +( + struct vlR16SnormMC *mc, + enum vlPictureType picture_type, + enum vlFieldOrder field_order, + unsigned int mbx, + unsigned int mby, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + short *blocks, + struct vlSurface *surface +) +{ + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(blocks); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != vlPictureTypeFrame) + return 0; + + vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeFull, blocks); + + pipe = mc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; + 
vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + + pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &mc->render_target); + pipe->set_sampler_textures(pipe, 3, mc->textures); + pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); + pipe->bind_vs_state(pipe, mc->i_vs); + pipe->bind_fs_state(pipe, mc->i_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderPMacroBlock +( + struct vlR16SnormMC *mc, + enum vlPictureType picture_type, + enum vlFieldOrder field_order, + unsigned int mbx, + unsigned int mby, + enum vlMotionType mc_type, + short top_x, + short top_y, + short bottom_x, + short bottom_y, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + short *blocks, + struct vlSurface *ref_surface, + struct vlSurface *surface +) +{ + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(motion_vectors); + assert(blocks); + assert(ref_surface); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != vlPictureTypeFrame) + return 0; + /* TODO: Implement other MC types */ + if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) + return 0; + + vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); + + pipe = mc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + 
vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + + if (mc_type == vlMotionTypeField) + { + vs_consts->denorm.x = (float)surface->texture->width[0]; + vs_consts->denorm.y = (float)surface->texture->height[0]; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, mc->p_vs[1]); + pipe->bind_fs_state(pipe, mc->p_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, mc->p_vs[0]); + pipe->bind_fs_state(pipe, mc->p_fs[0]); + } + + pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &mc->render_target); + + mc->textures[3] = ref_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures); + pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + 
+ pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderBMacroBlock +( + struct vlR16SnormMC *mc, + enum vlPictureType picture_type, + enum vlFieldOrder field_order, + unsigned int mbx, + unsigned int mby, + enum vlMotionType mc_type, + short top_past_x, + short top_past_y, + short bottom_past_x, + short bottom_past_y, + short top_future_x, + short top_future_y, + short bottom_future_x, + short bottom_future_y, + unsigned int coded_block_pattern, + enum vlDCTType dct_type, + short *blocks, + struct vlSurface *past_surface, + struct vlSurface *future_surface, + struct vlSurface *surface +) +{ + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(motion_vectors); + assert(blocks); + assert(ref_surface); + assert(surface); + + /* TODO: Implement interlaced rendering */ + if (picture_type != vlPictureTypeFrame) + return 0; + /* TODO: Implement other MC types */ + if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) + return 0; + + vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); + + pipe = mc->pipe; + + vs_consts = pipe->winsys->buffer_map + ( + pipe->winsys, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; + vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; + vs_consts->scale.z = 1.0f; + vs_consts->scale.w = 1.0f; + vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; + vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; + vs_consts->mb_pos_trans.z = 0.0f; + vs_consts->mb_pos_trans.w = 0.0f; + vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_past_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_past_y * 0.5f) / (float)surface->texture->height[0]; + 
vs_consts->mb_tc_trans[0].top_field.z = 0.0f; + vs_consts->mb_tc_trans[0].top_field.w = 0.0f; + vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_future_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_future_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[1].top_field.z = 0.0f; + vs_consts->mb_tc_trans[1].top_field.w = 0.0f; + + if (mc_type == vlMotionTypeField) + { + vs_consts->denorm.x = (float)surface->texture->width[0]; + vs_consts->denorm.y = (float)surface->texture->height[0]; + + vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_past_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_past_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_future_x * 0.5f) / (float)surface->texture->width[0]; + vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_future_y * 0.5f) / (float)surface->texture->height[0]; + vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; + vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; + + pipe->bind_vs_state(pipe, mc->b_vs[1]); + pipe->bind_fs_state(pipe, mc->b_fs[1]); + } + else + { + pipe->bind_vs_state(pipe, mc->b_vs[0]); + pipe->bind_fs_state(pipe, mc->b_fs[0]); + } + + pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + pipe->set_framebuffer_state(pipe, &mc->render_target); + + mc->textures[3] = past_surface->texture; + mc->textures[4] = future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures); + 
pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + return 0; +} + +int vlRenderMacroBlocksMpeg2R16Snorm +( + struct vlRender *render, + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface +) +{ + struct vlR16SnormMC *mc; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormMC*)render; + + /*for (i = 0; i < batch->num_macroblocks; ++i) + vlGrabMacroBlock(batch->macroblocks[i]);*/ + + for (i = 0; i < batch->num_macroblocks; ++i) + { + switch (batch->macroblocks[i].mb_type) + { + case vlMacroBlockTypeIntra: + { + vlRenderIMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, + batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + surface + ); + break; + } + case vlMacroBlockTypeFwdPredicted: + { + vlRenderPMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, + batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].mo_type, + batch->macroblocks[i].PMV[0][0][0], + batch->macroblocks[i].PMV[0][0][1], + batch->macroblocks[i].PMV[1][0][0], + batch->macroblocks[i].PMV[1][0][1], + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + batch->past_surface, + surface + ); + break; + } + case vlMacroBlockTypeBkwdPredicted: + { + vlRenderPMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, + batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].mo_type, + batch->macroblocks[i].PMV[0][1][0], + batch->macroblocks[i].PMV[0][1][1], + batch->macroblocks[i].PMV[1][1][0], + batch->macroblocks[i].PMV[1][1][1], + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + batch->future_surface, + surface + ); + break; + } + case vlMacroBlockTypeBiPredicted: + { + vlRenderBMacroBlock + ( + mc, + batch->picture_type, + batch->field_order, 
+ batch->macroblocks[i].mbx, + batch->macroblocks[i].mby, + batch->macroblocks[i].mo_type, + batch->macroblocks[i].PMV[0][0][0], + batch->macroblocks[i].PMV[0][0][1], + batch->macroblocks[i].PMV[1][0][0], + batch->macroblocks[i].PMV[1][0][1], + batch->macroblocks[i].PMV[0][1][0], + batch->macroblocks[i].PMV[0][1][1], + batch->macroblocks[i].PMV[1][1][0], + batch->macroblocks[i].PMV[1][1][1], + batch->macroblocks[i].cbp, + batch->macroblocks[i].dct_type, + batch->macroblocks[i].blocks, + batch->past_surface, + batch->future_surface, + surface + ); + break; + } + default: + assert(0); + } + } + + return 0; +} + +int vlEnd +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +int vlDestroy +( + struct vlRender *render +) +{ + struct vlR16SnormMC *mc; + struct pipe_context *pipe; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormMC*)render; + pipe = mc->pipe; + + for (i = 0; i < 5; ++i) + pipe->delete_sampler_state(pipe, mc->samplers[i]); + + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer); + + /* Textures 3 & 4 are not created directly, no need to release them here */ + for (i = 0; i < 3; ++i) + pipe_texture_release(&mc->textures[i]); + + pipe->delete_vs_state(pipe, mc->i_vs); + pipe->delete_fs_state(pipe, mc->i_fs); + + for (i = 0; i < 2; ++i) + { + pipe->delete_vs_state(pipe, mc->p_vs[i]); + pipe->delete_fs_state(pipe, mc->p_fs[i]); + pipe->delete_vs_state(pipe, mc->b_vs[i]); + pipe->delete_fs_state(pipe, mc->b_fs[i]); + } + + pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); + pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); + + free(mc); + + return 0; +} + +/* + * Represents 8 triangles (4 quads, 1 per block) in noormalized coords + * that render a macroblock. + * Need to be scaled to cover mbW*mbH macroblock pixels and translated into + * position on target surface. 
+ */ +const struct vlVertex2f macroblock_verts[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, + {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, + + {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, + {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, + + {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 4 luma blocks arranged + * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. + */ +const struct vlVertex2f macroblock_luma_texcoords[24] = +{ + {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, + {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, + + {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, + {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, + + {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, + {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, + + {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, + {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above for rendering 1 chroma block. + * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. + */ +const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; + +/* + * Represents texcoords for the above for rendering 2 chroma blocks arranged + * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at + * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. + * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. + */ +const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; + +/* + * Represents texcoords for the above for rendering 4 chroma blocks. + * Same case as 4 luma blocks. 
+ */ +const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; + +/* + * Used when rendering P and B macroblocks, multiplier is applied to the A channel, + * which is then added to the L channel, then the bias is subtracted from that to + * get back the differential. The differential is then added to the samples from the + * reference surface(s). + */ +const struct vlFragmentShaderConsts fs_consts = +{ + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} +}; + +static int vlCreateVertexShaderIMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderIMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 
100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + */ + for (i = 0; i < 2; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 
; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFramePMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + 
* decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate rect 
into position on ref macroblock */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration + ( + &decl, + &tokens[ti], + header, + max_tokens - ti + ); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field ref macroblock texcoords + * decl o4 ; Bottom field ref macroblock texcoords + * decl o5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + mov o1, i1 ; Move input luma 
texcoords to output + mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock + add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o5, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFramePMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + */ + for (i 
= 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 
1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); 
+ + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s3 + * decl i4 ; Denormalized vertex pos + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; 
++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from ref macroblock top field + * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? 
*/ + /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + 
decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Unused + * decl c3 ; Translation vector to move past ref macroblock texcoords into position + * decl c4 ; Unused + * decl c5 ; Translation vector to move future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Past ref macroblock texcoords + * decl o4 ; Future ref macroblock texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); 
+ } + + /* add o3, t0, c3 ; Translate rect into position on past ref macroblock + add o4, t0, c5 ; Translate rect into position on future ref macroblock */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale unit rect to macroblock size + * decl c1 ; Translation vector to move macroblock into position + * decl c2 ; Denorm coefficients + * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position + * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position + * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position + * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma texcoords + * decl o3 ; Top field past ref macroblock texcoords + * decl o4 ; Bottom field past ref macroblock texcoords + * decl o5 ; Top field future ref macroblock texcoords + * decl o6 ; Bottom field future ref macroblock texcoords + * decl o7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; i++) + { + decl = vl_decl_output((i == 0 || i == 7) ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add t1, t0, c1 ; Translate rect into position */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mov o0, t1 ; Move vertex pos to output */ + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma texcoords to output + */ + for (i = 1; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock + * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock + * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock + * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o7, t1, c2 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, 
TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMB +( + struct vlR16SnormMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0 + * decl i1 ; Texcoords for s1, s2 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = 
vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i1, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i2, s3 ; Read texel from past ref macroblock + * tex2d t2, i3, s4 ; Read texel from future ref macroblock + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 
2, TGSI_FILE_SAMPLER, i + 3);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	fs.tokens = tokens;
+	mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Build the fragment shader for field-predicted, bi-directionally predicted
+ * macroblocks and store the resulting state object in mc->b_fs[1].
+ *
+ * The shader samples the luma/Cb/Cr differential, picks the top or bottom
+ * field texel of the past and of the future reference depending on Y % 2,
+ * blends the two references and adds the differential.
+ *
+ * Returns 0 on success, 1 if the token buffer cannot be allocated (in which
+ * case mc->b_fs[1] is left untouched).
+ */
+static int vlCreateFragmentShaderFieldBMB
+(
+	struct vlR16SnormMC *mc
+)
+{
+	const unsigned int max_tokens = 200;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state fs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof(struct tgsi_token));
+
+	/* Every statement below writes through tokens, so fail before touching it */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+	/* The three fixed tokens above occupy slots 0-2 */
+	ti = 3;
+
+	/*
+	 * decl i0 ; Texcoords for s0
+	 * decl i1 ; Texcoords for s1, s2
+	 * decl i2 ; Texcoords for s3, top field
+	 * decl i3 ; Texcoords for s3, bottom field
+	 * decl i4 ; Texcoords for s4, top field
+	 * decl i5 ; Texcoords for s4, bottom field
+	 * decl i6 ; Denormalized vertex pos
+	 */
+	for (i = 0; i < 7; ++i)
+	{
+		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+	 * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels
+	 *         ; and for Y-mod-2 top/bottom field selection
+	 */
+	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl o0 ; Fragment color */
+	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl t0-t5 */
+	decl = vl_decl_temps(0, 5);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * decl s0 ; Sampler for luma texture
+	 * decl s1 ; Sampler for chroma Cb texture
+	 * decl s2 ; Sampler for chroma Cr texture
+	 * decl s3 ; Sampler for past ref surface texture
+	 * decl s4 ; Sampler for future ref surface texture
+	 */
+	for (i = 0; i < 5; ++i)
+	{
+		decl = vl_decl_samplers(i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * tex2d t1, i0, s0 ; Read texel from luma texture
+	 * mov t0.x, t1.x   ; Move luma sample into .x component
+	 * tex2d t1, i1, s1 ; Read texel from chroma Cb texture
+	 * mov t0.y, t1.x   ; Move Cb sample into .y component
+	 * tex2d t1, i1, s2 ; Read texel from chroma Cr texture
+	 * mov t0.z, t1.x   ; Move Cr sample into .z component
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		/* Luma uses i0; both chroma planes share i1 */
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+		/* Write mask walks X, Y, Z as i goes 0, 1, 2 */
+		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* mul t0, t0, c0 ; Rescale texel to correct range */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* XXX: Pos values off by 0.5? */
+	/* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */
+	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1);
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1);
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* floor t3, t3 ; Get rid of fractional part */
+	inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* mul t3, t3, c1.y ; Multiply by 2 */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1);
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+	inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
+	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field
+	 * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field
+	 */
+	for (i = 0; i < 2; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
+	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
+	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field
+	 * tex2d t5, i5, s4 ; Read texel from future ref macroblock bottom field
+	 *
+	 * t4 held the shifted Y position until here; t3 is already computed
+	 * from it, so t4 can be reused as a texel register.
+	 */
+	for (i = 0; i < 2; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
+	/* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */
+	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* Do not hand the driver stack garbage in fields other than tokens */
+	memset(&fs, 0, sizeof(struct pipe_shader_state));
+	fs.tokens = tokens;
+	mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
+	/* NOTE(review): assumes create_fs_state() copies the tokens - confirm per driver */
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Create and fill the vertex, texcoord and shader constant buffers of mc.
+ *
+ * Buffer 0 receives macroblock_verts, buffer 1 the luma texcoords and
+ * buffer 2 the 4:2:0 chroma texcoords; each holds 24 two-float vertices.
+ * The fragment shader constant buffer is filled from fs_consts, the vertex
+ * shader constant buffer is only allocated here.
+ *
+ * Always returns 0; results of buffer_create()/buffer_map() are not checked.
+ */
+int vlCreateDataBufs
+(
+	struct vlR16SnormMC *mc
+)
+{
+	struct pipe_context *pipe;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+
+	/*
+	 * Create our vertex buffer (0) and texcoord buffers (1, 2) together with
+	 * their vertex elements; all three share the same layout.
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		mc->vertex_bufs[i].pitch = sizeof(struct vlVertex2f);
+		mc->vertex_bufs[i].max_index = 23;
+		mc->vertex_bufs[i].buffer_offset = 0;
+		mc->vertex_bufs[i].buffer = pipe->winsys->buffer_create
+		(
+			pipe->winsys,
+			1,
+			PIPE_BUFFER_USAGE_VERTEX,
+			sizeof(struct vlVertex2f) * 24
+		);
+
+		mc->vertex_elems[i].src_offset = 0;
+		mc->vertex_elems[i].vertex_buffer_index = i;
+		mc->vertex_elems[i].nr_components = 2;
+		mc->vertex_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT;
+	}
+
+	/* Fill buffers */
+	memcpy
+	(
+		pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+		macroblock_verts,
+		sizeof(struct vlVertex2f) * 24
+	);
+	memcpy
+	(
+		pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+		macroblock_luma_texcoords,
+		sizeof(struct vlVertex2f) * 24
+	);
+	/* TODO: Accommodate 422, 444 */
+	memcpy
+	(
+		pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+		macroblock_chroma_420_texcoords,
+		sizeof(struct vlVertex2f) * 24
+	);
+
+	for (i = 0; i < 3; ++i)
+		pipe->winsys->buffer_unmap(pipe->winsys, mc->vertex_bufs[i].buffer);
+
+	/* Create our constant buffers */
+	mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts);
+	mc->vs_const_buf.buffer = pipe->winsys->buffer_create
+	(
+		pipe->winsys,
+		1,
+		PIPE_BUFFER_USAGE_CONSTANT,
+		mc->vs_const_buf.size
+	);
+
+	mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts);
+	mc->fs_const_buf.buffer = pipe->winsys->buffer_create
+	(
+		pipe->winsys,
+		1,
+		PIPE_BUFFER_USAGE_CONSTANT,
+		mc->fs_const_buf.size
+	);
+
+	/* The fragment shader constants never change, so upload them once */
+	memcpy
+	(
+		pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+		&fs_consts,
+		sizeof(struct vlFragmentShaderConsts)
+	);
+
+	pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer);
+
+	return 0;
+}
+
+/*
+ * Set up all pipe state owned by the motion compensation renderer: viewport,
+ * render target dimensions, the five sampler states, the luma/Cb/Cr block
+ * textures, every vertex/fragment shader variant and the data buffers.
+ *
+ * Reads mc->pipe, mc->video_width, mc->video_height and mc->video_format,
+ * which therefore must be set before the call. Always returns 0; the return
+ * values of the shader and buffer creation helpers are ignored.
+ */
+static int vlInit
+(
+	struct vlR16SnormMC *mc
+)
+{
+	struct pipe_context *pipe;
+	struct pipe_sampler_state sampler;
+	struct pipe_texture template;
+	unsigned int filters[5];
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+
+	/* For MC we render to textures, which are rounded up to nearest POT */
+	mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width);
+	mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height);
+	mc->viewport.scale[2] = 1;
+	mc->viewport.scale[3] = 1;
+	mc->viewport.translate[0] = 0;
+	mc->viewport.translate[1] = 0;
+	mc->viewport.translate[2] = 0;
+	mc->viewport.translate[3] = 0;
+
+	mc->render_target.width = vlRoundUpPOT(mc->video_width);
+	mc->render_target.height = vlRoundUpPOT(mc->video_height);
+	mc->render_target.num_cbufs = 1;
+	/* FB for MC stage is a vlSurface, set in vlSetRenderSurface() */
+	mc->render_target.zsbuf = NULL;
+
+	/* Luma is fetched unfiltered; chroma only when it is not subsampled (4:4:4) */
+	filters[0] = PIPE_TEX_FILTER_NEAREST;
+	filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+	filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+	filters[3] = PIPE_TEX_FILTER_LINEAR;
+	filters[4] = PIPE_TEX_FILTER_LINEAR;
+
+	/*
+	 * Fields not assigned below (prefilter, shadow_ambient, lod_bias, max_lod,
+	 * border_color, max_anisotropy) must not reach the driver uninitialised.
+	 */
+	memset(&sampler, 0, sizeof(struct pipe_sampler_state));
+
+	for (i = 0; i < 5; ++i)
+	{
+		sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+		sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+		sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+		sampler.min_img_filter = filters[i];
+		sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+		sampler.mag_img_filter = filters[i];
+		sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+		sampler.compare_func = PIPE_FUNC_ALWAYS;
+		sampler.normalized_coords = 1;
+		sampler.min_lod = 0;
+		mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler);
+	}
+
+	/* Luma texture: 8 texels wide, four 8-row blocks stacked vertically */
+	memset(&template, 0, sizeof(struct pipe_texture));
+	template.target = PIPE_TEXTURE_2D;
+	template.format = PIPE_FORMAT_R16_SNORM;
+	template.last_level = 0;
+	template.width[0] = 8;
+	template.height[0] = 8 * 4;
+	template.depth[0] = 1;
+	template.compressed = 0;
+	pf_get_block(template.format, &template.block);
+
+	mc->textures[0] = pipe->screen->texture_create(pipe->screen, &template);
+
+	/* Chroma textures reuse the template; only the height depends on the format */
+	if (mc->video_format == vlFormatYCbCr420)
+		template.height[0] = 8;
+	else if (mc->video_format == vlFormatYCbCr422)
+		template.height[0] = 8 * 2;
+	else if (mc->video_format == vlFormatYCbCr444)
+		template.height[0] = 8 * 4;
+	else
+		assert(0);
+
+	mc->textures[1] = pipe->screen->texture_create(pipe->screen, &template);
+	mc->textures[2] = pipe->screen->texture_create(pipe->screen, &template);
+
+	/* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */
+
+	vlCreateVertexShaderIMB(mc);
+	vlCreateFragmentShaderIMB(mc);
+	vlCreateVertexShaderFramePMB(mc);
+	vlCreateVertexShaderFieldPMB(mc);
+	vlCreateFragmentShaderFramePMB(mc);
+	vlCreateFragmentShaderFieldPMB(mc);
+	vlCreateVertexShaderFrameBMB(mc);
+	vlCreateVertexShaderFieldBMB(mc);
+	vlCreateFragmentShaderFrameBMB(mc);
+	vlCreateFragmentShaderFieldBMB(mc);
+	vlCreateDataBufs(mc);
+
+	return 0;
+}
+
+/*
+ * Create an R16 SNORM motion compensation renderer for the given pipe
+ * context and video geometry/format, and return its vlRender interface
+ * through *render.
+ *
+ * Returns 0 on success, 1 if the renderer cannot be allocated (*render is
+ * left untouched in that case).
+ */
+int vlCreateR16SNormMC
+(
+	struct pipe_context *pipe,
+	unsigned int video_width,
+	unsigned int video_height,
+	enum vlFormat video_format,
+	struct vlRender **render
+)
+{
+	struct vlR16SnormMC *mc;
+
+	assert(pipe);
+	assert(render);
+
+	mc = calloc(1, sizeof(struct vlR16SnormMC));
+
+	if (!mc)
+		return 1;
+
+	mc->base.vlBegin = &vlBegin;
+	mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm;
+	mc->base.vlEnd = &vlEnd;
+	mc->base.vlDestroy = &vlDestroy;
+	mc->pipe = pipe;
+	mc->video_width = video_width;
+	mc->video_height = video_height;
+	/* vlInit() selects sampler filters and chroma texture size from this */
+	mc->video_format = video_format;
+
+	vlInit(mc);
+
+	*render = &mc->base;
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h
new file mode 100644
index 0000000000..a6eecf05b6
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h
@@ -0,0 +1,18 @@
+#ifndef vl_mc_h
+#define vl_mc_h
+
+#include "vl_types.h"
+
+struct pipe_context;
+struct vlRender;
+
+int vlCreateR16SNormMC
+(
+	struct pipe_context *pipe,
+	unsigned int video_width,
+	unsigned int video_height,
+	enum vlFormat video_format,
+	struct vlRender **render
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_render.h b/src/gallium/state_trackers/g3dvl/vl_render.h
new file mode 100644
index 0000000000..63016b5cbe
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_render.h
@@ -0,0 +1,33 @@
+#ifndef vl_render_h
+#define vl_render_h
+
+#include "vl_types.h"
+
+struct pipe_surface;
+
+struct vlRender
+{
+	int (*vlBegin)
+	(
+		struct vlRender *render
+	);
+
+	int (*vlRenderMacroBlocksMpeg2)
+	(
+		struct vlRender *render,
+		struct vlMpeg2MacroBlockBatch *batch,
+		struct vlSurface *surface
+	);
+
+	int (*vlEnd)
+	(
+		struct vlRender *render
+	);
+
+	int (*vlDestroy)
+	(
+		struct vlRender *render
+	);
+};
+
+#endif
diff --git
a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c
new file mode 100644
index 0000000000..484f63b0d4
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_screen.c
@@ -0,0 +1,134 @@
+#define VL_INTERNAL
+#include "vl_screen.h"
+#include <assert.h>
+#include <stdlib.h>
+
+/*
+ * Allocate a zeroed vlScreen recording the given display, screen ordinal and
+ * pipe_screen, and return it through *vl_screen.
+ * Returns 0 on success, 1 if the allocation fails (*vl_screen is left untouched).
+ */
+int vlCreateScreen
+(
+	struct vlDisplay *display,
+	int screen,
+	struct pipe_screen *pscreen,
+	struct vlScreen **vl_screen
+)
+{
+	struct vlScreen *scrn;
+
+	assert(display);
+	assert(pscreen);
+	assert(vl_screen);
+
+	scrn = calloc(1, sizeof(struct vlScreen));
+
+	if (!scrn)
+		return 1;
+
+	scrn->display = display;
+	scrn->ordinal = screen;
+	scrn->pscreen = pscreen;
+	*vl_screen = scrn;
+
+	return 0;
+}
+
+/* Free the vlScreen itself (not the display or pipe_screen it refers to); returns 0. */
+int vlDestroyScreen
+(
+	struct vlScreen *screen
+)
+{
+	assert(screen);
+
+	free(screen);
+
+	return 0;
+}
+
+/* Return the display stored in the screen. */
+struct vlDisplay* vlGetDisplay
+(
+	struct vlScreen *screen
+)
+{
+	assert(screen);
+
+	return screen->display;
+}
+
+/* Return the pipe_screen stored in the screen. */
+struct pipe_screen* vlGetPipeScreen
+(
+	struct vlScreen *screen
+)
+{
+	assert(screen);
+
+	return screen->pscreen;
+}
+
+/* Return vlProfileCount; presumably the capacity to allocate for vlQueryProfiles(). */
+unsigned int vlGetMaxProfiles
+(
+	struct vlScreen *screen
+)
+{
+	assert(screen);
+
+	return vlProfileCount;
+}
+
+/*
+ * Write the supported profiles (MPEG-2 Simple and Main) to profiles[0..1].
+ * The array must hold at least two entries; always returns 0.
+ */
+int vlQueryProfiles
+(
+	struct vlScreen *screen,
+	enum vlProfile *profiles
+)
+{
+	assert(screen);
+	assert(profiles);
+
+	profiles[0] = vlProfileMpeg2Simple;
+	profiles[1] = vlProfileMpeg2Main;
+
+	return 0;
+}
+
+/* Return vlEntryPointCount; presumably the capacity for vlQueryEntryPoints(). */
+unsigned int vlGetMaxEntryPoints
+(
+	struct vlScreen *screen
+)
+{
+	assert(screen);
+
+	return vlEntryPointCount;
+}
+
+/*
+ * Write the supported entry points (IDCT, MC, CSC) to entry_points[0..2].
+ * The same list is reported for every profile; the profile argument is not
+ * consulted. The array must hold at least three entries; always returns 0.
+ */
+int vlQueryEntryPoints
+(
+	struct vlScreen *screen,
+	enum vlProfile profile,
+	enum vlEntryPoint *entry_points
+)
+{
+	assert(screen);
+	assert(entry_points);
+
+	entry_points[0] = vlEntryPointIDCT;
+	entry_points[1] = vlEntryPointMC;
+	entry_points[2] = vlEntryPointCSC;
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.h
b/src/gallium/state_trackers/g3dvl/vl_screen.h @@ -0,0 +1,63 @@ +#ifndef vl_screen_h +#define vl_screen_h + +#include "vl_types.h" + +struct pipe_screen; + +#ifdef VL_INTERNAL +struct vlScreen +{ + struct vlDisplay *display; + unsigned int ordinal; + struct pipe_screen *pscreen; +}; +#endif + +int vlCreateScreen +( + struct vlDisplay *display, + int screen, + struct pipe_screen *pscreen, + struct vlScreen **vl_screen +); + +int vlDestroyScreen +( + struct vlScreen *screen +); + +struct vlDisplay* vlGetDisplay +( + struct vlScreen *screen +); + +struct pipe_screen* vlGetPipeScreen +( + struct vlScreen *screen +); + +unsigned int vlGetMaxProfiles +( + struct vlScreen *screen +); + +int vlQueryProfiles +( + struct vlScreen *screen, + enum vlProfile *profiles +); + +unsigned int vlGetMaxEntryPoints +( + struct vlScreen *screen +); + +int vlQueryEntryPoints +( + struct vlScreen *screen, + enum vlProfile profile, + enum vlEntryPoint *entry_points +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c index 5f30e23ff8..51f1721a33 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.c +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c @@ -13,7 +13,7 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index decl.Semantic.SemanticIndex = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -27,7 +27,7 @@ struct tgsi_full_declaration vl_decl_interpolated_input ) { struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - + assert ( interpolation == TGSI_INTERPOLATE_CONSTANT || @@ -42,21 +42,21 @@ struct tgsi_full_declaration vl_decl_interpolated_input decl.Declaration.Interpolate = interpolation;; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned 
int last) { struct tgsi_full_declaration decl = tgsi_default_full_declaration(); - + decl.Declaration.File = TGSI_FILE_CONSTANT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = name; decl.Semantic.SemanticIndex = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -70,7 +70,7 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde decl.Semantic.SemanticIndex = index; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -82,7 +82,7 @@ struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last decl.Declaration.File = TGSI_FILE_TEMPORARY; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -94,7 +94,7 @@ struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int l decl.Declaration.File = TGSI_FILE_SAMPLER; decl.DeclarationRange.First = first; decl.DeclarationRange.Last = last; - + return decl; } @@ -108,7 +108,7 @@ struct tgsi_full_instruction vl_inst2 ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -116,7 +116,7 @@ struct tgsi_full_instruction vl_inst2 inst.Instruction.NumSrcRegs = 1; inst.FullSrcRegisters[0].SrcRegister.File = src_file; inst.FullSrcRegisters[0].SrcRegister.Index = src_index; - + return inst; } @@ -132,7 +132,7 @@ struct tgsi_full_instruction vl_inst3 ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -142,7 +142,7 @@ struct tgsi_full_instruction vl_inst3 inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; inst.FullSrcRegisters[1].SrcRegister.File = src2_file; inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; - + return inst; } @@ 
-158,7 +158,7 @@ struct tgsi_full_instruction vl_tex ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = TGSI_OPCODE_TEX; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -169,7 +169,7 @@ struct tgsi_full_instruction vl_tex inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; inst.FullSrcRegisters[1].SrcRegister.File = src2_file; inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; - + return inst; } @@ -187,7 +187,7 @@ struct tgsi_full_instruction vl_inst4 ) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = opcode; inst.Instruction.NumDstRegs = 1; inst.FullDstRegisters[0].DstRegister.File = dst_file; @@ -199,18 +199,17 @@ struct tgsi_full_instruction vl_inst4 inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; inst.FullSrcRegisters[2].SrcRegister.File = src3_file; inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; - + return inst; } struct tgsi_full_instruction vl_end(void) { struct tgsi_full_instruction inst = tgsi_default_full_instruction(); - + inst.Instruction.Opcode = TGSI_OPCODE_END; inst.Instruction.NumDstRegs = 0; inst.Instruction.NumSrcRegs = 0; - + return inst; } - diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h index 878d7e2c45..dc615cb156 100644 --- a/src/gallium/state_trackers/g3dvl/vl_shader_build.h +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -59,4 +59,3 @@ struct tgsi_full_instruction vl_inst4 struct tgsi_full_instruction vl_end(void); #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 1386b1107c..ffc8122172 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -1,628 +1,177 @@ +#define VL_INTERNAL #include "vl_surface.h" #include #include -#include +#include +#include #include 
-#include #include #include +#include "vl_screen.h" #include "vl_context.h" -#include "vl_defs.h" +#include "vl_render.h" +#include "vl_csc.h" #include "vl_util.h" -/*#define DO_IDCT*/ - -#ifdef DO_IDCT -static int vlTransformBlock(short *src, short *dst, short bias) +int vlCreateSurface +( + struct vlScreen *screen, + unsigned int width, + unsigned int height, + enum vlFormat format, + struct vlSurface **surface +) { - static const float basis[8][8] = - { - {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, - {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, - {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, - {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, - {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, - {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, - {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, - {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} - }; - - unsigned int x, y; - short tmp[64]; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - tmp[y * VL_BLOCK_WIDTH + x] = (short) - ( - src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + - src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + - src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + - src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + - src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + - src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + - src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + - src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] - ); - - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - { - dst[y * VL_BLOCK_WIDTH + x] = bias + (short) - ( - tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + - tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + - tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + - tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + - tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + - tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + - tmp[6 * 
VL_BLOCK_WIDTH + x] * basis[y][6] + - tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] - ); - if (dst[y * VL_BLOCK_WIDTH + x] > 255) - dst[y * VL_BLOCK_WIDTH + x] = 255; - else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) - dst[y * VL_BLOCK_WIDTH + x] = 0; - } - return 0; -} -#endif + struct vlSurface *sfc; + struct pipe_texture template; -static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} + assert(screen); + assert(surface); -static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} + sfc = calloc(1, sizeof(struct vlSurface)); -static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memset - ( - dst + y * dst_pitch, - 0, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} + if (!sfc) + return 1; -static int vlGrabBlocks -( - struct VL_CONTEXT *context, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - enum VL_SAMPLE_TYPE sample_type, - short *blocks -) -{ - struct pipe_surface *tex_surface; - short *texels; - unsigned int tex_pitch; - unsigned int tb, sb = 0; - - assert(context); - assert(blocks); - - tex_surface = context->pipe->screen->get_tex_surface - ( - context->pipe->screen, - context->states.mc.textures[0], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - for (tb = 0; tb < 4; ++tb) - { - if 
((coded_block_pattern >> (5 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 128 : 0); -#endif - - if (dct_type == VL_DCT_FRAME_CODED) - vlGrabFrameCodedBlock - ( - cur_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - else - vlGrabFieldCodedBlock - ( - cur_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); - } - - pipe_surface_unmap(tex_surface); - - /* TODO: Implement 422, 444 */ - for (tb = 0; tb < 2; ++tb) - { - tex_surface = context->pipe->screen->get_tex_surface - ( - context->pipe->screen, - context->states.mc.textures[tb + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - if ((coded_block_pattern >> (1 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == VL_FULL_SAMPLE ? 
128 : 0); -#endif - - vlGrabFrameCodedBlock - ( - cur_block, - texels, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels, tex_pitch); - - pipe_surface_unmap(tex_surface); - } - - return 0; -} + sfc->screen = screen; + sfc->width = width; + sfc->height = height; + sfc->format = format; -int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface) -{ - struct pipe_context *pipe; - struct pipe_texture template; - struct VL_SURFACE *sfc; - - assert(context); - assert(surface); - - pipe = context->pipe; - - sfc = calloc(1, sizeof(struct VL_SURFACE)); - - sfc->context = context; - sfc->width = vlRoundUpPOT(context->video_width); - sfc->height = vlRoundUpPOT(context->video_height); - sfc->format = context->video_format; - memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; template.format = PIPE_FORMAT_A8R8G8B8_UNORM; template.last_level = 0; - template.width[0] = sfc->width; - template.height[0] = sfc->height; + template.width[0] = vlRoundUpPOT(sfc->width); + template.height[0] = vlRoundUpPOT(sfc->height); template.depth[0] = 1; template.compressed = 0; pf_get_block(template.format, &template.block); - /* XXX: Needed? 
*/ template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; - - sfc->texture = pipe->screen->texture_create(pipe->screen, &template); - + + sfc->texture = vlGetPipeScreen(screen)->texture_create(vlGetPipeScreen(screen), &template); + *surface = sfc; - + return 0; } -int vlDestroySurface(struct VL_SURFACE *surface) +int vlDestroySurface +( + struct vlSurface *surface +) { assert(surface); + pipe_texture_release(&surface->texture); free(surface); - + return 0; } -int vlRenderIMacroBlock +int vlRenderMacroBlocksMpeg2 ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *surface + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface ) { - struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vs_consts; - - assert(blocks); + assert(batch); assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != VL_FRAME_PICTURE) - return 0; - - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_FULL_SAMPLE, blocks); - - pipe = surface->context->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - surface->context->states.mc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface + + 
surface->context->render->vlBegin(surface->context->render); + + surface->context->render->vlRenderMacroBlocksMpeg2 ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + surface->context->render, + batch, + surface ); - pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); - pipe->set_sampler_textures(pipe, 3, surface->context->states.mc.textures); - pipe->bind_sampler_states(pipe, 3, (void**)surface->context->states.mc.samplers); - pipe->bind_vs_state(pipe, surface->context->states.mc.i_vs); - pipe->bind_fs_state(pipe, surface->context->states.mc.i_fs); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + + surface->context->render->vlEnd(surface->context->render); + return 0; } -int vlRenderPMacroBlock +int vlPutPicture ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *ref_surface, - struct VL_SURFACE *surface + struct vlSurface *surface, + vlNativeDrawable drawable, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type ) { + struct vlCSC *csc; struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); + assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != VL_FRAME_PICTURE) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) - return 0; - - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - + assert(surface->context); + + csc = surface->context->csc; pipe = surface->context->pipe; - - vs_consts = pipe->winsys->buffer_map + + csc->vlResizeFrameBuffer(csc, destw, desth); + 
+ csc->vlBegin(csc); + + csc->vlPutPicture ( - pipe->winsys, - surface->context->states.mc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE + csc, + surface, + srcx, + srcy, + srcw, + srch, + destx, + desty, + destw, + desth, + picture_type ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->height; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->top_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->top_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - - if (mc_type == VL_FIELD_MC) - { - vs_consts->denorm.x = (float)surface->width; - vs_consts->denorm.y = (float)surface->height; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector->bottom_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector->bottom_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[1]); - pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, surface->context->states.mc.p_vs[0]); - pipe->bind_fs_state(pipe, surface->context->states.mc.p_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - - surface->context->states.mc.render_target.cbufs[0] = 
pipe->screen->get_tex_surface + + csc->vlEnd(csc); + + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + bind_pipe_drawable(pipe, drawable); + /* TODO: Need to take destx, desty into consideration */ + pipe->winsys->flush_frontbuffer ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + pipe->winsys, + csc->vlGetFrameBuffer(csc), + pipe->priv ); - pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); - - surface->context->states.mc.textures[3] = ref_surface->texture; - pipe->set_sampler_textures(pipe, 4, surface->context->states.mc.textures); - pipe->bind_sampler_states(pipe, 4, (void**)surface->context->states.mc.samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + return 0; } -int vlRenderBMacroBlock +struct vlScreen* vlSurfaceGetScreen ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *past_surface, - struct VL_SURFACE *future_surface, - struct VL_SURFACE *surface + struct vlSurface *surface ) { - struct pipe_context *pipe; - struct VL_MC_VS_CONSTS *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != VL_FRAME_PICTURE) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != VL_FRAME_MC && mc_type != VL_FIELD_MC) - return 0; - - vlGrabBlocks(surface->context, coded_block_pattern, dct_type, VL_DIFFERENCE_SAMPLE, blocks); - - pipe = surface->context->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - surface->context->states.mc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->width; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / 
(float)surface->height; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->width; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->height; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].top_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].top_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].top_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].top_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[1].top_field.z = 0.0f; - vs_consts->mb_tc_trans[1].top_field.w = 0.0f; - - if (mc_type == VL_FIELD_MC) - { - vs_consts->denorm.x = (float)surface->width; - vs_consts->denorm.y = (float)surface->height; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[0].bottom_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[0].bottom_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + motion_vector[1].bottom_field.x * 0.5f) / (float)surface->width; - vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + motion_vector[1].bottom_field.y * 0.5f) / (float)surface->height; - vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, 
surface->context->states.mc.b_vs[1]); - pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, surface->context->states.mc.b_vs[0]); - pipe->bind_fs_state(pipe, surface->context->states.mc.b_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.mc.vs_const_buf.buffer); - - surface->context->states.mc.render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &surface->context->states.mc.render_target); - - surface->context->states.mc.textures[3] = past_surface->texture; - surface->context->states.mc.textures[4] = future_surface->texture; - pipe->set_sampler_textures(pipe, 5, surface->context->states.mc.textures); - pipe->bind_sampler_states(pipe, 5, (void**)surface->context->states.mc.samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - return 0; + + return surface->screen; } -int vlPutSurface +struct vlContext* vlBindToContext ( - struct VL_SURFACE *surface, - Drawable drawable, - unsigned int srcx, - unsigned int srcy, - unsigned int srcw, - unsigned int srch, - unsigned int destx, - unsigned int desty, - unsigned int destw, - unsigned int desth, - enum VL_PICTURE picture_type + struct vlSurface *surface, + struct vlContext *context ) { - unsigned int create_fb = 0; - struct pipe_context *pipe; - struct VL_CSC_VS_CONSTS *vs_consts; - + struct vlContext *old; + assert(surface); - - pipe = surface->context->pipe; - - if (!surface->context->states.csc.framebuffer.cbufs[0]) - create_fb = 1; - else if - ( - surface->context->states.csc.framebuffer.width != destw || - surface->context->states.csc.framebuffer.height != desth - ) - { - pipe->winsys->surface_release - ( - pipe->winsys, - &surface->context->states.csc.framebuffer.cbufs[0] - ); - - create_fb = 1; - } - - if (create_fb) - { - 
surface->context->states.csc.viewport.scale[0] = destw; - surface->context->states.csc.viewport.scale[1] = desth; - surface->context->states.csc.viewport.scale[2] = 1; - surface->context->states.csc.viewport.scale[3] = 1; - surface->context->states.csc.viewport.translate[0] = 0; - surface->context->states.csc.viewport.translate[1] = 0; - surface->context->states.csc.viewport.translate[2] = 0; - surface->context->states.csc.viewport.translate[3] = 0; - - surface->context->states.csc.framebuffer.width = destw; - surface->context->states.csc.framebuffer.height = desth; - surface->context->states.csc.framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys); - pipe->winsys->surface_alloc_storage - ( - pipe->winsys, - surface->context->states.csc.framebuffer.cbufs[0], - destw, - desth, - PIPE_FORMAT_A8R8G8B8_UNORM, - /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ - PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, - 0 - ); - } - - vlEndRender(surface->context); - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - surface->context->states.csc.vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->src_scale.x = srcw / (float)surface->width; - vs_consts->src_scale.y = srch / (float)surface->height; - vs_consts->src_scale.z = 1; - vs_consts->src_scale.w = 1; - vs_consts->src_trans.x = srcx / (float)surface->width; - vs_consts->src_trans.y = srcy / (float)surface->height; - vs_consts->src_trans.z = 0; - vs_consts->src_trans.w = 0; - - pipe->winsys->buffer_unmap(pipe->winsys, surface->context->states.csc.vs_const_buf.buffer); - - pipe->set_sampler_textures(pipe, 1, &surface->texture); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - bind_pipe_drawable(pipe, drawable); - /* TODO: Need to take destx, desty into consideration */ - pipe->winsys->flush_frontbuffer - ( - pipe->winsys, - surface->context->states.csc.framebuffer.cbufs[0], - 
pipe->priv - ); - - vlBeginRender(surface->context); - - return 0; -} + old = surface->context; + surface->context = context; + + return old; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h index 9f56b77e1e..b975e131fa 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.h +++ b/src/gallium/state_trackers/g3dvl/vl_surface.h @@ -1,81 +1,66 @@ #ifndef vl_surface_h #define vl_surface_h -#include #include "vl_types.h" +#ifdef VL_INTERNAL struct pipe_texture; -struct VL_SURFACE +struct vlSurface { - struct VL_CONTEXT *context; + struct vlScreen *screen; + struct vlContext *context; unsigned int width; unsigned int height; - enum VL_FORMAT format; + enum vlFormat format; struct pipe_texture *texture; }; +#endif -int vlCreateSurface(struct VL_CONTEXT *context, struct VL_SURFACE **surface); +int vlCreateSurface +( + struct vlScreen *screen, + unsigned int width, + unsigned int height, + enum vlFormat format, + struct vlSurface **surface +); -int vlDestroySurface(struct VL_SURFACE *surface); +int vlDestroySurface +( + struct vlSurface *surface +); -int vlRenderIMacroBlock +int vlRenderMacroBlocksMpeg2 ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *surface + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface ); -int vlRenderPMacroBlock +int vlPutPicture ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *ref_surface, - struct VL_SURFACE *surface + struct vlSurface *surface, + vlNativeDrawable drawable, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum 
vlPictureType picture_type ); -int vlRenderBMacroBlock +struct vlScreen* vlSurfaceGetScreen ( - enum VL_PICTURE picture_type, - enum VL_FIELD_ORDER field_order, - unsigned int mbx, - unsigned int mby, - enum VL_MC_TYPE mc_type, - struct VL_MOTION_VECTOR *motion_vector, - unsigned int coded_block_pattern, - enum VL_DCT_TYPE dct_type, - short *blocks, - struct VL_SURFACE *past_surface, - struct VL_SURFACE *future_surface, - struct VL_SURFACE *surface + struct vlSurface *surface ); -int vlPutSurface +struct vlContext* vlBindToContext ( - struct VL_SURFACE *surface, - Drawable drawable, - unsigned int srcx, - unsigned int srcy, - unsigned int srcw, - unsigned int srch, - unsigned int destx, - unsigned int desty, - unsigned int destw, - unsigned int desth, - enum VL_PICTURE picture_type + struct vlSurface *surface, + struct vlContext *context ); #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index 4d210c9e0a..504ba8ac81 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -1,102 +1,106 @@ #ifndef vl_types_h #define vl_types_h -enum VL_FORMAT -{ - VL_FORMAT_YCBCR_420, - VL_FORMAT_YCBCR_422, - VL_FORMAT_YCBCR_444 -}; +#if 1 /*#ifdef X11*/ +#include -enum VL_PICTURE -{ - VL_TOP_FIELD, - VL_BOTTOM_FIELD, - VL_FRAME_PICTURE -}; +typedef Display* vlNativeDisplay; +typedef Drawable vlNativeDrawable; +#endif + +struct vlDisplay; +struct vlScreen; +struct vlContext; +struct vlSurface; -enum VL_FIELD_ORDER +enum vlProfile { - VL_FIELD_FIRST, - VL_FIELD_SECOND + vlProfileMpeg2Simple, + vlProfileMpeg2Main, + + vlProfileCount }; -enum VL_DCT_TYPE +enum vlEntryPoint { - VL_DCT_FIELD_CODED, - VL_DCT_FRAME_CODED + vlEntryPointIDCT, + vlEntryPointMC, + vlEntryPointCSC, + + vlEntryPointCount }; -enum VL_SAMPLE_TYPE +enum vlFormat { - VL_FULL_SAMPLE, - VL_DIFFERENCE_SAMPLE + vlFormatYCbCr420, + vlFormatYCbCr422, + vlFormatYCbCr444 }; -enum VL_MC_TYPE +enum 
vlPictureType { - VL_FIELD_MC, - VL_FRAME_MC, - VL_DUAL_PRIME_MC, - VL_16x8_MC = VL_FRAME_MC + vlPictureTypeTopField, + vlPictureTypeBottomField, + vlPictureTypeFrame }; -struct VL_VERTEX4F +enum vlMotionType { - float x, y, z, w; + vlMotionTypeField, + vlMotionTypeFrame, + vlMotionTypeDualPrime, + vlMotionType16x8 }; -struct VL_VERTEX2F +enum vlFieldOrder { - float x, y; + vlFieldOrderFirst, + vlFieldOrderSecond }; -struct VL_TEXCOORD2F +enum vlDCTType { - float s, t; + vlDCTTypeFrameCoded, + vlDCTTypeFieldCoded }; -struct VL_MC_VS_CONSTS +struct vlVertex2f { - struct VL_VERTEX4F scale; - struct VL_VERTEX4F mb_pos_trans; - struct VL_VERTEX4F denorm; - struct - { - struct VL_VERTEX4F top_field; - struct VL_VERTEX4F bottom_field; - } mb_tc_trans[2]; + float x, y; }; -struct VL_MC_FS_CONSTS +struct vlVertex4f { - struct VL_VERTEX4F multiplier; - struct VL_VERTEX4F bias; - struct VL_VERTEX4F y_divider; + float x, y, z, w; }; -struct VL_CSC_VS_CONSTS +enum vlMacroBlockType { - struct VL_VERTEX4F src_scale; - struct VL_VERTEX4F src_trans; + vlMacroBlockTypeIntra, + vlMacroBlockTypeFwdPredicted, + vlMacroBlockTypeBkwdPredicted, + vlMacroBlockTypeBiPredicted }; -struct VL_CSC_FS_CONSTS +struct vlMpeg2MacroBlock { - struct VL_VERTEX4F bias; - float matrix[16]; + unsigned int mbx, mby; + enum vlMacroBlockType mb_type; + enum vlMotionType mo_type; + enum vlDCTType dct_type; + int PMV[2][2][2]; + unsigned int cbp; + short *blocks; }; -struct VL_MOTION_VECTOR +struct vlMpeg2MacroBlockBatch { - struct - { - int x, y; - } top_field, bottom_field; + struct vlSurface *past_surface; + struct vlSurface *future_surface; + enum vlPictureType picture_type; + enum vlFieldOrder field_order; + unsigned int num_macroblocks; + struct vlMpeg2MacroBlock *macroblocks; }; -struct VL_CONTEXT; -struct VL_SURFACE; - #endif - diff --git a/src/gallium/state_trackers/g3dvl/vl_util.c b/src/gallium/state_trackers/g3dvl/vl_util.c index 2421ae2210..50aa9af66f 100644 --- 
a/src/gallium/state_trackers/g3dvl/vl_util.c +++ b/src/gallium/state_trackers/g3dvl/vl_util.c @@ -4,14 +4,13 @@ unsigned int vlRoundUpPOT(unsigned int x) { unsigned int i; - + assert(x > 0); - + --x; - + for (i = 1; i < sizeof(unsigned int) * 8; i <<= 1) x |= x >> i; - + return x + 1; } - diff --git a/src/gallium/state_trackers/g3dvl/vl_util.h b/src/gallium/state_trackers/g3dvl/vl_util.h index e4b72c4f87..bc98e79df4 100644 --- a/src/gallium/state_trackers/g3dvl/vl_util.h +++ b/src/gallium/state_trackers/g3dvl/vl_util.h @@ -4,4 +4,3 @@ unsigned int vlRoundUpPOT(unsigned int x); #endif - -- cgit v1.2.3 From 7f100d04ddacf9f6517c9aff1e2de5257eb77fb0 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 18 Aug 2008 00:04:29 -0400 Subject: g3dvl: Use rotating buffers to avoid waiting for map(). --- src/gallium/state_trackers/g3dvl/Makefile | 4 +- src/gallium/state_trackers/g3dvl/vl_data.c | 76 ----------------------- src/gallium/state_trackers/g3dvl/vl_data.h | 27 -------- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 46 +++++++++----- 4 files changed, 34 insertions(+), 119 deletions(-) delete mode 100644 src/gallium/state_trackers/g3dvl/vl_data.c delete mode 100644 src/gallium/state_trackers/g3dvl/vl_data.h (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 9995c554ab..bd77c62bc5 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,9 +1,9 @@ TARGET = libg3dvl.a -OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_data.o vl_shader_build.o vl_util.o vl_basic_csc.o \ +OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \ vl_r16snorm_mc.o GALLIUMDIR = ../.. 
-CFLAGS += -g -Wall -fPIC -Werror -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl +CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl ############################################# diff --git a/src/gallium/state_trackers/g3dvl/vl_data.c b/src/gallium/state_trackers/g3dvl/vl_data.c deleted file mode 100644 index f2476dbf1e..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_data.c +++ /dev/null @@ -1,76 +0,0 @@ -#include "vl_data.h" - -/* - * Represents 8 triangles (4 quads, 1 per block) in noormalized coords - * that render a macroblock. - * Need to be scaled to cover mbW*mbH macroblock pixels and translated into - * position on target surface. - */ -const struct vlVertex2f macroblock_verts[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, - {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, - - {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, - {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, - - {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 4 luma blocks arranged - * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. - */ -const struct vlVertex2f macroblock_luma_texcoords[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, - - {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, - {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, - - {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, - {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 1 chroma block. - * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. 
- */ -const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 2 chroma blocks arranged - * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. - * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. - */ -const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 4 chroma blocks. - * Same case as 4 luma blocks. - */ -const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; - -/* - * Used when rendering P and B macroblocks, multiplier is applied to the A channel, - * which is then added to the L channel, then the bias is subtracted from that to - * get back the differential. The differential is then added to the samples from the - * reference surface(s). - */ -#if 0 -const struct VL_MC_FS_CONSTS vl_mc_fs_consts = -{ - {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, - {0.5f, 2.0f, 0.0f, 0.0f} -}; -#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_data.h b/src/gallium/state_trackers/g3dvl/vl_data.h deleted file mode 100644 index f0de2e976c..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_data.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef vl_data_h -#define vl_data_h - -#include "vl_types.h" - -/* TODO: Needs to be rolled into the appropriate stage */ - -extern const struct vlVertex2f macroblock_verts[24]; -extern const struct vlVertex2f macroblock_luma_texcoords[24]; -extern const struct vlVertex2f *macroblock_chroma_420_texcoords; -extern const struct vlVertex2f *macroblock_chroma_422_texcoords; -extern const struct vlVertex2f *macroblock_chroma_444_texcoords; - -extern const struct vlVertex2f surface_verts[4]; -extern const struct vlVertex2f *surface_texcoords; - -/* -extern const struct VL_MC_FS_CONSTS vl_mc_fs_consts; - -extern const struct VL_CSC_FS_CONSTS 
vl_csc_fs_consts_identity; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_601_full; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709; -extern const struct VL_CSC_FS_CONSTS vl_csc_fs_consts_709_full; -*/ - -#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c index 4fae224431..80b09a6d1d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c @@ -15,6 +15,8 @@ #include "vl_types.h" #include "vl_defs.h" +#define NUM_BUFS 4 /* Number of rotating buffers to use */ + struct vlVertexShaderConsts { /*struct vlVertex4f scale; @@ -41,12 +43,13 @@ struct vlR16SnormMC unsigned int video_width, video_height; enum vlFormat video_format; + unsigned int cur_buf; struct pipe_context *pipe; struct pipe_viewport_state viewport; struct pipe_framebuffer_state render_target; struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[5]; + struct pipe_texture *textures[NUM_BUFS][5]; void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; struct pipe_vertex_buffer vertex_bufs[3]; @@ -230,7 +233,7 @@ static int vlGrabBlocks tex_surface = mc->pipe->screen->get_tex_surface ( mc->pipe->screen, - mc->textures[0], + mc->textures[mc->cur_buf % NUM_BUFS][0], 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -276,7 +279,7 @@ static int vlGrabBlocks tex_surface = mc->pipe->screen->get_tex_surface ( mc->pipe->screen, - mc->textures[tb + 1], + mc->textures[mc->cur_buf % NUM_BUFS][tb + 1], 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -361,12 +364,14 @@ int vlRenderIMacroBlock 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ); pipe->set_framebuffer_state(pipe, &mc->render_target); - pipe->set_sampler_textures(pipe, 3, mc->textures); + pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUFS]); pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); 
pipe->bind_vs_state(pipe, mc->i_vs); pipe->bind_fs_state(pipe, mc->i_fs); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + mc->cur_buf++; return 0; } @@ -458,11 +463,13 @@ int vlRenderPMacroBlock ); pipe->set_framebuffer_state(pipe, &mc->render_target); - mc->textures[3] = ref_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures); + mc->textures[mc->cur_buf % NUM_BUFS][3] = ref_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUFS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + mc->cur_buf++; return 0; } @@ -567,12 +574,14 @@ int vlRenderBMacroBlock ); pipe->set_framebuffer_state(pipe, &mc->render_target); - mc->textures[3] = past_surface->texture; - mc->textures[4] = future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures); + mc->textures[mc->cur_buf % NUM_BUFS][3] = past_surface->texture; + mc->textures[mc->cur_buf % NUM_BUFS][4] = future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUFS]); pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); + + mc->cur_buf++; return 0; } @@ -724,8 +733,12 @@ int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer); /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < 3; ++i) - pipe_texture_release(&mc->textures[i]); + for (i = 0; i < NUM_BUFS; ++i) + { + pipe_texture_release(&mc->textures[i][0]); + pipe_texture_release(&mc->textures[i][1]); + pipe_texture_release(&mc->textures[i][2]); + } pipe->delete_vs_state(pipe, mc->i_vs); pipe->delete_fs_state(pipe, mc->i_fs); @@ -2252,7 +2265,8 @@ static int vlInit template.compressed = 0; pf_get_block(template.format, &template.block); - mc->textures[0] = pipe->screen->texture_create(pipe->screen, &template); + for (i = 0; i < NUM_BUFS; ++i) + mc->textures[i][0] = 
pipe->screen->texture_create(pipe->screen, &template); if (mc->video_format == vlFormatYCbCr420) template.height[0] = 8; @@ -2263,8 +2277,11 @@ static int vlInit else assert(0); - mc->textures[1] = pipe->screen->texture_create(pipe->screen, &template); - mc->textures[2] = pipe->screen->texture_create(pipe->screen, &template); + for (i = 0; i < NUM_BUFS; ++i) + { + mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); + mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); + } /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ @@ -2306,6 +2323,7 @@ int vlCreateR16SNormMC mc->pipe = pipe; mc->video_width = video_width; mc->video_height = video_height; + mc->cur_buf = 0; vlInit(mc); -- cgit v1.2.3 From 4d9d192672508eaa9b2a70f84e933f11108bf09f Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 28 Aug 2008 23:25:13 -0400 Subject: g3dvl: Buffer the entire frame before rendering. --- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 4 +- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 45 +- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h | 4 +- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 2341 ++++++++++++++++++++ .../state_trackers/g3dvl/vl_r16snorm_mc_buf.h | 18 + src/gallium/state_trackers/g3dvl/vl_render.h | 5 + src/gallium/state_trackers/g3dvl/vl_surface.c | 2 + 8 files changed, 2400 insertions(+), 21 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index bd77c62bc5..4f7a953484 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,6 +1,6 @@ TARGET = libg3dvl.a OBJECTS = vl_display.o vl_screen.o vl_context.o 
vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \ - vl_r16snorm_mc.o + vl_r16snorm_mc.o vl_r16snorm_mc_buf.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 56d360c05b..fe107e406d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -6,6 +6,7 @@ #include #include "vl_render.h" #include "vl_r16snorm_mc.h" +#include "vl_r16snorm_mc_buf.h" #include "vl_csc.h" #include "vl_basic_csc.h" @@ -126,7 +127,8 @@ int vlCreateContext vlInitCommon(ctx); - vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render); + /*vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render);*/ + vlCreateR16SNormBufferedMC(pipe, picture_width, picture_height, picture_format, &ctx->render); vlCreateBasicCSC(pipe, &ctx->csc); *context = ctx; diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c index 80b09a6d1d..3272220ef8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c @@ -57,7 +57,7 @@ struct vlR16SnormMC struct pipe_constant_buffer vs_const_buf, fs_const_buf; }; -int vlBegin +static int vlBegin ( struct vlRender *render ) @@ -312,7 +312,7 @@ static int vlGrabBlocks return 0; } -int vlRenderIMacroBlock +static int vlRenderIMacroBlock ( struct vlR16SnormMC *mc, enum vlPictureType picture_type, @@ -370,13 +370,13 @@ int vlRenderIMacroBlock pipe->bind_fs_state(pipe, mc->i_fs); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + mc->cur_buf++; return 0; } -int vlRenderPMacroBlock +static int vlRenderPMacroBlock ( struct vlR16SnormMC *mc, enum vlPictureType picture_type, @@ -468,13 +468,13 @@ int vlRenderPMacroBlock pipe->bind_sampler_states(pipe, 4, 
(void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + mc->cur_buf++; return 0; } -int vlRenderBMacroBlock +static int vlRenderBMacroBlock ( struct vlR16SnormMC *mc, enum vlPictureType picture_type, @@ -580,13 +580,13 @@ int vlRenderBMacroBlock pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - + mc->cur_buf++; return 0; } -int vlRenderMacroBlocksMpeg2R16Snorm +static int vlRenderMacroBlocksMpeg2R16Snorm ( struct vlRender *render, struct vlMpeg2MacroBlockBatch *batch, @@ -702,7 +702,17 @@ int vlRenderMacroBlocksMpeg2R16Snorm return 0; } -int vlEnd +static int vlEnd +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +static int vlFlush ( struct vlRender *render ) @@ -712,7 +722,7 @@ int vlEnd return 0; } -int vlDestroy +static int vlDestroy ( struct vlRender *render ) @@ -765,7 +775,7 @@ int vlDestroy * Need to be scaled to cover mbW*mbH macroblock pixels and translated into * position on target surface. */ -const struct vlVertex2f macroblock_verts[24] = +static const struct vlVertex2f macroblock_verts[24] = { {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, @@ -785,7 +795,7 @@ const struct vlVertex2f macroblock_verts[24] = * in a bW*(bH*4) texture. First luma block located at 0,0->bW,bH; second at * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. */ -const struct vlVertex2f macroblock_luma_texcoords[24] = +static const struct vlVertex2f macroblock_luma_texcoords[24] = { {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, @@ -804,7 +814,7 @@ const struct vlVertex2f macroblock_luma_texcoords[24] = * Represents texcoords for the above for rendering 1 chroma block. * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. 
*/ -const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; +static const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 2 chroma blocks arranged @@ -812,13 +822,13 @@ const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. */ -const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; +static const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; /* * Represents texcoords for the above for rendering 4 chroma blocks. * Same case as 4 luma blocks. */ -const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; +static const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; /* * Used when rendering P and B macroblocks, multiplier is applied to the A channel, @@ -826,7 +836,7 @@ const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texco * get back the differential. The differential is then added to the samples from the * reference surface(s). 
*/ -const struct vlFragmentShaderConsts fs_consts = +static const struct vlFragmentShaderConsts fs_consts = { {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, {0.5f, 2.0f, 0.0f, 0.0f} @@ -2093,7 +2103,7 @@ static int vlCreateFragmentShaderFieldBMB return 0; } -int vlCreateDataBufs +static int vlCreateDataBufs ( struct vlR16SnormMC *mc ) @@ -2319,6 +2329,7 @@ int vlCreateR16SNormMC mc->base.vlBegin = &vlBegin; mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm; mc->base.vlEnd = &vlEnd; + mc->base.vlFlush = &vlFlush; mc->base.vlDestroy = &vlDestroy; mc->pipe = pipe; mc->video_width = video_width; diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h index a6eecf05b6..9842926bf7 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h @@ -1,5 +1,5 @@ -#ifndef vl_mc_h -#define vl_mc_h +#ifndef vl_r16snorm_mc_h +#define vl_r16snorm_mc_h #include "vl_types.h" diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c new file mode 100644 index 0000000000..fc383cb8f6 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -0,0 +1,2341 @@ +#define VL_INTERNAL +#include "vl_r16snorm_mc_buf.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vl_render.h" +#include "vl_shader_build.h" +#include "vl_surface.h" +#include "vl_util.h" +#include "vl_types.h" +#include "vl_defs.h" + +#define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */ + +enum vlMacroBlockTypeEx +{ + vlMacroBlockExTypeIntra, + vlMacroBlockExTypeFwdPredictedFrame, + vlMacroBlockExTypeFwdPredictedField, + vlMacroBlockExTypeBkwdPredictedFrame, + vlMacroBlockExTypeBkwdPredictedField, + vlMacroBlockExTypeBiPredictedFrame, + vlMacroBlockExTypeBiPredictedField, + + vlNumMacroBlockExTypes +}; + +struct 
vlVertexShaderConsts
+{
+	struct vlVertex4f denorm;	/* x/y are set to the target texture width/height by vlFlush() */
+};
+
+/* Constants bound to the fragment shaders; see fs_consts for the values. */
+struct vlFragmentShaderConsts
+{
+	struct vlVertex4f multiplier;
+	struct vlVertex4f div;
+};
+
+/*
+ * Buffered motion compensation renderer. Macroblocks are accumulated per
+ * extended macroblock type and drawn in one go by vlFlush().
+ */
+struct vlR16SnormBufferedMC
+{
+	struct vlRender base;	/* Must stay first, the renderer is cast to/from struct vlRender */
+
+	unsigned int video_width, video_height;
+	enum vlFormat video_format;
+
+	/* Selects the active buffer set, always used modulo NUM_BUF_SETS */
+	unsigned int cur_buf;
+	/* Surface the buffered macroblocks will be rendered to */
+	struct vlSurface *buffered_surface;
+	/* Reference surfaces sampled by predicted macroblocks */
+	struct vlSurface *past_surface, *future_surface;
+	/* 1 / width and 1 / height of the buffered surface's texture */
+	struct vlVertex2f surface_tex_inv_size;
+	/* Number of macroblocks buffered so far, per extended type and in total */
+	unsigned int num_macroblocks[vlNumMacroBlockExTypes];
+	unsigned int total_num_macroblocks;
+
+	struct pipe_context *pipe;
+	struct pipe_viewport_state viewport;
+	struct pipe_framebuffer_state render_target;
+	struct pipe_sampler_state *samplers[5];
+	/* Per set: 0 = luma blocks, 1 & 2 = chroma blocks, 3 & 4 = reference surfaces (assigned at flush time) */
+	struct pipe_texture *textures[NUM_BUF_SETS][5];
+	void *i_vs, *p_vs[2], *b_vs[2];
+	void *i_fs, *p_fs[2], *b_fs[2];
+	/* Per set and type: 0 = vertex positions, 1 & 2 = motion vectors */
+	struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][vlNumMacroBlockExTypes][3];
+	struct pipe_vertex_element vertex_elems[5];
+	struct pipe_constant_buffer vs_const_buf, fs_const_buf;
+};
+
+/* Nothing to prepare for this renderer; only validates the argument. */
+static int vlBegin
+(
+	struct vlRender *render
+)
+{
+	assert(render);
+
+	return 0;
+}
+
+/*
+ * Copies one VL_BLOCK_WIDTH x VL_BLOCK_HEIGHT block of samples from src
+ * (tightly packed) to consecutive rows of dst.
+ * dst_pitch is the distance between two dst rows, in samples.
+ */
+static int vlGrabFrameCodedBlock(const short *src, short *dst, unsigned int dst_pitch)
+{
+	unsigned int y;
+
+	for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
+		memcpy
+		(
+			dst + y * dst_pitch,
+			src + y * VL_BLOCK_WIDTH,
+			VL_BLOCK_WIDTH * sizeof(short)
+		);
+
+	return 0;
+}
+
+/*
+ * Copies one VL_BLOCK_WIDTH x VL_BLOCK_HEIGHT block of samples from src
+ * (tightly packed) to every second row of dst, i.e. to the rows of a
+ * single field. dst must point at the first row of that field.
+ * dst_pitch is the distance between two dst rows, in samples.
+ *
+ * All rows use the same doubled stride. The previous version advanced dst
+ * by a whole block for the second half of the rows while y kept counting,
+ * which put rows 4..7 far below the macroblock instead of on rows 8..14.
+ */
+static int vlGrabFieldCodedBlock(const short *src, short *dst, unsigned int dst_pitch)
+{
+	unsigned int y;
+
+	for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
+		memcpy
+		(
+			dst + y * dst_pitch * 2,
+			src + y * VL_BLOCK_WIDTH,
+			VL_BLOCK_WIDTH * sizeof(short)
+		);
+
+	return 0;
+}
+
+/*
+ * Zeroes VL_BLOCK_HEIGHT rows of VL_BLOCK_WIDTH samples starting at dst.
+ * dst_pitch is the distance between two written rows, in samples.
+ */
+static int vlGrabNoBlock(short *dst, unsigned int dst_pitch)
+{
+	unsigned int y;
+
+	for (y = 0; y < VL_BLOCK_HEIGHT; ++y)
+		memset
+		(
+			dst + y * dst_pitch,
+			0,
+			VL_BLOCK_WIDTH * sizeof(short)
+		);
+
+	return 0;
+}
+
+/* Every macroblock is drawn as 4 quads of 2 triangles each */
+enum { vlVertsPerMacroBlock = 24 };
+
+/*
+ * Copies the coded blocks of one macroblock into the block textures of the
+ * current buffer set and zeroes the texels of the blocks that are not coded.
+ *
+ * mbx, mby             Macroblock position, in macroblocks.
+ * dct_type             Frame or field coding of the luma blocks.
+ * coded_block_pattern  Bits 5..2 flag the four luma blocks, bit 1 and bit 0
+ *                      the two chroma blocks.
+ * blocks               The coded blocks only, tightly packed, in pattern order.
+ *
+ * Always returns 0.
+ */
+static int vlGrabBlocks
+(
+	struct vlR16SnormBufferedMC *mc,
+	unsigned int mbx,
+	unsigned int mby,
+	enum vlDCTType dct_type,
+	unsigned int coded_block_pattern,
+	short *blocks
+)
+{
+	struct pipe_texture **textures;
+	struct pipe_surface *tex_surface;
+	short *texels;
+	unsigned int tex_pitch;
+	unsigned int x, y, tb = 0, sb = 0;
+	unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT;
+
+	assert(mc);
+	assert(blocks);
+
+	textures = mc->textures[mc->cur_buf % NUM_BUF_SETS];
+
+	tex_surface = mc->pipe->screen->get_tex_surface
+	(
+		mc->pipe->screen,
+		textures[0],
+		0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
+	);
+
+	texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
+	tex_pitch = tex_surface->stride / tex_surface->block.size;
+
+	texels += mbpy * tex_pitch + mbpx;
+
+	for (y = 0; y < 2; ++y)
+	{
+		for (x = 0; x < 2; ++x, ++tb)
+		{
+			/* Frame coded: block rows are contiguous. Field coded: rows interleave, starting at row y */
+			short *frame_dst = texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH;
+			short *field_dst = texels + y * tex_pitch + x * VL_BLOCK_WIDTH;
+
+			if ((coded_block_pattern >> (5 - tb)) & 1)
+			{
+				short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT;
+
+				if (dct_type == vlDCTTypeFrameCoded)
+					vlGrabFrameCodedBlock(cur_block, frame_dst, tex_pitch);
+				else
+					vlGrabFieldCodedBlock(cur_block, field_dst, tex_pitch);
+
+				++sb;
+			}
+			else if (dct_type == vlDCTTypeFrameCoded)
+				vlGrabNoBlock(frame_dst, tex_pitch);
+			else
+			{
+				/*
+				 * An uncoded block of a field coded macroblock covers the same
+				 * interleaved rows a coded one would. Zeroing it in frame layout
+				 * would wipe rows of its coded sibling and miss its own.
+				 */
+				vlGrabNoBlock(field_dst, tex_pitch * 2);
+			}
+		}
+	}
+
+	pipe_surface_unmap(tex_surface);
+
+	/* TODO: Implement 422, 444 */
+	mbpx >>= 1;
+	mbpy >>= 1;
+
+	for (tb = 0; tb < 2; ++tb)
+	{
+		tex_surface = mc->pipe->screen->get_tex_surface
+		(
+			mc->pipe->screen,
+			textures[tb + 1],
+			0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE
+		);
+
+		texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE);
+		tex_pitch = tex_surface->stride / tex_surface->block.size;
+
+		texels += mbpy * tex_pitch + mbpx;
+
+		if ((coded_block_pattern >> (1 - tb)) & 1)
+		{
+			vlGrabFrameCodedBlock
+			(
+				blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT,
+				texels,
+				tex_pitch
+			);
+
+			++sb;
+		}
+		else
+			vlGrabNoBlock(texels, tex_pitch);
+
+		pipe_surface_unmap(tex_surface);
+	}
+
+	return 0;
+}
+
+/*
+ * Maps a macroblock's type and motion type to the id of the vertex buffer
+ * group it is collected in. Returns vlNumMacroBlockExTypes for an unknown
+ * macroblock type.
+ */
+static unsigned int vlClassifyMacroBlock
+(
+	struct vlMpeg2MacroBlock *macroblock
+)
+{
+	const int frame = macroblock->mo_type == vlMotionTypeFrame;
+
+	switch (macroblock->mb_type)
+	{
+		case vlMacroBlockTypeIntra:
+			return vlMacroBlockExTypeIntra;
+		case vlMacroBlockTypeFwdPredicted:
+			return frame ? vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField;
+		case vlMacroBlockTypeBkwdPredicted:
+			return frame ? vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField;
+		case vlMacroBlockTypeBiPredicted:
+			return frame ? vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField;
+		default:
+			assert(0);
+			return vlNumMacroBlockExTypes;
+	}
+}
+
+/*
+ * Appends the 24 vertices (4 quads, one per luma block quadrant) covering
+ * the macroblock to vertex buffer 0 of group mb_buf_id. Positions are in
+ * normalized coordinates of the buffered surface's texture.
+ */
+static void vlWriteMacroBlockVerts
+(
+	struct vlR16SnormBufferedMC *mc,
+	unsigned int mb_buf_id,
+	struct vlMpeg2MacroBlock *macroblock
+)
+{
+	/* Corners of one quad as two triangles, in half macroblock steps */
+	static const unsigned int corners[6][2] =
+	{
+		{0, 0}, {0, 1}, {1, 0},
+		{1, 0}, {0, 1}, {1, 1}
+	};
+
+	const unsigned int set = mc->cur_buf % NUM_BUF_SETS;
+	const float unit_x = mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH;
+	const float unit_y = mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT;
+	const float half_x = mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2);
+	const float half_y = mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2);
+
+	struct vlVertex2f *vb;
+	float xs[3], ys[3];
+	unsigned int quad, c;
+
+	/* Left/top edge, centre and right/bottom edge of the macroblock */
+	xs[0] = macroblock->mbx * unit_x;
+	xs[1] = macroblock->mbx * unit_x + half_x;
+	xs[2] = macroblock->mbx * unit_x + unit_x;
+	ys[0] = macroblock->mby * unit_y;
+	ys[1] = macroblock->mby * unit_y + half_y;
+	ys[2] = macroblock->mby * unit_y + unit_y;
+
+	vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
+	(
+		mc->pipe->winsys,
+		mc->vertex_bufs[set][mb_buf_id][0].buffer,
+		PIPE_BUFFER_USAGE_CPU_WRITE
+	) + mc->num_macroblocks[mb_buf_id] * vlVertsPerMacroBlock;
+
+	/* Quads in order: top left, top right, bottom left, bottom right */
+	for (quad = 0; quad < 4; ++quad)
+	{
+		for (c = 0; c < 6; ++c)
+		{
+			vb[quad * 6 + c].x = xs[(quad & 1) + corners[c][0]];
+			vb[quad * 6 + c].y = ys[(quad >> 1) + corners[c][1]];
+		}
+	}
+
+	mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[set][mb_buf_id][0].buffer);
+}
+
+/*
+ * Appends the macroblock's motion vectors to vertex buffer buf (1 or 2) of
+ * group mb_buf_id. Every vertex owns two slots: the first receives
+ * PMV[0][ref], the second PMV[1][ref] for field motion and is left
+ * untouched for frame motion. ref is 0 for the forward vector and 1 for
+ * the backward vector.
+ */
+static void vlWriteMotionVecs
+(
+	struct vlR16SnormBufferedMC *mc,
+	unsigned int mb_buf_id,
+	unsigned int buf,
+	struct vlMpeg2MacroBlock *macroblock,
+	unsigned int ref
+)
+{
+	const unsigned int set = mc->cur_buf % NUM_BUF_SETS;
+
+	struct vlVertex2f *vb;
+	struct vlVertex2f mo_vec[2];
+	unsigned int i;
+
+	vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map
+	(
+		mc->pipe->winsys,
+		mc->vertex_bufs[set][mb_buf_id][buf].buffer,
+		PIPE_BUFFER_USAGE_CPU_WRITE
+	) + mc->num_macroblocks[mb_buf_id] * 2 * vlVertsPerMacroBlock;
+
+	mo_vec[0].x = macroblock->PMV[0][ref][0] * 0.5f * mc->surface_tex_inv_size.x;
+	mo_vec[0].y = macroblock->PMV[0][ref][1] * 0.5f * mc->surface_tex_inv_size.y;
+
+	if (macroblock->mo_type == vlMotionTypeFrame)
+	{
+		for (i = 0; i < vlVertsPerMacroBlock * 2; i += 2)
+		{
+			vb[i].x = mo_vec[0].x;
+			vb[i].y = mo_vec[0].y;
+		}
+	}
+	else
+	{
+		mo_vec[1].x = macroblock->PMV[1][ref][0] * 0.5f * mc->surface_tex_inv_size.x;
+		mo_vec[1].y = macroblock->PMV[1][ref][1] * 0.5f * mc->surface_tex_inv_size.y;
+
+		for (i = 0; i < vlVertsPerMacroBlock * 2; i += 2)
+		{
+			vb[i].x = mo_vec[0].x;
+			vb[i].y = mo_vec[0].y;
+			vb[i + 1].x = mo_vec[1].x;
+			vb[i + 1].y = mo_vec[1].y;
+		}
+	}
+
+	mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[set][mb_buf_id][buf].buffer);
+}
+
+/*
+ * Buffers one macroblock: writes its vertices and motion vectors into the
+ * vertex buffers of its type and its blocks into the block textures.
+ * Nothing is drawn until vlFlush().
+ *
+ * Returns 0 on success, 1 if the macroblock type is not recognized.
+ */
+static int vlGrabMacroBlock
+(
+	struct vlR16SnormBufferedMC *mc,
+	struct vlMpeg2MacroBlock *macroblock
+)
+{
+	unsigned int mb_buf_id;
+
+	assert(mc);
+	assert(macroblock);
+
+	mb_buf_id = vlClassifyMacroBlock(macroblock);
+
+	/* Only reachable with asserts compiled out; don't index with a bogus id */
+	if (mb_buf_id >= vlNumMacroBlockExTypes)
+		return 1;
+
+	vlWriteMacroBlockVerts(mc, mb_buf_id, macroblock);
+
+	if (macroblock->mb_type != vlMacroBlockTypeIntra)
+	{
+		/* Stream 1 holds the backward vector for backward prediction, else the forward vector */
+		vlWriteMotionVecs
+		(
+			mc, mb_buf_id, 1, macroblock,
+			macroblock->mb_type == vlMacroBlockTypeBkwdPredicted ? 1 : 0
+		);
+
+		/* Bi-directional prediction adds the backward vector in stream 2 */
+		if (macroblock->mb_type == vlMacroBlockTypeBiPredicted)
+			vlWriteMotionVecs(mc, mb_buf_id, 2, macroblock, 1);
+	}
+
+	vlGrabBlocks
+	(
+		mc,
+		macroblock->mbx,
+		macroblock->mby,
+		macroblock->dct_type,
+		macroblock->cbp,
+		macroblock->blocks
+	);
+
+	mc->num_macroblocks[mb_buf_id]++;
+	mc->total_num_macroblocks++;
+
+	return 0;
+}
+
+/*
+ * Draws all buffered macroblocks of one extended type, if there are any.
+ * num_refs is the number of reference surfaces the type samples (0..2);
+ * ref0 and ref1 are only dereferenced when they are needed and something
+ * is actually drawn.
+ */
+static void vlDrawMacroBlocks
+(
+	struct vlR16SnormBufferedMC *mc,
+	enum vlMacroBlockTypeEx type,
+	unsigned int num_refs,
+	struct vlSurface *ref0,
+	struct vlSurface *ref1,
+	void *vs,
+	void *fs
+)
+{
+	struct pipe_context *pipe = mc->pipe;
+	const unsigned int set = mc->cur_buf % NUM_BUF_SETS;
+
+	if (mc->num_macroblocks[type] == 0)
+		return;
+
+	/* One position stream plus one motion vector stream (two elements each) per reference */
+	pipe->set_vertex_buffers(pipe, 1 + num_refs, mc->vertex_bufs[set][type]);
+	pipe->set_vertex_elements(pipe, 1 + 2 * num_refs, mc->vertex_elems);
+
+	/* Textures 0..2 are the block textures, references follow */
+	if (num_refs > 0)
+		mc->textures[set][3] = ref0->texture;
+	if (num_refs > 1)
+		mc->textures[set][4] = ref1->texture;
+
+	pipe->set_sampler_textures(pipe, 3 + num_refs, mc->textures[set]);
+	pipe->bind_sampler_states(pipe, 3 + num_refs, (void**)mc->samplers);
+	pipe->bind_vs_state(pipe, vs);
+	pipe->bind_fs_state(pipe, fs);
+
+	pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[type] * vlVertsPerMacroBlock);
+}
+
+/*
+ * Renders every buffered macroblock to the buffered surface, one draw call
+ * per extended macroblock type, then resets the macroblock counters.
+ * Does nothing if no surface has been buffered yet. Always returns 0.
+ *
+ * NOTE(review): cur_buf is not advanced here or anywhere else in the code
+ * visible around this function, so the same buffer set appears to be
+ * reused for the next batch - confirm that is intended.
+ */
+static int vlFlush
+(
+	struct vlRender *render
+)
+{
+	struct vlR16SnormBufferedMC *mc;
+	struct pipe_context *pipe;
+	struct vlVertexShaderConsts *vs_consts;
+
+	assert(render);
+
+	mc = (struct vlR16SnormBufferedMC*)render;
+	pipe = mc->pipe;
+
+	/* Nothing was ever handed to the renderer, there is no target to draw to */
+	if (!mc->buffered_surface)
+		return 0;
+
+	mc->render_target.cbufs[0] = pipe->screen->get_tex_surface
+	(
+		pipe->screen,
+		mc->buffered_surface->texture,
+		0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+	);
+
+	pipe->set_framebuffer_state(pipe, &mc->render_target);
+	pipe->set_viewport_state(pipe, &mc->viewport);
+	vs_consts = pipe->winsys->buffer_map
+	(
+		pipe->winsys,
+		mc->vs_const_buf.buffer,
+		PIPE_BUFFER_USAGE_CPU_WRITE
+	);
+
+	vs_consts->denorm.x = mc->buffered_surface->texture->width[0];
+	vs_consts->denorm.y = mc->buffered_surface->texture->height[0];
+
+	pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer);
+	pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf);
+
+	vlDrawMacroBlocks(mc, vlMacroBlockExTypeIntra, 0, NULL, NULL, mc->i_vs, mc->i_fs);
+
+	vlDrawMacroBlocks(mc, vlMacroBlockExTypeFwdPredictedFrame, 1, mc->past_surface, NULL, mc->p_vs[0], mc->p_fs[0]);
+	vlDrawMacroBlocks(mc, vlMacroBlockExTypeFwdPredictedField, 1, mc->past_surface, NULL, mc->p_vs[1], mc->p_fs[1]);
+
+	vlDrawMacroBlocks(mc, vlMacroBlockExTypeBkwdPredictedFrame, 1, mc->future_surface, NULL, mc->p_vs[0], mc->p_fs[0]);
+	vlDrawMacroBlocks(mc, vlMacroBlockExTypeBkwdPredictedField, 1, mc->future_surface, NULL, mc->p_vs[1], mc->p_fs[1]);
+
+	vlDrawMacroBlocks(mc, vlMacroBlockExTypeBiPredictedFrame, 2, mc->past_surface, mc->future_surface, mc->b_vs[0], mc->b_fs[0]);
+	vlDrawMacroBlocks(mc, vlMacroBlockExTypeBiPredictedField, 2, mc->past_surface, mc->future_surface, mc->b_vs[1], mc->b_fs[1]);
+
+	memset(mc->num_macroblocks, 0, sizeof(mc->num_macroblocks));
+	mc->total_num_macroblocks = 0;
+
+	return 0;
+}
+
+/*
+ * Buffers a batch of macroblocks destined for surface. If macroblocks for
+ * a different surface are still buffered they are flushed first.
+ * Always returns 0.
+ */
+static int vlRenderMacroBlocksMpeg2R16SnormBuffered
+(
+	struct vlRender *render,
+	struct vlMpeg2MacroBlockBatch *batch,
+	struct vlSurface *surface
+)
+{
+	struct vlR16SnormBufferedMC *mc;
+	unsigned int i;
+
+	assert(render);
+
+	mc = (struct vlR16SnormBufferedMC*)render;
+
+	/*
+	 * Only a change of the target surface starts a new buffer. A change of
+	 * the reference surfaces alone neither flushes nor updates them; that
+	 * check was already disabled in the original code.
+	 */
+	if (!mc->buffered_surface || mc->buffered_surface != surface)
+	{
+		if (mc->buffered_surface)
+			vlFlush(&mc->base);
+
+		mc->buffered_surface = surface;
+		mc->past_surface = batch->past_surface;
+		mc->future_surface = batch->future_surface;
+		mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0];
+		mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0];
+	}
+
+	for (i = 0; i < batch->num_macroblocks; ++i)
+		vlGrabMacroBlock(mc, &batch->macroblocks[i]);
+
+	return 0;
+}
+
+/* Nothing to finish for this renderer; only validates the argument. */
+static int vlEnd
+(
+	struct vlRender *render
+)
+{
+	assert(render);
+
+	return 0;
+}
+
+/*
+ * Releases the samplers, vertex buffers, block textures, shaders and
+ * constant buffers of the renderer, then the renderer itself.
+ * Always returns 0.
+ */
+static int vlDestroy
+(
+	struct vlRender *render
+)
+{
+	struct vlR16SnormBufferedMC *mc;
+	struct pipe_context *pipe;
+	unsigned int g, h, i;
+
+	assert(render);
+
+	mc = (struct vlR16SnormBufferedMC*)render;
+	pipe = mc->pipe;
+
+	for (i = 0; i < 5; ++i)
+		pipe->delete_sampler_state(pipe, mc->samplers[i]);
+
+	for (g = 0; g < NUM_BUF_SETS; ++g)
+		for (h = 0; h < vlNumMacroBlockExTypes; ++h)
+			for (i = 0; i < 3; ++i)
+				pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[g][h][i].buffer);
+
+	/* Textures 3 & 4 are not created directly, no need to release them here */
+	for (i = 0; i < NUM_BUF_SETS; ++i)
+	{
+		pipe_texture_release(&mc->textures[i][0]);
+		pipe_texture_release(&mc->textures[i][1]);
+		pipe_texture_release(&mc->textures[i][2]);
+	}
+
+	pipe->delete_vs_state(pipe, mc->i_vs);
+	pipe->delete_fs_state(pipe, mc->i_fs);
+
+	for (i = 0; i < 2; ++i)
+	{
+		pipe->delete_vs_state(pipe, mc->p_vs[i]);
+		pipe->delete_fs_state(pipe, mc->p_fs[i]);
+		pipe->delete_vs_state(pipe, mc->b_vs[i]);
+		pipe->delete_fs_state(pipe, mc->b_fs[i]);
+	}
+
+	pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer);
+	pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer);
+
+	free(mc);
+
+	return 0;
+}
+
+/*
+ * Multiplier renormalizes block samples from 16 bits to 12 bits.
+ * Divider is used when calculating Y % 2 for choosing top or bottom
+ * field for P or B macroblocks.
+ * TODO: Use immediates.
+ */
+static const struct vlFragmentShaderConsts fs_consts =
+{
+	{32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
+	{0.5f, 2.0f, 0.0f, 0.0f}
+};
+
+/*
+ * Builds the vertex shader used for intra-coded macroblocks and stores it
+ * in mc->i_vs. The shader passes the vertex position through both as the
+ * output position and as the block texcoords.
+ *
+ * Returns 0 on success, 1 if the token buffer could not be allocated.
+ */
+static int vlCreateVertexShaderIMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 50;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state vs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written through right below, don't do that on NULL */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+	ti = 3;
+
+	/*
+	 * decl i0		; Vertex pos, luma & chroma texcoords
+	 * decl i1, i2		; Declared as well, not read by this shader
+	 */
+	for (i = 0; i < 3; i++)
+	{
+		decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * decl o0		; Vertex pos
+	 * decl o1		; Luma/chroma texcoords
+	 */
+	for (i = 0; i < 2; i++)
+	{
+		decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * mov o0, i0		; Move input vertex pos to output
+	 * mov o1, i0		; Move input luma/chroma texcoords to output
+	 */
+	for (i = 0; i < 2; ++i)
+	{
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	vs.tokens = tokens;
+	mc->i_vs = pipe->create_vs_state(pipe, &vs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Builds the fragment shader used for intra-coded macroblocks and stores
+ * it in mc->i_fs. The shader samples the luma, Cb and Cr block textures,
+ * packs the three samples into one vector and scales it by constant c0.
+ *
+ * Returns 0 on success, 1 if the token buffer could not be allocated.
+ */
+static int vlCreateFragmentShaderIMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state fs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = malloc(max_tokens * sizeof *tokens);
+
+	/* The token stream is written through right below, don't do that on NULL */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+	ti = 3;
+
+	/* decl i0			; Luma/chroma texcoords */
+	decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * decl c0			; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+	 * NOTE(review): the comment on fs_consts speaks of 12 bits - confirm which is right
+	 */
+	decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl o0			; Fragment color */
+	decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/* decl t0, t1 */
+	decl = vl_decl_temps(0, 1);
+	ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * decl s0			; Sampler for luma texture
+	 * decl s1			; Sampler for chroma Cb texture
+	 * decl s2			; Sampler for chroma Cr texture
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		decl = vl_decl_samplers(i, i);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * tex2d t1, i0, s0		; Read texel from luma texture
+	 * mov t0.x, t1.x		; Move luma sample into .x component
+	 * tex2d t1, i0, s1		; Read texel from chroma Cb texture
+	 * mov t0.y, t1.x		; Move Cb sample into .y component
+	 * tex2d t1, i0, s2		; Read texel from chroma Cr texture
+	 * mov t0.z, t1.x		; Move Cr sample into .z component
+	 */
+	for (i = 0; i < 3; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+		/* Broadcast t1.x and let the write mask pick the destination component */
+		inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+		inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+		inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* mul o0, t0, c0		; Rescale texel to correct range */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	fs.tokens = tokens;
+	mc->i_fs = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
+
+static int
vlCreateVertexShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, luma/chroma texcoords + * decl i1 ; Ref surface top field texcoords + * decl i2 ; Ref surface bottom field texcoords (unused, packed in the same stream) + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; Ref macroblock texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o2, i0, i1 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, luma/chroma texcoords + * decl i1 ; Ref surface top field texcoords + * decl i2 ; Ref surface bottom field texcoords + */ + for (i = 0; i < 3; i++) + { + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Texcoord denorm coefficients */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; Top field ref macroblock texcoords + * decl o3 ; Bottom field ref macroblock texcoords + * decl o4 ; Denormalized vertex pos + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + */ + for (i = 0; i < 3; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i == 0 ? 0 : i - 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o2, i0, i1 ; Translate vertex pos by motion vec to form top field macroblock texcoords + * add o3, i0, i2 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o4, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int 
vlCreateFragmentShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + */ + for (i = 0; i < 2; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move 
luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i1, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 1, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int 
max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + * decl i2 ; Texcoords for s3 + * decl i3 ; Denormalized vertex pos + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d 
t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i1, s3 ; Read texel from ref macroblock top field + * tex2d t2, i2, s3 ; Read texel from ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? 
*/ + /* sub t4, i3.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, luma/chroma texcoords + * decl i1 ; First ref surface top field texcoords + * decl i2 ; 
First ref surface bottom field texcoords (unused, packed in the same stream) + * decl i3 ; Second ref surface top field texcoords + * decl i4 ; Second ref surface bottom field texcoords (unused, packed in the same stream) + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; First ref macroblock texcoords + * decl o3 ; Second ref macroblock texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o2, i0, i1 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o3, i0, i3 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i * 2 + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct 
tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos, Luma/chroma texcoords + * decl i1 ; First ref surface top field texcoords + * decl i2 ; First ref surface bottom field texcoords + * decl i3 ; Second ref surface top field texcoords + * decl i4 ; Second ref surface bottom field texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Denorm coefficients */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma/chroma texcoords + * decl o2 ; Top field past ref macroblock texcoords + * decl o3 ; Bottom field past ref macroblock texcoords + * decl o4 ; Top field future ref macroblock texcoords + * decl o5 ; Bottom field future ref macroblock texcoords + * decl o6 ; Denormalized vertex pos + */ + for (i = 0; i < 7; i++) + { + decl = vl_decl_output((i == 0 || i == 7) ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i0 ; Move input luma/chroma texcoords to output + * mov o2, i1 ; Move past top field texcoords to output + * mov o3, i2 ; Move past bottom field texcoords to output + * mov o4, i3 ; Move future top field texcoords to output + * mov o5, i4 ; Move future bottom field texcoords to output + */ + for (i = 0; i < 6; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o2, i0, i1 ; Translate vertex pos by motion vec to form first top field macroblock texcoords + * add o3, i0, i2 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords + * add o4, i0, i3 ; Translate vertex pos by motion vec to form second top field macroblock texcoords + * add o5, i0, i4 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o6, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMB +( + struct 
vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + * decl i2 ; Texcoords for s4 + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += 
tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i1, s3 ; Read texel from past ref macroblock + * tex2d t2, i2, s4 ; Read texel from future ref macroblock + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = 
TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Texcoords for s0, s1, s2 + * decl i1 ; Texcoords for s3 + * decl i2 ; Texcoords for s3 + * decl i3 ; Texcoords for s4 + * decl i4 ; Texcoords for s4 + * decl i5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm 
+ * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for past ref surface texture + * decl s4 ; Sampler for future ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + } + + /* 
mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* XXX: Pos values off by 0.5? */ + /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + 
inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i1, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i2, s3 ; Read texel from past ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t4, i3, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i4, s4 ; Read texel from future ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], 
header, max_tokens - ti); + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateDataBufs +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int mbw = align(mc->video_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; + const unsigned int mbh = align(mc->video_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; + const unsigned int num_mb_per_frame = mbw * mbh; + + struct pipe_context *pipe; + unsigned int g, h, i; + + assert(mc); + + pipe = mc->pipe; + + for (g = 0; g < NUM_BUF_SETS; ++g) + { + for (h = 0; h < 7; ++h) + { + /* Create our vertex buffer and vertex buffer element */ + mc->vertex_bufs[g][h][0].pitch = sizeof(struct vlVertex2f); + mc->vertex_bufs[g][h][0].max_index = 24 * num_mb_per_frame - 1; + mc->vertex_bufs[g][h][0].buffer_offset = 0; + mc->vertex_bufs[g][h][0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 24 * num_mb_per_frame + ); + } + } + + /* Position & block luma, block chroma 
texcoord element */ + mc->vertex_elems[0].src_offset = 0; + mc->vertex_elems[0].vertex_buffer_index = 0; + mc->vertex_elems[0].nr_components = 2; + mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + for (g = 0; g < NUM_BUF_SETS; ++g) + { + for (h = 0; h < 7; ++h) + { + for (i = 1; i < 3; ++i) + { + mc->vertex_bufs[g][h][i].pitch = sizeof(struct vlVertex2f) * 2; + mc->vertex_bufs[g][h][i].max_index = 24 * num_mb_per_frame - 1; + mc->vertex_bufs[g][h][i].buffer_offset = 0; + mc->vertex_bufs[g][h][i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame + ); + } + } + } + + /* First ref surface top field texcoord element */ + mc->vertex_elems[1].src_offset = 0; + mc->vertex_elems[1].vertex_buffer_index = 1; + mc->vertex_elems[1].nr_components = 2; + mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface bottom field texcoord element */ + mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[2].vertex_buffer_index = 1; + mc->vertex_elems[2].nr_components = 2; + mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface top field texcoord element */ + mc->vertex_elems[3].src_offset = 0; + mc->vertex_elems[3].vertex_buffer_index = 2; + mc->vertex_elems[3].nr_components = 2; + mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface bottom field texcoord element */ + mc->vertex_elems[4].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[4].vertex_buffer_index = 2; + mc->vertex_elems[4].nr_components = 2; + mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Create our constant buffer */ + mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); + mc->vs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + mc->vs_const_buf.size + ); + + mc->fs_const_buf.size = sizeof(struct 
vlFragmentShaderConsts); + mc->fs_const_buf.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + mc->fs_const_buf.size + ); + + memcpy + ( + pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &fs_consts, + sizeof(struct vlFragmentShaderConsts) + ); + + pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); + + return 0; +} + +static int vlInit +( + struct vlR16SnormBufferedMC *mc +) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int filters[5]; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + + /* For MC we render to textures, which are rounded up to nearest POT */ + mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width); + mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height); + mc->viewport.scale[2] = 1; + mc->viewport.scale[3] = 1; + mc->viewport.translate[0] = 0; + mc->viewport.translate[1] = 0; + mc->viewport.translate[2] = 0; + mc->viewport.translate[3] = 0; + + mc->render_target.width = vlRoundUpPOT(mc->video_width); + mc->render_target.height = vlRoundUpPOT(mc->video_height); + mc->render_target.num_cbufs = 1; + /* FB for MC stage is a vlSurface created by the user, set at render time */ + mc->render_target.zsbuf = NULL; + + filters[0] = PIPE_TEX_FILTER_NEAREST; + filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[2] = mc->video_format == vlFormatYCbCr444 ? 
PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width[0] = vlRoundUpPOT(mc->video_width); + template.height[0] = vlRoundUpPOT(mc->video_height); + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + + for (i = 0; i < NUM_BUF_SETS; ++i) + mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); + + if (mc->video_format == vlFormatYCbCr420) + { + template.width[0] = vlRoundUpPOT(mc->video_width / 2); + template.height[0] = vlRoundUpPOT(mc->video_height / 2); + } + else if (mc->video_format == vlFormatYCbCr422) + template.height[0] = vlRoundUpPOT(mc->video_height / 2); + + for (i = 0; i < NUM_BUF_SETS; ++i) + { + mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); + mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); + } + + /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ + + vlCreateVertexShaderIMB(mc); + vlCreateFragmentShaderIMB(mc); + 
vlCreateVertexShaderFramePMB(mc); + vlCreateVertexShaderFieldPMB(mc); + vlCreateFragmentShaderFramePMB(mc); + vlCreateFragmentShaderFieldPMB(mc); + vlCreateVertexShaderFrameBMB(mc); + vlCreateVertexShaderFieldBMB(mc); + vlCreateFragmentShaderFrameBMB(mc); + vlCreateFragmentShaderFieldBMB(mc); + vlCreateDataBufs(mc); + + return 0; +} + +int vlCreateR16SNormBufferedMC +( + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum vlFormat video_format, + struct vlRender **render +) +{ + struct vlR16SnormBufferedMC *mc; + + assert(pipe); + assert(render); + + mc = calloc(1, sizeof(struct vlR16SnormBufferedMC)); + + mc->base.vlBegin = &vlBegin; + mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered; + mc->base.vlEnd = &vlEnd; + mc->base.vlFlush = &vlFlush; + mc->base.vlDestroy = &vlDestroy; + mc->pipe = pipe; + mc->video_width = video_width; + mc->video_height = video_height; + + mc->cur_buf = 0; + mc->buffered_surface = NULL; + mc->past_surface = NULL; + mc->future_surface = NULL; + memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7); + mc->total_num_macroblocks = 0; + + vlInit(mc); + + *render = &mc->base; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h new file mode 100644 index 0000000000..30f67db3e7 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h @@ -0,0 +1,18 @@ +#ifndef vl_r16snorm_mc_buf_h +#define vl_r16snorm_mc_buf_h + +#include "vl_types.h" + +struct pipe_context; +struct vlRender; + +int vlCreateR16SNormBufferedMC +( + struct pipe_context *pipe, + unsigned int video_width, + unsigned int video_height, + enum vlFormat video_format, + struct vlRender **render +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_render.h b/src/gallium/state_trackers/g3dvl/vl_render.h index 63016b5cbe..166030b498 100644 --- a/src/gallium/state_trackers/g3dvl/vl_render.h +++ 
b/src/gallium/state_trackers/g3dvl/vl_render.h @@ -24,6 +24,11 @@ struct vlRender struct vlRender *render ); + int (*vlFlush) + ( + struct vlRender *render + ); + int (*vlDestroy) ( struct vlRender *render diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index ffc8122172..687fd1ec29 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -113,6 +113,8 @@ int vlPutPicture assert(surface); assert(surface->context); + surface->context->render->vlFlush(surface->context->render); + csc = surface->context->csc; pipe = surface->context->pipe; -- cgit v1.2.3 From e959c23a31c11f0bcc5775e6e0eb48c5c3d70cf3 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Fri, 29 Aug 2008 00:22:09 -0400 Subject: g3dvl: Re-enable buffer rotation, disable high quality 420->444 conversion. Using linear interpolation when upscaling the chroma blocks causes some discoloration around the edges. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 238 +-------------------- 1 file changed, 4 insertions(+), 234 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index fc383cb8f6..2d9558587c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -241,7 +241,6 @@ static int vlGrabBlocks return 0; } -#if 0 static int vlGrabMacroBlock ( struct vlR16SnormBufferedMC *mc, @@ -456,237 +455,6 @@ static int vlGrabMacroBlock return 0; } -#else -static int vlGrabMacroBlock -( - struct vlR16SnormBufferedMC *mc, - struct vlMpeg2MacroBlock *macroblock -) -{ - const struct vlVertex2f unit = - { - mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH, - mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT - }; - const struct vlVertex2f half = - { - mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2), - 
mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2) - }; - - struct vlVertex2f *vb; - unsigned int mb_buf_id; - struct vlVertex2f mo_vec[2]; - unsigned int i; - - assert(mc); - assert(macroblock); - - switch (macroblock->mb_type) - { - case vlMacroBlockTypeIntra: - { - mb_buf_id = vlMacroBlockExTypeIntra; - break; - } - case vlMacroBlockTypeFwdPredicted: - { - mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ? - vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField; - break; - } - case vlMacroBlockTypeBkwdPredicted: - { - mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ? - vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField; - break; - } - case vlMacroBlockTypeBiPredicted: - { - mb_buf_id = macroblock->mo_type == vlMotionTypeFrame ? - vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField; - break; - } - default: - assert(0); - } - - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][0].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + mc->num_macroblocks[mb_buf_id] * 24; - - vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y; - vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y; - vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y; - - vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y; - vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y; - vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y; - - vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y; - vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y; - vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y; - - vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y; - vb[10].x = 
macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y; - vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y; - - vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y; - vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y; - vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y; - - vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y; - vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y; - vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y; - - vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = macroblock->mby * unit.y + half.y; - vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y; - vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y; - - vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y; - vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y; - vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y; - - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][0].buffer); - - if (macroblock->mb_type == vlMacroBlockTypeIntra) - { - vlGrabBlocks - ( - mc, - macroblock->mbx, - macroblock->mby, - macroblock->dct_type, - macroblock->cbp, - macroblock->blocks - ); - - mc->num_macroblocks[mb_buf_id]++; - mc->total_num_macroblocks++; - return 0; - } - - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][1].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + mc->num_macroblocks[mb_buf_id] * 2 * 24; - - if (macroblock->mb_type == vlMacroBlockTypeFwdPredicted || macroblock->mb_type == 
vlMacroBlockTypeBiPredicted) - { - mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x; - mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y; - - if (macroblock->mo_type == vlMotionTypeField) - { - mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x; - mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y; - } - } - else - { - mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; - mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; - - if (macroblock->mo_type == vlMotionTypeField) - { - mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; - mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y; - } - } - - if (macroblock->mo_type == vlMotionTypeFrame) - { - for (i = 0; i < 24 * 2; i += 2) - { - vb[i].x = mo_vec[0].x; - vb[i].y = mo_vec[0].y; - } - } - else - { - for (i = 0; i < 24 * 2; i += 2) - { - vb[i].x = mo_vec[0].x; - vb[i].y = mo_vec[0].y; - vb[i + 1].x = mo_vec[1].x; - vb[i + 1].y = mo_vec[1].y; - } - } - - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][1].buffer); - - if (macroblock->mb_type != vlMacroBlockTypeBiPredicted) - { - vlGrabBlocks - ( - mc, - macroblock->mbx, - macroblock->mby, - macroblock->dct_type, - macroblock->cbp, - macroblock->blocks - ); - - mc->num_macroblocks[mb_buf_id]++; - mc->total_num_macroblocks++; - return 0; - } - - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][2].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + mc->num_macroblocks[mb_buf_id] * 2 * 24; - - mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; - mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; - - if (macroblock->mo_type == vlMotionTypeFrame) - { - for (i = 0; i < 24 * 2; i += 2) - { - vb[i].x = 
mo_vec[0].x; - vb[i].y = mo_vec[0].y; - } - } - else - { - mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; - mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y; - - for (i = 0; i < 24 * 2; i += 2) - { - vb[i].x = mo_vec[0].x; - vb[i].y = mo_vec[0].y; - vb[i + 1].x = mo_vec[1].x; - vb[i + 1].y = mo_vec[1].y; - } - } - - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_buf_id][2].buffer); - - vlGrabBlocks - ( - mc, - macroblock->mbx, - macroblock->mby, - macroblock->dct_type, - macroblock->cbp, - macroblock->blocks - ); - - mc->num_macroblocks[mb_buf_id]++; - mc->total_num_macroblocks++; - - return 0; -} -#endif static int vlFlush ( @@ -818,6 +586,7 @@ static int vlFlush memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7); mc->total_num_macroblocks = 0; + mc->cur_buf++; return 0; } @@ -2231,8 +2000,9 @@ static int vlInit mc->render_target.zsbuf = NULL; filters[0] = PIPE_TEX_FILTER_NEAREST; - filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[2] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; + /* FIXME: Linear causes discoloration around block edges */ + filters[1] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; + filters[2] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; filters[3] = PIPE_TEX_FILTER_LINEAR; filters[4] = PIPE_TEX_FILTER_LINEAR; -- cgit v1.2.3 From 29f876cc90605ad7de1141443d3b242395eed5ee Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 30 Aug 2008 13:26:06 -0400 Subject: g3dvl: Inline hint for relatively small, frequently called functions. 
--- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 2d9558587c..08aed4542e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -79,7 +79,7 @@ static int vlBegin return 0; } -static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) +static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -94,7 +94,7 @@ static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) return 0; } -static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) +static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) { unsigned int y; @@ -119,7 +119,7 @@ static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) return 0; } -static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch) { unsigned int y; @@ -130,11 +130,11 @@ static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) 0, VL_BLOCK_WIDTH * 2 ); - + return 0; } -static int vlGrabBlocks +static inline int vlGrabBlocks ( struct vlR16SnormBufferedMC *mc, unsigned int mbx, @@ -164,7 +164,7 @@ static int vlGrabBlocks tex_pitch = tex_surface->stride / tex_surface->block.size; texels += mbpy * tex_pitch + mbpx; - + for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++tb) @@ -241,7 +241,7 @@ static int vlGrabBlocks return 0; } -static int vlGrabMacroBlock +static inline int vlGrabMacroBlock ( struct vlR16SnormBufferedMC *mc, struct vlMpeg2MacroBlock *macroblock -- cgit v1.2.3 From 72bcb69459d336fe0c2cf9da57a9b98a933299ca Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 31 
Aug 2008 01:01:51 -0400 Subject: g3dvl: Some clean ups. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 27 +++++++++++++--------- src/gallium/state_trackers/g3dvl/vl_types.h | 4 +++- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 08aed4542e..900635228f 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -17,6 +17,15 @@ #include "vl_types.h" #include "vl_defs.h" +/* + * TODO: Dynamically determine number of buf sets to use, based on + * video size and available mem, since we can easily run out of memory + * for high res videos. + * Note: Destroying previous frame's buffers and creating new ones + * doesn't work, since the buffers are not actually destroyed until their + * fence is signalled, and if we render fast enough we will create faster + * than we destroy. 
+ */ #define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */ enum vlMacroBlockTypeEx @@ -55,7 +64,6 @@ struct vlR16SnormBufferedMC struct vlSurface *past_surface, *future_surface; struct vlVertex2f surface_tex_inv_size; unsigned int num_macroblocks[vlNumMacroBlockExTypes]; - unsigned int total_num_macroblocks; struct pipe_context *pipe; struct pipe_viewport_state viewport; @@ -130,7 +138,7 @@ static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch) 0, VL_BLOCK_WIDTH * 2 ); - + return 0; } @@ -164,7 +172,7 @@ static inline int vlGrabBlocks tex_pitch = tex_surface->stride / tex_surface->block.size; texels += mbpy * tex_pitch + mbpx; - + for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++tb) @@ -451,7 +459,6 @@ static inline int vlGrabMacroBlock ); mc->num_macroblocks[mb_type_ex]++; - mc->total_num_macroblocks++; return 0; } @@ -584,8 +591,7 @@ static int vlFlush pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24); } - memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7); - mc->total_num_macroblocks = 0; + memset(mc->num_macroblocks, 0, sizeof(unsigned int) * vlNumMacroBlockExTypes); mc->cur_buf++; return 0; @@ -665,7 +671,7 @@ static int vlDestroy pipe->delete_sampler_state(pipe, mc->samplers[i]); for (g = 0; g < NUM_BUF_SETS; ++g) - for (h = 0; h < 7; ++h) + for (h = 0; h < vlNumMacroBlockExTypes; ++h) for (i = 0; i < 3; ++i) pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[g][h][i].buffer); @@ -1871,7 +1877,7 @@ static int vlCreateDataBufs for (g = 0; g < NUM_BUF_SETS; ++g) { - for (h = 0; h < 7; ++h) + for (h = 0; h < vlNumMacroBlockExTypes; ++h) { /* Create our vertex buffer and vertex buffer element */ mc->vertex_bufs[g][h][0].pitch = sizeof(struct vlVertex2f); @@ -1895,7 +1901,7 @@ static int vlCreateDataBufs for (g = 0; g < NUM_BUF_SETS; ++g) { - for (h = 0; h < 7; ++h) + for (h = 0; h < vlNumMacroBlockExTypes; ++h) { for (i = 1; i < 3; ++i) { @@ -2100,8 +2106,7 @@ 
int vlCreateR16SNormBufferedMC mc->buffered_surface = NULL; mc->past_surface = NULL; mc->future_surface = NULL; - memset(mc->num_macroblocks, 0, sizeof(unsigned int) * 7); - mc->total_num_macroblocks = 0; + memset(mc->num_macroblocks, 0, sizeof(unsigned int) * vlNumMacroBlockExTypes); vlInit(mc); diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index 504ba8ac81..b432bfde93 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -79,7 +79,9 @@ enum vlMacroBlockType vlMacroBlockTypeIntra, vlMacroBlockTypeFwdPredicted, vlMacroBlockTypeBkwdPredicted, - vlMacroBlockTypeBiPredicted + vlMacroBlockTypeBiPredicted, + + vlNumMacroBlockTypes }; struct vlMpeg2MacroBlock -- cgit v1.2.3 From ff1a5066513fc75fb0fbbe7fe8a3f1ff27fbf6d3 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 1 Sep 2008 20:13:50 -0400 Subject: g3dvl: Use one VB for all MBs, sort MBs at flush to determine placement. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 278 ++++++++++++--------- 1 file changed, 166 insertions(+), 112 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 900635228f..b3deab4656 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -63,7 +63,8 @@ struct vlR16SnormBufferedMC struct vlSurface *buffered_surface; struct vlSurface *past_surface, *future_surface; struct vlVertex2f surface_tex_inv_size; - unsigned int num_macroblocks[vlNumMacroBlockExTypes]; + unsigned int num_macroblocks; + struct vlMpeg2MacroBlock *macroblocks; struct pipe_context *pipe; struct pipe_viewport_state viewport; @@ -72,7 +73,7 @@ struct vlR16SnormBufferedMC struct pipe_texture *textures[NUM_BUF_SETS][5]; void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; - struct 
pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][vlNumMacroBlockExTypes][3]; + struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3]; struct pipe_vertex_element vertex_elems[5]; struct pipe_constant_buffer vs_const_buf, fs_const_buf; }; @@ -249,11 +250,77 @@ static inline int vlGrabBlocks return 0; } +static inline enum vlMacroBlockTypeEx vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock *mb) +{ + assert(mb); + + switch (mb->mb_type) + { + case vlMacroBlockTypeIntra: + return vlMacroBlockExTypeIntra; + case vlMacroBlockTypeFwdPredicted: + return mb->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField; + case vlMacroBlockTypeBkwdPredicted: + return mb->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField; + case vlMacroBlockTypeBiPredicted: + return mb->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField; + default: + assert(0); + } + + /* Unreachable */ + return -1; +} + static inline int vlGrabMacroBlock ( struct vlR16SnormBufferedMC *mc, struct vlMpeg2MacroBlock *macroblock ) +{ + assert(mc); + assert(macroblock); + + mc->macroblocks[mc->num_macroblocks].mbx = macroblock->mbx; + mc->macroblocks[mc->num_macroblocks].mby = macroblock->mby; + mc->macroblocks[mc->num_macroblocks].mb_type = macroblock->mb_type; + mc->macroblocks[mc->num_macroblocks].mo_type = macroblock->mo_type; + mc->macroblocks[mc->num_macroblocks].dct_type = macroblock->dct_type; + mc->macroblocks[mc->num_macroblocks].PMV[0][0][0] = macroblock->PMV[0][0][0]; + mc->macroblocks[mc->num_macroblocks].PMV[0][0][1] = macroblock->PMV[0][0][1]; + mc->macroblocks[mc->num_macroblocks].PMV[0][1][0] = macroblock->PMV[0][1][0]; + mc->macroblocks[mc->num_macroblocks].PMV[0][1][1] = macroblock->PMV[0][1][1]; + mc->macroblocks[mc->num_macroblocks].PMV[1][0][0] = macroblock->PMV[1][0][0]; + mc->macroblocks[mc->num_macroblocks].PMV[1][0][1] = 
macroblock->PMV[1][0][1]; + mc->macroblocks[mc->num_macroblocks].PMV[1][1][0] = macroblock->PMV[1][1][0]; + mc->macroblocks[mc->num_macroblocks].PMV[1][1][1] = macroblock->PMV[1][1][1]; + mc->macroblocks[mc->num_macroblocks].cbp = macroblock->cbp; + mc->macroblocks[mc->num_macroblocks].blocks = macroblock->blocks; + + vlGrabBlocks + ( + mc, + macroblock->mbx, + macroblock->mby, + macroblock->dct_type, + macroblock->cbp, + macroblock->blocks + ); + + mc->num_macroblocks++; + + return 0; +} + +static inline int vlGrabMacroBlockVB +( + struct vlR16SnormBufferedMC *mc, + struct vlMpeg2MacroBlock *macroblock, + unsigned int pos +) { const struct vlVertex2f unit = { @@ -267,42 +334,12 @@ static inline int vlGrabMacroBlock }; struct vlVertex2f *vb; - enum vlMacroBlockTypeEx mb_type_ex; struct vlVertex2f mo_vec[2]; unsigned int i; assert(mc); assert(macroblock); - switch (macroblock->mb_type) - { - case vlMacroBlockTypeIntra: - { - mb_type_ex = vlMacroBlockExTypeIntra; - break; - } - case vlMacroBlockTypeFwdPredicted: - { - mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ? - vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField; - break; - } - case vlMacroBlockTypeBkwdPredicted: - { - mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ? - vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField; - break; - } - case vlMacroBlockTypeBiPredicted: - { - mb_type_ex = macroblock->mo_type == vlMotionTypeFrame ? 
- vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField; - break; - } - default: - assert(0); - } - switch (macroblock->mb_type) { case vlMacroBlockTypeBiPredicted: @@ -310,9 +347,9 @@ static inline int vlGrabMacroBlock vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE - ) + mc->num_macroblocks[mb_type_ex] * 2 * 24; + ) + pos * 2 * 24; mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; @@ -339,7 +376,7 @@ static inline int vlGrabMacroBlock } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][2].buffer); + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer); /* fall-through */ } @@ -349,9 +386,9 @@ static inline int vlGrabMacroBlock vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE - ) + mc->num_macroblocks[mb_type_ex] * 2 * 24; + ) + pos * 2 * 24; if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted) { @@ -395,7 +432,7 @@ static inline int vlGrabMacroBlock } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][1].buffer); + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer); /* fall-through */ } @@ -404,9 +441,9 @@ static inline int vlGrabMacroBlock vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer, + mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE - ) + 
mc->num_macroblocks[mb_type_ex] * 24; + ) + pos * 24; vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y; vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y; @@ -440,7 +477,7 @@ static inline int vlGrabMacroBlock vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y; vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y; - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][mb_type_ex][0].buffer); + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer); break; } @@ -448,18 +485,6 @@ static inline int vlGrabMacroBlock assert(0); } - vlGrabBlocks - ( - mc, - macroblock->mbx, - macroblock->mby, - macroblock->dct_type, - macroblock->cbp, - macroblock->blocks - ); - - mc->num_macroblocks[mb_type_ex]++; - return 0; } @@ -471,12 +496,37 @@ static int vlFlush struct vlR16SnormBufferedMC *mc; struct pipe_context *pipe; struct vlVertexShaderConsts *vs_consts; + unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0}; + unsigned int offset[vlNumMacroBlockExTypes]; + unsigned int vb_start = 0; + unsigned int i; - assert(mc); + assert(render); mc = (struct vlR16SnormBufferedMC*)render; pipe = mc->pipe; + for (i = 0; i < mc->num_macroblocks; ++i) + { + enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + + num_macroblocks[mb_type_ex]++; + } + + offset[0] = 0; + + for (i = 1; i < vlNumMacroBlockExTypes; ++i) + offset[i] = offset[i - 1] + num_macroblocks[i - 1]; + + for (i = 0; i < mc->num_macroblocks; ++i) + { + enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + + vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex]); + + offset[mb_type_ex]++; + } + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface ( pipe->screen, @@ -497,23 +547,25 @@ static int vlFlush vs_consts->denorm.y = 
mc->buffered_surface->texture->height[0]; pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); - if (mc->num_macroblocks[vlMacroBlockExTypeIntra] > 0) + if (num_macroblocks[vlMacroBlockExTypeIntra] > 0) { - pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeIntra]); + pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); pipe->set_vertex_elements(pipe, 1, mc->vertex_elems); pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); pipe->bind_vs_state(pipe, mc->i_vs); pipe->bind_fs_state(pipe, mc->i_fs); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeIntra] * 24); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeIntra] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeIntra] * 24; } - if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) + if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedFrame]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -521,12 +573,13 @@ static int vlFlush pipe->bind_vs_state(pipe, mc->p_vs[0]); pipe->bind_fs_state(pipe, mc->p_fs[0]); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24); + 
vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24; } - if (mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) + if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeFwdPredictedField]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -534,12 +587,13 @@ static int vlFlush pipe->bind_vs_state(pipe, mc->p_vs[1]); pipe->bind_fs_state(pipe, mc->p_fs[1]); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24; } - if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) + if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedFrame]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -547,12 +601,13 @@ static int vlFlush pipe->bind_vs_state(pipe, mc->p_vs[0]); pipe->bind_fs_state(pipe, mc->p_fs[0]); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24); + vb_start += 
num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24; } - if (mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) + if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBkwdPredictedField]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -560,12 +615,13 @@ static int vlFlush pipe->bind_vs_state(pipe, mc->p_vs[1]); pipe->bind_fs_state(pipe, mc->p_fs[1]); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24; } - if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) + if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedFrame]); + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); pipe->set_vertex_elements(pipe, 5, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; @@ -574,12 +630,13 @@ static int vlFlush pipe->bind_vs_state(pipe, mc->b_vs[0]); pipe->bind_fs_state(pipe, mc->b_fs[0]); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24; } - 
if (mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) + if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][vlMacroBlockExTypeBiPredictedField]); + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); pipe->set_vertex_elements(pipe, 5, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; @@ -588,10 +645,11 @@ static int vlFlush pipe->bind_vs_state(pipe, mc->b_vs[1]); pipe->bind_fs_state(pipe, mc->b_fs[1]); - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, mc->num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24; } - memset(mc->num_macroblocks, 0, sizeof(unsigned int) * vlNumMacroBlockExTypes); + mc->num_macroblocks = 0; mc->cur_buf++; return 0; @@ -660,7 +718,7 @@ static int vlDestroy { struct vlR16SnormBufferedMC *mc; struct pipe_context *pipe; - unsigned int g, h, i; + unsigned int h, i; assert(render); @@ -670,10 +728,9 @@ static int vlDestroy for (i = 0; i < 5; ++i) pipe->delete_sampler_state(pipe, mc->samplers[i]); - for (g = 0; g < NUM_BUF_SETS; ++g) - for (h = 0; h < vlNumMacroBlockExTypes; ++h) + for (h = 0; h < NUM_BUF_SETS; ++h) for (i = 0; i < 3; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[g][h][i].buffer); + pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[h][i].buffer); /* Textures 3 & 4 are not created directly, no need to release them here */ for (i = 0; i < NUM_BUF_SETS; ++i) @@ -697,6 +754,7 @@ static int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); + free(mc->macroblocks); free(mc); 
return 0; @@ -1869,28 +1927,25 @@ static int vlCreateDataBufs const unsigned int num_mb_per_frame = mbw * mbh; struct pipe_context *pipe; - unsigned int g, h, i; + unsigned int h, i; assert(mc); pipe = mc->pipe; - for (g = 0; g < NUM_BUF_SETS; ++g) + for (h = 0; h < NUM_BUF_SETS; ++h) { - for (h = 0; h < vlNumMacroBlockExTypes; ++h) - { - /* Create our vertex buffer and vertex buffer element */ - mc->vertex_bufs[g][h][0].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[g][h][0].max_index = 24 * num_mb_per_frame - 1; - mc->vertex_bufs[g][h][0].buffer_offset = 0; - mc->vertex_bufs[g][h][0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 * num_mb_per_frame - ); - } + /* Create our vertex buffer and vertex buffer element */ + mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f); + mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1; + mc->vertex_bufs[h][0].buffer_offset = 0; + mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 24 * num_mb_per_frame + ); } /* Position & block luma, block chroma texcoord element */ @@ -1899,23 +1954,20 @@ static int vlCreateDataBufs mc->vertex_elems[0].nr_components = 2; mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - for (g = 0; g < NUM_BUF_SETS; ++g) + for (h = 0; h < NUM_BUF_SETS; ++h) { - for (h = 0; h < vlNumMacroBlockExTypes; ++h) + for (i = 1; i < 3; ++i) { - for (i = 1; i < 3; ++i) - { - mc->vertex_bufs[g][h][i].pitch = sizeof(struct vlVertex2f) * 2; - mc->vertex_bufs[g][h][i].max_index = 24 * num_mb_per_frame - 1; - mc->vertex_bufs[g][h][i].buffer_offset = 0; - mc->vertex_bufs[g][h][i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame - ); - } + mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2; + mc->vertex_bufs[h][i].max_index = 24 
* num_mb_per_frame - 1; + mc->vertex_bufs[h][i].buffer_offset = 0; + mc->vertex_bufs[h][i].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame + ); } } @@ -1971,6 +2023,8 @@ static int vlCreateDataBufs pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); + mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * num_mb_per_frame); + return 0; } @@ -2106,7 +2160,7 @@ int vlCreateR16SNormBufferedMC mc->buffered_surface = NULL; mc->past_surface = NULL; mc->future_surface = NULL; - memset(mc->num_macroblocks, 0, sizeof(unsigned int) * vlNumMacroBlockExTypes); + mc->num_macroblocks = 0; vlInit(mc); -- cgit v1.2.3 From 2ab5e69f16ed9e9b399ddd526f1370032d3d10a4 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 1 Sep 2008 20:22:41 -0400 Subject: g3dvl: Use consistent variable names. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 44 +++++++++++----------- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.h | 6 +-- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index b3deab4656..13c6fd5568 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -56,8 +56,8 @@ struct vlR16SnormBufferedMC { struct vlRender base; - unsigned int video_width, video_height; - enum vlFormat video_format; + unsigned int picture_width, picture_height; + enum vlFormat picture_format; unsigned int cur_buf; struct vlSurface *buffered_surface; @@ -1922,8 +1922,8 @@ static int vlCreateDataBufs struct vlR16SnormBufferedMC *mc ) { - const unsigned int mbw = align(mc->video_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; - const unsigned int mbh = align(mc->video_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; + const unsigned int mbw = 
align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; + const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; const unsigned int num_mb_per_frame = mbw * mbh; struct pipe_context *pipe; @@ -2044,8 +2044,8 @@ static int vlInit pipe = mc->pipe; /* For MC we render to textures, which are rounded up to nearest POT */ - mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width); - mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height); + mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width); + mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height); mc->viewport.scale[2] = 1; mc->viewport.scale[3] = 1; mc->viewport.translate[0] = 0; @@ -2053,16 +2053,16 @@ static int vlInit mc->viewport.translate[2] = 0; mc->viewport.translate[3] = 0; - mc->render_target.width = vlRoundUpPOT(mc->video_width); - mc->render_target.height = vlRoundUpPOT(mc->video_height); + mc->render_target.width = vlRoundUpPOT(mc->picture_width); + mc->render_target.height = vlRoundUpPOT(mc->picture_height); mc->render_target.num_cbufs = 1; /* FB for MC stage is a vlSurface created by the user, set at render time */ mc->render_target.zsbuf = NULL; filters[0] = PIPE_TEX_FILTER_NEAREST; /* FIXME: Linear causes discoloration around block edges */ - filters[1] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; - filters[2] = /*mc->video_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; + filters[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; + filters[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; filters[3] = PIPE_TEX_FILTER_LINEAR; filters[4] = PIPE_TEX_FILTER_LINEAR; @@ -2091,8 +2091,8 @@ static int vlInit template.target = PIPE_TEXTURE_2D; template.format = PIPE_FORMAT_R16_SNORM; template.last_level = 0; - template.width[0] = vlRoundUpPOT(mc->video_width); 
- template.height[0] = vlRoundUpPOT(mc->video_height); + template.width[0] = vlRoundUpPOT(mc->picture_width); + template.height[0] = vlRoundUpPOT(mc->picture_height); template.depth[0] = 1; template.compressed = 0; pf_get_block(template.format, &template.block); @@ -2100,13 +2100,13 @@ static int vlInit for (i = 0; i < NUM_BUF_SETS; ++i) mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); - if (mc->video_format == vlFormatYCbCr420) + if (mc->picture_format == vlFormatYCbCr420) { - template.width[0] = vlRoundUpPOT(mc->video_width / 2); - template.height[0] = vlRoundUpPOT(mc->video_height / 2); + template.width[0] = vlRoundUpPOT(mc->picture_width / 2); + template.height[0] = vlRoundUpPOT(mc->picture_height / 2); } - else if (mc->video_format == vlFormatYCbCr422) - template.height[0] = vlRoundUpPOT(mc->video_height / 2); + else if (mc->picture_format == vlFormatYCbCr422) + template.height[0] = vlRoundUpPOT(mc->picture_height / 2); for (i = 0; i < NUM_BUF_SETS; ++i) { @@ -2134,9 +2134,9 @@ static int vlInit int vlCreateR16SNormBufferedMC ( struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, struct vlRender **render ) { @@ -2153,8 +2153,8 @@ int vlCreateR16SNormBufferedMC mc->base.vlFlush = &vlFlush; mc->base.vlDestroy = &vlDestroy; mc->pipe = pipe; - mc->video_width = video_width; - mc->video_height = video_height; + mc->picture_width = picture_width; + mc->picture_height = picture_height; mc->cur_buf = 0; mc->buffered_surface = NULL; diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h index 30f67db3e7..27177d64ca 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h @@ -9,9 +9,9 @@ struct vlRender; int vlCreateR16SNormBufferedMC ( struct pipe_context 
*pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, struct vlRender **render ); -- cgit v1.2.3 From 7891efdac125185d216a8da8b044db0f06f34f0e Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 3 Sep 2008 11:50:38 -0400 Subject: g3dvl: Define block texcoords for each vertex instead of reusing pos. This is needed for zero-block optimization. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 243 +++++++++++---------- 1 file changed, 130 insertions(+), 113 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 13c6fd5568..4d778e7fe3 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -74,7 +74,7 @@ struct vlR16SnormBufferedMC void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3]; - struct pipe_vertex_element vertex_elems[5]; + struct pipe_vertex_element vertex_elems[6]; struct pipe_constant_buffer vs_const_buf, fs_const_buf; }; @@ -443,39 +443,63 @@ static inline int vlGrabMacroBlockVB mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 24; + ) + pos * 2 * 24; vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y; - vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y + half.y; - vb[2].x = macroblock->mbx * unit.x + half.x; vb[2].y = macroblock->mby * unit.y; - - vb[3].x = macroblock->mbx * unit.x + half.x; vb[3].y = macroblock->mby * unit.y; - vb[4].x = macroblock->mbx * unit.x; vb[4].y = macroblock->mby * unit.y + half.y; - vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y + half.y; + vb[1].x = macroblock->mbx * unit.x; vb[1].y = 
macroblock->mby * unit.y; + vb[2].x = macroblock->mbx * unit.x; vb[2].y = macroblock->mby * unit.y + half.y; + vb[3].x = macroblock->mbx * unit.x; vb[3].y = macroblock->mby * unit.y + half.y; + vb[4].x = macroblock->mbx * unit.x + half.x; vb[4].y = macroblock->mby * unit.y; + vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y; vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y; - vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y + half.y; - vb[8].x = macroblock->mbx * unit.x + unit.x; vb[8].y = macroblock->mby * unit.y; - - vb[9].x = macroblock->mbx * unit.x + unit.x; vb[9].y = macroblock->mby * unit.y; + vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y; + vb[8].x = macroblock->mbx * unit.x; vb[8].y = macroblock->mby * unit.y + half.y; + vb[9].x = macroblock->mbx * unit.x; vb[9].y = macroblock->mby * unit.y + half.y; vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y; - vb[11].x = macroblock->mbx * unit.x + unit.x; vb[11].y = macroblock->mby * unit.y + half.y; + vb[11].x = macroblock->mbx * unit.x + half.x; vb[11].y = macroblock->mby * unit.y + half.y; - vb[12].x = macroblock->mbx * unit.x; vb[12].y = macroblock->mby * unit.y + half.y; - vb[13].x = macroblock->mbx * unit.x; vb[13].y = macroblock->mby * unit.y + unit.y; + vb[12].x = macroblock->mbx * unit.x + half.x; vb[12].y = macroblock->mby * unit.y; + vb[13].x = macroblock->mbx * unit.x + half.x; vb[13].y = macroblock->mby * unit.y; vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y; - vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y; - vb[16].x = macroblock->mbx * unit.x; vb[16].y = macroblock->mby * unit.y + unit.y; - vb[17].x = macroblock->mbx * unit.x + half.x; vb[17].y = macroblock->mby * unit.y + unit.y; - - vb[18].x = macroblock->mbx * unit.x + half.x; vb[18].y = 
macroblock->mby * unit.y + half.y; - vb[19].x = macroblock->mbx * unit.x + half.x; vb[19].y = macroblock->mby * unit.y + unit.y; - vb[20].x = macroblock->mbx * unit.x + unit.x; vb[20].y = macroblock->mby * unit.y + half.y; - - vb[21].x = macroblock->mbx * unit.x + unit.x; vb[21].y = macroblock->mby * unit.y + half.y; - vb[22].x = macroblock->mbx * unit.x + half.x; vb[22].y = macroblock->mby * unit.y + unit.y; - vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + unit.y; + vb[16].x = macroblock->mbx * unit.x + unit.x; vb[16].y = macroblock->mby * unit.y; + vb[17].x = macroblock->mbx * unit.x + unit.x; vb[17].y = macroblock->mby * unit.y; + + vb[18].x = macroblock->mbx * unit.x + unit.x; vb[18].y = macroblock->mby * unit.y; + vb[19].x = macroblock->mbx * unit.x + unit.x; vb[19].y = macroblock->mby * unit.y; + vb[20].x = macroblock->mbx * unit.x + half.x; vb[20].y = macroblock->mby * unit.y + half.y; + vb[21].x = macroblock->mbx * unit.x + half.x; vb[21].y = macroblock->mby * unit.y + half.y; + vb[22].x = macroblock->mbx * unit.x + unit.x; vb[22].y = macroblock->mby * unit.y + half.y; + vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + half.y; + + vb[24].x = macroblock->mbx * unit.x; vb[24].y = macroblock->mby * unit.y + half.y; + vb[25].x = macroblock->mbx * unit.x; vb[25].y = macroblock->mby * unit.y + half.y; + vb[26].x = macroblock->mbx * unit.x; vb[26].y = macroblock->mby * unit.y + unit.y; + vb[27].x = macroblock->mbx * unit.x; vb[27].y = macroblock->mby * unit.y + unit.y; + vb[28].x = macroblock->mbx * unit.x + half.x; vb[28].y = macroblock->mby * unit.y + half.y; + vb[29].x = macroblock->mbx * unit.x + half.x; vb[29].y = macroblock->mby * unit.y + half.y; + + vb[30].x = macroblock->mbx * unit.x + half.x; vb[30].y = macroblock->mby * unit.y + half.y; + vb[31].x = macroblock->mbx * unit.x + half.x; vb[31].y = macroblock->mby * unit.y + half.y; + vb[32].x = macroblock->mbx * unit.x; vb[32].y = 
macroblock->mby * unit.y + unit.y; + vb[33].x = macroblock->mbx * unit.x; vb[33].y = macroblock->mby * unit.y + unit.y; + vb[34].x = macroblock->mbx * unit.x + half.x; vb[34].y = macroblock->mby * unit.y + unit.y; + vb[35].x = macroblock->mbx * unit.x + half.x; vb[35].y = macroblock->mby * unit.y + unit.y; + + vb[36].x = macroblock->mbx * unit.x + half.x; vb[36].y = macroblock->mby * unit.y + half.y; + vb[37].x = macroblock->mbx * unit.x + half.x; vb[37].y = macroblock->mby * unit.y + half.y; + vb[38].x = macroblock->mbx * unit.x + half.x; vb[38].y = macroblock->mby * unit.y + unit.y; + vb[39].x = macroblock->mbx * unit.x + half.x; vb[39].y = macroblock->mby * unit.y + unit.y; + vb[40].x = macroblock->mbx * unit.x + unit.x; vb[40].y = macroblock->mby * unit.y + half.y; + vb[41].x = macroblock->mbx * unit.x + unit.x; vb[41].y = macroblock->mby * unit.y + half.y; + + vb[42].x = macroblock->mbx * unit.x + unit.x; vb[42].y = macroblock->mby * unit.y + half.y; + vb[43].x = macroblock->mbx * unit.x + unit.x; vb[43].y = macroblock->mby * unit.y + half.y; + vb[44].x = macroblock->mbx * unit.x + half.x; vb[44].y = macroblock->mby * unit.y + unit.y; + vb[45].x = macroblock->mbx * unit.x + half.x; vb[45].y = macroblock->mby * unit.y + unit.y; + vb[46].x = macroblock->mbx * unit.x + unit.x; vb[46].y = macroblock->mby * unit.y + unit.y; + vb[47].x = macroblock->mbx * unit.x + unit.x; vb[47].y = macroblock->mby * unit.y + unit.y; mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer); @@ -553,7 +577,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeIntra] > 0) { pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 1, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 2, mc->vertex_elems); pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); pipe->bind_vs_state(pipe, 
mc->i_vs); @@ -566,7 +590,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ -580,7 +604,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ -594,7 +618,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ -608,7 +632,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); 
pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ -622,7 +646,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) { pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 5, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -637,7 +661,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) { pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 5, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -806,9 +830,10 @@ static int vlCreateVertexShaderIMB ti = 3; /* - * decl i0 ; Vertex pos, luma & chroma texcoords + * decl i0 ; Vertex pos + * decl i1 ; Luma/chroma texcoords */ - for (i = 0; i < 3; i++) + for (i = 0; i < 2; i++) { decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -826,11 +851,11 @@ static int vlCreateVertexShaderIMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i0 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma/chroma texcoords to output */ for (i = 0; i < 2; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -976,11 +1001,12 @@ static int vlCreateVertexShaderFramePMB ti = 3; /* - * decl i0 ; Vertex pos, luma/chroma texcoords - * decl i1 ; Ref surface top field texcoords - * decl i2 ; Ref surface bottom field texcoords (unused, packed in the same stream) + * decl i0 ; Vertex pos + * decl i1 ; Luma/chroma texcoords + * decl i2 ; Ref surface top field texcoords + * decl i3 ; Ref surface bottom field texcoords (unused, packed in the same stream) */ - for (i = 0; i < 3; i++) + for (i = 0; i < 4; i++) { decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -999,16 +1025,16 @@ static int vlCreateVertexShaderFramePMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i0 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma/chroma texcoords to output */ for (i = 0; i < 2; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o2, i0, i1 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 1); + /* add o2, i0, i2 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ @@ -1057,10 +1083,11 @@ static int vlCreateVertexShaderFieldPMB /* * decl i0 ; Vertex pos, luma/chroma texcoords - * decl i1 ; Ref surface top field texcoords - * decl i2 ; Ref surface bottom field texcoords + * decl i1 ; Texcoord denorm coefficients + * decl i2 ; Ref surface top field texcoords + * decl i3 ; Ref surface bottom field texcoords */ - for (i = 0; i < 3; i++) + for (i = 0; i < 4; i++) { decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1085,21 +1112,21 @@ static int vlCreateVertexShaderFieldPMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i0 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma/chroma texcoords to output */ - for (i = 0; i < 3; ++i) + for (i = 0; i < 2; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i == 0 ? 0 : i - 1); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* - * add o2, i0, i1 ; Translate vertex pos by motion vec to form top field macroblock texcoords - * add o3, i0, i2 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords + * add o2, i0, i2 ; Translate vertex pos by motion vec to form top field macroblock texcoords + * add o3, i0, i3 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords */ for (i = 0; i < 2; ++i) { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -1430,12 +1457,13 @@ static int vlCreateVertexShaderFrameBMB /* * decl i0 ; Vertex pos, luma/chroma texcoords - * decl i1 ; First ref surface top field texcoords - * decl i2 ; First ref surface bottom field texcoords (unused, packed in the same stream) - * decl i3 ; Second ref surface top field texcoords - * decl i4 ; Second ref surface bottom field texcoords (unused, packed in the same stream) + * decl i1 ; Luma/chroma texcoords + * decl i2 ; First ref surface top field texcoords + * decl i3 ; First ref surface bottom field texcoords (unused, packed in the same stream) + * decl i4 ; Second ref 
surface top field texcoords + * decl i5 ; Second ref surface bottom field texcoords (unused, packed in the same stream) */ - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1455,21 +1483,21 @@ static int vlCreateVertexShaderFrameBMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i0 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma/chroma texcoords to output */ for (i = 0; i < 2; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* - * add o2, i0, i1 ; Translate vertex pos by motion vec to form first ref macroblock texcoords - * add o3, i0, i3 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + * add o2, i0, i2 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o3, i0, i4 ; Translate vertex pos by motion vec to form second ref macroblock texcoords */ for (i = 0; i < 2; ++i) { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i * 2 + 1); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 1) * 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -1519,12 +1547,13 @@ static int vlCreateVertexShaderFieldBMB /* * decl i0 ; Vertex pos, Luma/chroma texcoords - * decl i1 ; First ref surface top field texcoords - * decl i2 ; First ref surface bottom field texcoords - * decl i3 ; Second ref surface top field texcoords - * decl i4 ; Second ref surface bottom field texcoords + * decl i1 ; Luma/chroma texcoords + * decl i2 ; First ref surface top field texcoords + * decl i3 ; First 
ref surface bottom field texcoords + * decl i4 ; Second ref surface top field texcoords + * decl i5 ; Second ref surface bottom field texcoords */ - for (i = 0; i < 5; i++) + for (i = 0; i < 6; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1555,27 +1584,23 @@ static int vlCreateVertexShaderFieldBMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i0 ; Move input luma/chroma texcoords to output - * mov o2, i1 ; Move past top field texcoords to output - * mov o3, i2 ; Move past bottom field texcoords to output - * mov o4, i3 ; Move future top field texcoords to output - * mov o5, i4 ; Move future bottom field texcoords to output + * mov o1, i1 ; Move input luma/chroma texcoords to output */ - for (i = 0; i < 6; ++i) + for (i = 0; i < 2; ++i) { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, 0); + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* - * add o2, i0, i1 ; Translate vertex pos by motion vec to form first top field macroblock texcoords - * add o3, i0, i2 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords - * add o4, i0, i3 ; Translate vertex pos by motion vec to form second top field macroblock texcoords - * add o5, i0, i4 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords + * add o2, i0, i2 ; Translate vertex pos by motion vec to form first top field macroblock texcoords + * add o3, i0, i3 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords + * add o4, i0, i4 ; Translate vertex pos by motion vec to form second top field macroblock texcoords + * add o5, i0, i5 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords */ for (i = 0; i < 4; ++i) { - inst = 
vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 1); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 2); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -1933,30 +1958,10 @@ static int vlCreateDataBufs pipe = mc->pipe; + /* Create our vertex buffers */ for (h = 0; h < NUM_BUF_SETS; ++h) { - /* Create our vertex buffer and vertex buffer element */ - mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1; - mc->vertex_bufs[h][0].buffer_offset = 0; - mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 * num_mb_per_frame - ); - } - - /* Position & block luma, block chroma texcoord element */ - mc->vertex_elems[0].src_offset = 0; - mc->vertex_elems[0].vertex_buffer_index = 0; - mc->vertex_elems[0].nr_components = 2; - mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - for (h = 0; h < NUM_BUF_SETS; ++h) - { - for (i = 1; i < 3; ++i) + for (i = 0; i < 3; ++i) { mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2; mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1; @@ -1971,30 +1976,42 @@ static int vlCreateDataBufs } } - /* First ref surface top field texcoord element */ - mc->vertex_elems[1].src_offset = 0; - mc->vertex_elems[1].vertex_buffer_index = 1; + /* Position element */ + mc->vertex_elems[0].src_offset = 0; + mc->vertex_elems[0].vertex_buffer_index = 0; + mc->vertex_elems[0].nr_components = 2; + mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Block luma, block chroma texcoord element */ + mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[1].vertex_buffer_index = 0; mc->vertex_elems[1].nr_components = 2; mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* First ref surface bottom field 
texcoord element */ - mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f); + /* First ref surface top field texcoord element */ + mc->vertex_elems[2].src_offset = 0; mc->vertex_elems[2].vertex_buffer_index = 1; mc->vertex_elems[2].nr_components = 2; mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* Second ref surface top field texcoord element */ - mc->vertex_elems[3].src_offset = 0; - mc->vertex_elems[3].vertex_buffer_index = 2; + /* First ref surface bottom field texcoord element */ + mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[3].vertex_buffer_index = 1; mc->vertex_elems[3].nr_components = 2; mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* Second ref surface bottom field texcoord element */ - mc->vertex_elems[4].src_offset = sizeof(struct vlVertex2f); + /* Second ref surface top field texcoord element */ + mc->vertex_elems[4].src_offset = 0; mc->vertex_elems[4].vertex_buffer_index = 2; mc->vertex_elems[4].nr_components = 2; mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; + /* Second ref surface bottom field texcoord element */ + mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[5].vertex_buffer_index = 2; + mc->vertex_elems[5].nr_components = 2; + mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; + /* Create our constant buffer */ mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); mc->vs_const_buf.buffer = pipe->winsys->buffer_create -- cgit v1.2.3 From 0bfbe834d35946fe75eb991d03ed777e115f418d Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 4 Sep 2008 00:16:51 -0400 Subject: g3dvl: Define texcoords seperately for luma, Cb, Cr textures. Need to be able to address each texture separately to do zero-block. 
--- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 566 +++++++++++++-------- 1 file changed, 343 insertions(+), 223 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 4d778e7fe3..b5aa79cdb4 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -74,7 +74,7 @@ struct vlR16SnormBufferedMC void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3]; - struct pipe_vertex_element vertex_elems[6]; + struct pipe_vertex_element vertex_elems[8]; struct pipe_constant_buffer vs_const_buf, fs_const_buf; }; @@ -315,6 +315,71 @@ static inline int vlGrabMacroBlock return 0; } +#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zx, zy) \ + (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + \ + /*if ((cbp) & (lm)) \ + {*/ \ + (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); 
(vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + /*} \ + else \ + { \ + (vb)[0].luma_tc.x = (zx); (vb)[0].luma_tc.y = (zy); \ + (vb)[1].luma_tc.x = (zx); (vb)[1].luma_tc.y = (zy) + (hy); \ + (vb)[2].luma_tc.x = (zx) + (hx); (vb)[2].luma_tc.y = (zy); \ + (vb)[3].luma_tc.x = (zx) + (hx); (vb)[3].luma_tc.y = (zy); \ + (vb)[4].luma_tc.x = (zx); (vb)[4].luma_tc.y = (zy) + (hy); \ + (vb)[5].luma_tc.x = ((zx) + (hx); (vb)[5].luma_tc.y = (zy) + (hy); \ + }*/ \ + \ + /*if ((cbp) & (cbm)) \ + {*/ \ + (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + /*} \ + else \ + { \ + (vb)[0].cb_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \ + (vb)[1].cb_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \ + (vb)[2].cb_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy); \ + (vb)[3].cb_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \ + (vb)[4].cb_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \ + (vb)[5].cb_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \ + }*/ \ + \ + /*if ((cbp) & (crm)) \ + {*/ \ + (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * 
(unity) + (ofsy); \ + (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + /*} \ + else \ + { \ + (vb)[0].cr_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \ + (vb)[1].cr_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \ + (vb)[2].cr_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy); \ + (vb)[3].cr_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \ + (vb)[4].cr_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \ + (vb)[5].cr_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \ + }*/ + static inline int vlGrabMacroBlockVB ( struct vlR16SnormBufferedMC *mc, @@ -322,18 +387,6 @@ static inline int vlGrabMacroBlockVB unsigned int pos ) { - const struct vlVertex2f unit = - { - mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH, - mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT - }; - const struct vlVertex2f half = - { - mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2), - mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2) - }; - - struct vlVertex2f *vb; struct vlVertex2f mo_vec[2]; unsigned int i; @@ -344,6 +397,8 @@ static inline int vlGrabMacroBlockVB { case vlMacroBlockTypeBiPredicted: { + struct vlVertex2f *vb; + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, @@ -383,6 +438,8 @@ static inline int vlGrabMacroBlockVB case vlMacroBlockTypeFwdPredicted: case vlMacroBlockTypeBkwdPredicted: { + struct vlVertex2f *vb; + vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, @@ -438,68 +495,63 @@ static inline int vlGrabMacroBlockVB } case vlMacroBlockTypeIntra: { - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + const struct vlVertex2f unit = + { + mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH, + mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT + }; + const struct vlVertex2f half = + { + mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2), + mc->surface_tex_inv_size.y * 
(VL_MACROBLOCK_HEIGHT / 2) + }; + + struct vlMacroBlockVertexStream0 + { + struct vlVertex2f pos; + struct vlVertex2f luma_tc; + struct vlVertex2f cb_tc; + struct vlVertex2f cr_tc; + } *vb; + + vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 2 * 24; + ) + pos * 24; - vb[0].x = macroblock->mbx * unit.x; vb[0].y = macroblock->mby * unit.y; - vb[1].x = macroblock->mbx * unit.x; vb[1].y = macroblock->mby * unit.y; - vb[2].x = macroblock->mbx * unit.x; vb[2].y = macroblock->mby * unit.y + half.y; - vb[3].x = macroblock->mbx * unit.x; vb[3].y = macroblock->mby * unit.y + half.y; - vb[4].x = macroblock->mbx * unit.x + half.x; vb[4].y = macroblock->mby * unit.y; - vb[5].x = macroblock->mbx * unit.x + half.x; vb[5].y = macroblock->mby * unit.y; - - vb[6].x = macroblock->mbx * unit.x + half.x; vb[6].y = macroblock->mby * unit.y; - vb[7].x = macroblock->mbx * unit.x + half.x; vb[7].y = macroblock->mby * unit.y; - vb[8].x = macroblock->mbx * unit.x; vb[8].y = macroblock->mby * unit.y + half.y; - vb[9].x = macroblock->mbx * unit.x; vb[9].y = macroblock->mby * unit.y + half.y; - vb[10].x = macroblock->mbx * unit.x + half.x; vb[10].y = macroblock->mby * unit.y + half.y; - vb[11].x = macroblock->mbx * unit.x + half.x; vb[11].y = macroblock->mby * unit.y + half.y; - - vb[12].x = macroblock->mbx * unit.x + half.x; vb[12].y = macroblock->mby * unit.y; - vb[13].x = macroblock->mbx * unit.x + half.x; vb[13].y = macroblock->mby * unit.y; - vb[14].x = macroblock->mbx * unit.x + half.x; vb[14].y = macroblock->mby * unit.y + half.y; - vb[15].x = macroblock->mbx * unit.x + half.x; vb[15].y = macroblock->mby * unit.y + half.y; - vb[16].x = macroblock->mbx * unit.x + unit.x; vb[16].y = macroblock->mby * unit.y; - vb[17].x = macroblock->mbx * unit.x + unit.x; vb[17].y = macroblock->mby * unit.y; - - vb[18].x = macroblock->mbx * unit.x + unit.x; vb[18].y = 
macroblock->mby * unit.y; - vb[19].x = macroblock->mbx * unit.x + unit.x; vb[19].y = macroblock->mby * unit.y; - vb[20].x = macroblock->mbx * unit.x + half.x; vb[20].y = macroblock->mby * unit.y + half.y; - vb[21].x = macroblock->mbx * unit.x + half.x; vb[21].y = macroblock->mby * unit.y + half.y; - vb[22].x = macroblock->mbx * unit.x + unit.x; vb[22].y = macroblock->mby * unit.y + half.y; - vb[23].x = macroblock->mbx * unit.x + unit.x; vb[23].y = macroblock->mby * unit.y + half.y; - - vb[24].x = macroblock->mbx * unit.x; vb[24].y = macroblock->mby * unit.y + half.y; - vb[25].x = macroblock->mbx * unit.x; vb[25].y = macroblock->mby * unit.y + half.y; - vb[26].x = macroblock->mbx * unit.x; vb[26].y = macroblock->mby * unit.y + unit.y; - vb[27].x = macroblock->mbx * unit.x; vb[27].y = macroblock->mby * unit.y + unit.y; - vb[28].x = macroblock->mbx * unit.x + half.x; vb[28].y = macroblock->mby * unit.y + half.y; - vb[29].x = macroblock->mbx * unit.x + half.x; vb[29].y = macroblock->mby * unit.y + half.y; - - vb[30].x = macroblock->mbx * unit.x + half.x; vb[30].y = macroblock->mby * unit.y + half.y; - vb[31].x = macroblock->mbx * unit.x + half.x; vb[31].y = macroblock->mby * unit.y + half.y; - vb[32].x = macroblock->mbx * unit.x; vb[32].y = macroblock->mby * unit.y + unit.y; - vb[33].x = macroblock->mbx * unit.x; vb[33].y = macroblock->mby * unit.y + unit.y; - vb[34].x = macroblock->mbx * unit.x + half.x; vb[34].y = macroblock->mby * unit.y + unit.y; - vb[35].x = macroblock->mbx * unit.x + half.x; vb[35].y = macroblock->mby * unit.y + unit.y; - - vb[36].x = macroblock->mbx * unit.x + half.x; vb[36].y = macroblock->mby * unit.y + half.y; - vb[37].x = macroblock->mbx * unit.x + half.x; vb[37].y = macroblock->mby * unit.y + half.y; - vb[38].x = macroblock->mbx * unit.x + half.x; vb[38].y = macroblock->mby * unit.y + unit.y; - vb[39].x = macroblock->mbx * unit.x + half.x; vb[39].y = macroblock->mby * unit.y + unit.y; - vb[40].x = macroblock->mbx * unit.x + unit.x; vb[40].y 
= macroblock->mby * unit.y + half.y; - vb[41].x = macroblock->mbx * unit.x + unit.x; vb[41].y = macroblock->mby * unit.y + half.y; - - vb[42].x = macroblock->mbx * unit.x + unit.x; vb[42].y = macroblock->mby * unit.y + half.y; - vb[43].x = macroblock->mbx * unit.x + unit.x; vb[43].y = macroblock->mby * unit.y + half.y; - vb[44].x = macroblock->mbx * unit.x + half.x; vb[44].y = macroblock->mby * unit.y + unit.y; - vb[45].x = macroblock->mbx * unit.x + half.x; vb[45].y = macroblock->mby * unit.y + unit.y; - vb[46].x = macroblock->mbx * unit.x + unit.x; vb[46].y = macroblock->mby * unit.y + unit.y; - vb[47].x = macroblock->mbx * unit.x + unit.x; vb[47].y = macroblock->mby * unit.y + unit.y; + SET_BLOCK + ( + vb, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, 0, 0, half.x, half.y, + 32, 2, 1, mc->zero_block.x, mc->zero_block.y + ); + + SET_BLOCK + ( + vb + 6, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, half.x, 0, half.x, half.y, + 16, 2, 1, mc->zero_block.x, mc->zero_block.y + ); + + SET_BLOCK + ( + vb + 12, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, 0, half.y, half.x, half.y, + 8, 2, 1, mc->zero_block.x, mc->zero_block.y + ); + + SET_BLOCK + ( + vb + 18, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, half.x, half.y, half.x, half.y, + 4, 2, 1, mc->zero_block.x, mc->zero_block.y + ); mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer); @@ -577,7 +629,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeIntra] > 0) { pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 2, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); pipe->bind_vs_state(pipe, mc->i_vs); @@ -590,7 +642,7 @@ static int 
vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ -604,7 +656,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ -618,7 +670,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ -632,7 +684,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) { pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); @@ 
-646,7 +698,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) { pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -661,7 +713,7 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) { pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); - pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); + pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); @@ -831,9 +883,11 @@ static int vlCreateVertexShaderIMB /* * decl i0 ; Vertex pos - * decl i1 ; Luma/chroma texcoords + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords */ - for (i = 0; i < 2; i++) + for (i = 0; i < 4; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -841,9 +895,11 @@ static int vlCreateVertexShaderIMB /* * decl o0 ; Vertex pos - * decl o1 ; Luma/chroma texcoords + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords */ - for (i = 0; i < 2; i++) + for (i = 0; i < 4; i++) { decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -851,9 +907,11 @@ static int vlCreateVertexShaderIMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (i = 0; i < 2; ++i) + for (i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); @@ -903,9 +961,16 @@ static int vlCreateFragmentShaderIMB ti = 3; - /* decl i0 ; Luma/chroma texcoords */ - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); @@ -933,14 +998,14 @@ static int vlCreateFragmentShaderIMB /* * tex2d t1, i0, s0 ; Read texel from luma texture * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, 
TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); @@ -1002,11 +1067,13 @@ static int vlCreateVertexShaderFramePMB /* * decl i0 ; Vertex pos - * decl i1 ; Luma/chroma texcoords - * decl i2 ; Ref surface top field texcoords - * decl i3 ; Ref surface bottom field texcoords (unused, packed in the same stream) + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref surface top field texcoords + * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream) */ - for (i = 0; i < 4; i++) + for (i = 0; i < 6; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1014,10 +1081,12 @@ static int vlCreateVertexShaderFramePMB /* * decl o0 ; Vertex pos - * decl o1 ; Luma/chroma texcoords - * decl o2 ; Ref macroblock texcoords + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock texcoords */ - for (i = 0; i < 3; i++) + for (i = 0; i < 5; i++) { decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1025,16 +1094,18 @@ static int vlCreateVertexShaderFramePMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (i = 0; i < 2; ++i) + for (i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* add o2, i0, i2 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 2); + /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ @@ -1082,12 +1153,14 @@ static int vlCreateVertexShaderFieldPMB ti = 3; /* - * decl i0 ; Vertex pos, luma/chroma texcoords - * decl i1 ; Texcoord denorm coefficients - * decl i2 ; Ref surface top field texcoords - * decl i3 ; Ref surface bottom field texcoords + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref macroblock top field texcoords + * decl i5 ; Ref macroblock bottom field texcoords */ - for (i = 0; i < 4; i++) + for (i = 0; i < 6; i++) { decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1099,39 +1172,43 @@ static int vlCreateVertexShaderFieldPMB /* * decl o0 ; Vertex pos - * decl o1 ; Luma/chroma texcoords - * decl o2 ; Top field ref macroblock texcoords - * decl o3 ; Bottom field ref macroblock texcoords - * decl o4 ; Denormalized vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock top field texcoords + * decl o5 ; Ref macroblock bottom field texcoords + * decl o6 ; Denormalized vertex pos */ - for (i = 0; i < 5; i++) + for (i = 0; i < 7; i++) { - decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + decl = vl_decl_output((i == 0 || i == 6) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (i = 0; i < 2; ++i) + for (i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* - * add o2, i0, i2 ; Translate vertex pos by motion vec to form top field macroblock texcoords - * add o3, i0, i3 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords + * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords + * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords */ for (i = 0; i < 2; ++i) { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 2); 
+ inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul o4, i0, c0 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + /* mul o6, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ @@ -1179,10 +1256,12 @@ static int vlCreateFragmentShaderFramePMB ti = 3; /* - * decl i0 ; Texcoords for s0, s1, s2 - * decl i1 ; Texcoords for s3 + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock texcoords */ - for (i = 0; i < 2; ++i) + for (i = 0; i < 4; ++i) { decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1215,14 +1294,14 @@ static int vlCreateFragmentShaderFramePMB /* * tex2d t1, i0, s0 ; Read texel from luma texture * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); @@ -1238,8 +1317,8 @@ 
static int vlCreateFragmentShaderFramePMB inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - /* tex2d t1, i1, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 1, TGSI_FILE_SAMPLER, 3); + /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* add o0, t0, t1 ; Add ref and differential to form final output */ @@ -1291,12 +1370,14 @@ static int vlCreateFragmentShaderFieldPMB ti = 3; /* - * decl i0 ; Texcoords for s0, s1, s2 - * decl i1 ; Texcoords for s3 - * decl i2 ; Texcoords for s3 - * decl i3 ; Denormalized vertex pos + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock top field texcoords + * decl i4 ; Ref macroblock bottom field texcoords + * decl i5 ; Denormalized vertex pos */ - for (i = 0; i < 4; ++i) + for (i = 0; i < 6; ++i) { decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1332,14 +1413,14 @@ static int vlCreateFragmentShaderFieldPMB /* * tex2d t1, i0, s0 ; Read texel from luma texture * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, 
TGSI_FILE_SAMPLER, i); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); @@ -1356,18 +1437,18 @@ static int vlCreateFragmentShaderFieldPMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - * tex2d t1, i1, s3 ; Read texel from ref macroblock top field - * tex2d t2, i2, s3 ; Read texel from ref macroblock bottom field + * tex2d t1, i3, s3 ; Read texel from ref macroblock top field + * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field */ for (i = 0; i < 2; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* XXX: Pos values off by 0.5? 
*/ - /* sub t4, i3.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 3, TGSI_FILE_CONSTANT, 1); + /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -1456,14 +1537,16 @@ static int vlCreateVertexShaderFrameBMB ti = 3; /* - * decl i0 ; Vertex pos, luma/chroma texcoords - * decl i1 ; Luma/chroma texcoords - * decl i2 ; First ref surface top field texcoords - * decl i3 ; First ref surface bottom field texcoords (unused, packed in the same stream) - * decl i4 ; Second ref surface top field texcoords - * decl i5 ; Second ref surface bottom field texcoords (unused, packed in the same stream) + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; First ref macroblock top field texcoords + * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream) + * decl i6 ; Second ref macroblock top field texcoords + * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) */ - for (i = 0; i < 6; i++) + for (i = 0; i < 8; i++) { decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1471,11 +1554,13 @@ static int vlCreateVertexShaderFrameBMB /* * decl o0 ; Vertex pos - * decl o1 ; Luma/chroma texcoords - * decl o2 ; First ref macroblock texcoords - * decl o3 ; Second ref macroblock texcoords + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock texcoords + * decl o5 ; Second ref macroblock texcoords */ - for (i = 0; i < 4; i++) + for (i = 0; i < 6; i++) { decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1483,21 +1568,23 @@ static int vlCreateVertexShaderFrameBMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (i = 0; i < 2; ++i) + for (i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* - * add o2, i0, i2 ; Translate vertex pos by motion vec to form first ref macroblock texcoords - * add o3, i0, i4 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords */ for (i = 0; i < 2; ++i) { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 1) * 2); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); ti += 
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -1546,14 +1633,16 @@ static int vlCreateVertexShaderFieldBMB ti = 3; /* - * decl i0 ; Vertex pos, Luma/chroma texcoords - * decl i1 ; Luma/chroma texcoords - * decl i2 ; First ref surface top field texcoords - * decl i3 ; First ref surface bottom field texcoords - * decl i4 ; Second ref surface top field texcoords - * decl i5 ; Second ref surface bottom field texcoords + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; First ref macroblock top field texcoords + * decl i5 ; First ref macroblock bottom field texcoords + * decl i6 ; Second ref macroblock top field texcoords + * decl i7 ; Second ref macroblock bottom field texcoords */ - for (i = 0; i < 6; i++) + for (i = 0; i < 8; i++) { decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1565,16 +1654,18 @@ static int vlCreateVertexShaderFieldBMB /* * decl o0 ; Vertex pos - * decl o1 ; Luma/chroma texcoords - * decl o2 ; Top field past ref macroblock texcoords - * decl o3 ; Bottom field past ref macroblock texcoords - * decl o4 ; Top field future ref macroblock texcoords - * decl o5 ; Bottom field future ref macroblock texcoords - * decl o6 ; Denormalized vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock top field texcoords + * decl o5 ; First ref macroblock Bottom field texcoords + * decl o6 ; Second ref macroblock top field texcoords + * decl o7 ; Second ref macroblock Bottom field texcoords + * decl o8 ; Denormalized vertex pos */ - for (i = 0; i < 7; i++) + for (i = 0; i < 9; i++) { - decl = vl_decl_output((i == 0 || i == 7) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + decl = vl_decl_output((i == 0 || i == 8) ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -1584,28 +1675,30 @@ static int vlCreateVertexShaderFieldBMB /* * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma/chroma texcoords to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output */ - for (i = 0; i < 2; ++i) + for (i = 0; i < 4; ++i) { inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } /* - * add o2, i0, i2 ; Translate vertex pos by motion vec to form first top field macroblock texcoords - * add o3, i0, i3 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords - * add o4, i0, i4 ; Translate vertex pos by motion vec to form second top field macroblock texcoords - * add o5, i0, i5 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords + * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords + * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords + * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords */ for (i = 0; i < 4; ++i) { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 2, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 2); + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } - /* mul o6, i0, c0 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + /* mul 
o8, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* end */ @@ -1653,11 +1746,13 @@ static int vlCreateFragmentShaderFrameBMB ti = 3; /* - * decl i0 ; Texcoords for s0, s1, s2 - * decl i1 ; Texcoords for s3 - * decl i2 ; Texcoords for s4 + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; First ref macroblock texcoords + * decl i4 ; Second ref macroblock texcoords */ - for (i = 0; i < 3; ++i) + for (i = 0; i < 5; ++i) { decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1682,8 +1777,8 @@ static int vlCreateFragmentShaderFrameBMB * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture */ for (i = 0; i < 5; ++i) { @@ -1694,14 +1789,14 @@ static int vlCreateFragmentShaderFrameBMB /* * tex2d t1, i0, s0 ; Read texel from luma texture * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += 
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); @@ -1718,12 +1813,12 @@ static int vlCreateFragmentShaderFrameBMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - * tex2d t1, i1, s3 ; Read texel from past ref macroblock - * tex2d t2, i2, s4 ; Read texel from future ref macroblock + * tex2d t1, i3, s3 ; Read texel from first ref macroblock + * tex2d t2, i4, s4 ; Read texel from second ref macroblock */ for (i = 0; i < 2; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, i + 3); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -1784,14 +1879,16 @@ static int vlCreateFragmentShaderFieldBMB ti = 3; /* - * decl i0 ; Texcoords for s0, s1, s2 - * decl i1 ; Texcoords for s3 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s4 - * decl i4 ; Texcoords for s4 - * decl i5 ; Denormalized vertex pos + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; First ref macroblock top field texcoords + * decl i4 ; First ref macroblock bottom field texcoords + * decl i5 ; Second ref macroblock top field texcoords + * decl i6 ; Second ref macroblock bottom field texcoords + * decl i7 ; Denormalized vertex pos */ - for (i = 0; i < 6; ++i) + for (i = 0; i < 8; ++i) { decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1817,8 +1914,8 @@ static int vlCreateFragmentShaderFieldBMB * decl s0 ; Sampler for luma texture * decl s1 ; Sampler for chroma Cb texture * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl 
s4 ; Sampler for future ref surface texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture */ for (i = 0; i < 5; ++i) { @@ -1829,14 +1926,14 @@ static int vlCreateFragmentShaderFieldBMB /* * tex2d t1, i0, s0 ; Read texel from luma texture * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i0, s1 ; Read texel from chroma Cb texture + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i0, s2 ; Read texel from chroma Cr texture + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture * mov t0.z, t1.x ; Move Cr sample into .z component */ for (i = 0; i < 3; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, i); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); @@ -1853,8 +1950,8 @@ static int vlCreateFragmentShaderFieldBMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* XXX: Pos values off by 0.5? 
*/ - /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1); + /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1); inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; @@ -1890,12 +1987,12 @@ static int vlCreateFragmentShaderFieldBMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - * tex2d t1, i1, s3 ; Read texel from past ref macroblock top field - * tex2d t2, i2, s3 ; Read texel from past ref macroblock bottom field + * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field */ for (i = 0; i < 2; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 1, TGSI_FILE_SAMPLER, 3); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -1905,12 +2002,12 @@ static int vlCreateFragmentShaderFieldBMB ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); /* - * tex2d t4, i3, s4 ; Read texel from future ref macroblock top field - * tex2d t5, i4, s4 ; Read texel from future ref macroblock bottom field + * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field */ for (i = 0; i < 2; ++i) { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 4); + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4); ti += 
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); } @@ -1961,7 +2058,18 @@ static int vlCreateDataBufs /* Create our vertex buffers */ for (h = 0; h < NUM_BUF_SETS; ++h) { - for (i = 0; i < 3; ++i) + mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f) * 4; + mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1; + mc->vertex_bufs[h][0].buffer_offset = 0; + mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 * 24 * num_mb_per_frame + ); + + for (i = 1; i < 3; ++i) { mc->vertex_bufs[h][i].pitch = sizeof(struct vlVertex2f) * 2; mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1; @@ -1982,36 +2090,48 @@ static int vlCreateDataBufs mc->vertex_elems[0].nr_components = 2; mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* Block luma, block chroma texcoord element */ + /* Luma texcoord element */ mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f); mc->vertex_elems[1].vertex_buffer_index = 0; mc->vertex_elems[1].nr_components = 2; mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* First ref surface top field texcoord element */ - mc->vertex_elems[2].src_offset = 0; - mc->vertex_elems[2].vertex_buffer_index = 1; + /* Chroma Cb texcoord element */ + mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f) * 2; + mc->vertex_elems[2].vertex_buffer_index = 0; mc->vertex_elems[2].nr_components = 2; mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* First ref surface bottom field texcoord element */ - mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f); - mc->vertex_elems[3].vertex_buffer_index = 1; + /* Chroma Cr texcoord element */ + mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f) * 3; + mc->vertex_elems[3].vertex_buffer_index = 0; mc->vertex_elems[3].nr_components = 2; mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* Second ref surface top field texcoord element 
*/ + /* First ref surface top field texcoord element */ mc->vertex_elems[4].src_offset = 0; - mc->vertex_elems[4].vertex_buffer_index = 2; + mc->vertex_elems[4].vertex_buffer_index = 1; mc->vertex_elems[4].nr_components = 2; mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; - /* Second ref surface bottom field texcoord element */ + /* First ref surface bottom field texcoord element */ mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f); - mc->vertex_elems[5].vertex_buffer_index = 2; + mc->vertex_elems[5].vertex_buffer_index = 1; mc->vertex_elems[5].nr_components = 2; mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; + /* Second ref surface top field texcoord element */ + mc->vertex_elems[6].src_offset = 0; + mc->vertex_elems[6].vertex_buffer_index = 2; + mc->vertex_elems[6].nr_components = 2; + mc->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface bottom field texcoord element */ + mc->vertex_elems[7].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[7].vertex_buffer_index = 2; + mc->vertex_elems[7].nr_components = 2; + mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; + /* Create our constant buffer */ mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); mc->vs_const_buf.buffer = pipe->winsys->buffer_create -- cgit v1.2.3 From 2f41095e04036654259bc2efc3324a92f32c8ca1 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 4 Sep 2008 04:10:43 -0400 Subject: g3dvl: Zero-block optimization. For blocks whose contents are derived completely from ref surfaces, don't bother zero-ing the corresponding block in the luma/chroma textures, except for the first such luma, chroma Cb, and chroma Cr each frame. All later zero blocks are textured from that first zero block. 
--- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 92 +++++++++++++--------- 1 file changed, 54 insertions(+), 38 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index b5aa79cdb4..93839e8aa9 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -63,6 +63,7 @@ struct vlR16SnormBufferedMC struct vlSurface *buffered_surface; struct vlSurface *past_surface, *future_surface; struct vlVertex2f surface_tex_inv_size; + struct vlVertex2f zero_block[3]; unsigned int num_macroblocks; struct vlMpeg2MacroBlock *macroblocks; @@ -203,8 +204,13 @@ static inline int vlGrabBlocks ++sb; } - else + else if (mc->zero_block[0].x < 0.0f) + { vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch); + + mc->zero_block[0].x = (mbpx + x * 8) * mc->surface_tex_inv_size.x; + mc->zero_block[0].y = (mbpy + y * 8) * mc->surface_tex_inv_size.y; + } } } @@ -241,9 +247,14 @@ static inline int vlGrabBlocks ++sb; } - else + else if (mc->zero_block[tb + 1].x < 0.0f) + { vlGrabNoBlock(texels, tex_pitch); + mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x; + mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y; + } + pipe_surface_unmap(tex_surface); } @@ -315,7 +326,7 @@ static inline int vlGrabMacroBlock return 0; } -#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zx, zy) \ +#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \ (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ @@ -323,62 +334,62 @@ static inline int vlGrabMacroBlock 
(vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \ \ - /*if ((cbp) & (lm)) \ - {*/ \ + if ((cbp) & (lm)) \ + { \ (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \ (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \ (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \ (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - /*} \ + } \ else \ { \ - (vb)[0].luma_tc.x = (zx); (vb)[0].luma_tc.y = (zy); \ - (vb)[1].luma_tc.x = (zx); (vb)[1].luma_tc.y = (zy) + (hy); \ - (vb)[2].luma_tc.x = (zx) + (hx); (vb)[2].luma_tc.y = (zy); \ - (vb)[3].luma_tc.x = (zx) + (hx); (vb)[3].luma_tc.y = (zy); \ - (vb)[4].luma_tc.x = (zx); (vb)[4].luma_tc.y = (zy) + (hy); \ - (vb)[5].luma_tc.x = ((zx) + (hx); (vb)[5].luma_tc.y = (zy) + (hy); \ - }*/ \ + (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \ + (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \ + (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \ + (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[5].luma_tc.x = (zb)[0].x + (hx); (vb)[5].luma_tc.y = (zb)[0].y + (hy); \ + } \ \ - /*if ((cbp) & (cbm)) \ - {*/ \ + if ((cbp) & (cbm)) \ + { \ (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \ (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].cb_tc.x = (mbx) * 
(unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \ (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \ (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - /*} \ + } \ else \ { \ - (vb)[0].cb_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \ - (vb)[1].cb_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \ - (vb)[2].cb_tc.x = (zx) + (hx); (vb)[2].cb_tc.y = (zy); \ - (vb)[3].cb_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \ - (vb)[4].cb_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \ - (vb)[5].cb_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \ - }*/ \ + (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \ + (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \ + (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \ + (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \ + } \ \ - /*if ((cbp) & (crm)) \ - {*/ \ + if ((cbp) & (crm)) \ + { \ (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \ (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \ (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \ (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ - /*} \ + } \ else \ { \ - (vb)[0].cr_tc.x = (zx); (vb)[0].cb_tc.y = (zy); \ - (vb)[1].cr_tc.x = (zx); (vb)[1].cb_tc.y = (zy) + (hy); \ - (vb)[2].cr_tc.x = (zx) + (hx); 
(vb)[2].cb_tc.y = (zy); \ - (vb)[3].cr_tc.x = (zx) + (hx); (vb)[3].cb_tc.y = (zy); \ - (vb)[4].cr_tc.x = (zx); (vb)[4].cb_tc.y = (zy) + (hy); \ - (vb)[5].cr_tc.x = ((zx) + (hx); (vb)[5].cb_tc.y = (zy) + (hy); \ - }*/ + (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \ + (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \ + (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \ + (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ + } static inline int vlGrabMacroBlockVB ( @@ -526,7 +537,7 @@ static inline int vlGrabMacroBlockVB vb, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, 0, 0, half.x, half.y, - 32, 2, 1, mc->zero_block.x, mc->zero_block.y + 32, 2, 1, mc->zero_block ); SET_BLOCK @@ -534,7 +545,7 @@ static inline int vlGrabMacroBlockVB vb + 6, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, half.x, 0, half.x, half.y, - 16, 2, 1, mc->zero_block.x, mc->zero_block.y + 16, 2, 1, mc->zero_block ); SET_BLOCK @@ -542,7 +553,7 @@ static inline int vlGrabMacroBlockVB vb + 12, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, 0, half.y, half.x, half.y, - 8, 2, 1, mc->zero_block.x, mc->zero_block.y + 8, 2, 1, mc->zero_block ); SET_BLOCK @@ -550,7 +561,7 @@ static inline int vlGrabMacroBlockVB vb + 18, macroblock->cbp, macroblock->mbx, macroblock->mby, unit.x, unit.y, half.x, half.y, half.x, half.y, - 4, 2, 1, mc->zero_block.x, mc->zero_block.y + 4, 2, 1, mc->zero_block ); mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer); @@ -725,6 +736,8 @@ static int vlFlush vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24; } + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; mc->num_macroblocks = 0; mc->cur_buf++; @@ -2277,7 +2290,8 @@ int 
vlCreateR16SNormBufferedMC struct vlRender **render ) { - struct vlR16SnormBufferedMC *mc; + struct vlR16SnormBufferedMC *mc; + unsigned int i; assert(pipe); assert(render); @@ -2297,6 +2311,8 @@ int vlCreateR16SNormBufferedMC mc->buffered_surface = NULL; mc->past_surface = NULL; mc->future_surface = NULL; + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; mc->num_macroblocks = 0; vlInit(mc); -- cgit v1.2.3 From 59f23e92e24b93eb48f4e0552dd8e397aefd1714 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Fri, 12 Sep 2008 13:22:43 -0400 Subject: g3dvl: Update softlinks to nouveau winsys files, related fixes. --- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 93839e8aa9..cfbad08038 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -6,10 +6,10 @@ #include #include #include -#include #include #include #include +#include #include "vl_render.h" #include "vl_shader_build.h" #include "vl_surface.h" -- cgit v1.2.3 From 42a42dec3dbb5e150584b3d0b2e14e9b555a4ac1 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Tue, 9 Sep 2008 02:27:05 -0400 Subject: g3dvl: Fullscreen fixes. Respect src & dst coords during scaling, clear FB. 
--- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 83 ++++++++++++++----------- src/gallium/state_trackers/g3dvl/vl_surface.c | 4 +- src/gallium/state_trackers/g3dvl/vl_surface.h | 2 + 3 files changed, 53 insertions(+), 36 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index ea003a31d1..9f9dafc8a9 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -14,6 +14,8 @@ struct vlVertexShaderConsts { + struct vlVertex4f dst_scale; + struct vlVertex4f dst_trans; struct vlVertex4f src_scale; struct vlVertex4f src_trans; }; @@ -87,6 +89,9 @@ static int vlResizeFrameBuffer 0 ); + /* Clear to black, in case video doesn't fill the entire window */ + pipe->clear(pipe, basic_csc->framebuffer.cbufs[0], 0); + return 0; } @@ -149,6 +154,15 @@ static int vlPutPictureCSC PIPE_BUFFER_USAGE_CPU_WRITE ); + vs_consts->dst_scale.x = destw / (float)basic_csc->framebuffer.cbufs[0]->width; + vs_consts->dst_scale.y = desth / (float)basic_csc->framebuffer.cbufs[0]->height; + vs_consts->dst_scale.z = 1; + vs_consts->dst_scale.w = 1; + vs_consts->dst_trans.x = destx / (float)basic_csc->framebuffer.cbufs[0]->width; + vs_consts->dst_trans.y = desty / (float)basic_csc->framebuffer.cbufs[0]->height; + vs_consts->dst_trans.z = 0; + vs_consts->dst_trans.w = 0; + vs_consts->src_scale.x = srcw / (float)surface->texture->width[0]; vs_consts->src_scale.y = srch / (float)surface->texture->height[0]; vs_consts->src_scale.z = 1; @@ -376,10 +390,12 @@ static int vlCreateVertexShader } /* - * decl c0 ; Scaling vector to scale texcoord rect to source size - * decl c1 ; Translation vector to move texcoord rect into position + * decl c0 ; Scaling vector to scale vertex pos rect to destination size + * decl c1 ; Translation vector to move vertex pos rect into position + * decl c2 ; Scaling vector to scale texcoord rect to 
source size + * decl c3 ; Translation vector to move texcoord rect into position */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* @@ -392,21 +408,19 @@ static int vlCreateVertexShader ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* decl t0 */ - decl = vl_decl_temps(0, 0); + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - /* mov o0, i0 ; Move pos in to pos out */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_INPUT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i1, c0 ; Scale unit texcoord rect to source size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 1, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o1, t0, c1 ; Translate texcoord rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + /* + * madd o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos + * madd o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst4(TGSI_OPCODE_MADD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } /* end */ inst = vl_end(); @@ -487,9 +501,8 @@ static int vlCreateFragmentShader * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix * dp4 o0.y, t0, c2 * dp4 o0.z, t0, c3 - * dp4 o0.w, t0, c4 ; XXX: Don't need 4th coefficient */ - for (i 
= 0; i < 4; ++i) + for (i = 0; i < 3; ++i) { inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; @@ -519,11 +532,11 @@ static int vlCreateDataBufs pipe = csc->pipe; /* - Create our vertex buffer and vertex buffer element - VB contains 4 vertices that render a quad covering the entire window - to display a rendered surface - Quad is rendered as a tri strip - */ + * Create our vertex buffer and vertex buffer element + * VB contains 4 vertices that render a quad covering the entire window + * to display a rendered surface + * Quad is rendered as a tri strip + */ csc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); csc->vertex_bufs[0].max_index = 3; csc->vertex_bufs[0].buffer_offset = 0; @@ -550,9 +563,9 @@ static int vlCreateDataBufs csc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; /* - Create our texcoord buffer and texcoord buffer element - Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices - */ + * Create our texcoord buffer and texcoord buffer element + * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ csc->vertex_bufs[1].pitch = sizeof(struct vlVertex2f); csc->vertex_bufs[1].max_index = 3; csc->vertex_bufs[1].buffer_offset = 0; @@ -579,9 +592,9 @@ static int vlCreateDataBufs csc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; /* - Create our vertex shader's constant buffer - Const buffer contains scaling and translation vectors - */ + * Create our vertex shader's constant buffer + * Const buffer contains scaling and translation vectors + */ csc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); csc->vs_const_buf.buffer = pipe->winsys->buffer_create ( @@ -592,9 +605,9 @@ static int vlCreateDataBufs ); /* - Create our fragment shader's constant buffer - Const buffer contains the color conversion matrix and bias vectors - */ + * Create our 
fragment shader's constant buffer + * Const buffer contains the color conversion matrix and bias vectors + */ csc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); csc->fs_const_buf.buffer = pipe->winsys->buffer_create ( @@ -605,9 +618,9 @@ static int vlCreateDataBufs ); /* - TODO: Refactor this into a seperate function, - allow changing the CSC matrix at runtime to switch between regular & full versions - */ + * TODO: Refactor this into a seperate function, + * allow changing the CSC matrix at runtime to switch between regular & full versions + */ memcpy ( pipe->winsys->buffer_map(pipe->winsys, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 687fd1ec29..07a9be2a99 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -104,6 +104,8 @@ int vlPutPicture int desty, int destw, int desth, + int drawable_w, + int drawable_h, enum vlPictureType picture_type ) { @@ -118,7 +120,7 @@ int vlPutPicture csc = surface->context->csc; pipe = surface->context->pipe; - csc->vlResizeFrameBuffer(csc, destw, desth); + csc->vlResizeFrameBuffer(csc, drawable_w, drawable_h); csc->vlBegin(csc); diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h index b975e131fa..3e05050345 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.h +++ b/src/gallium/state_trackers/g3dvl/vl_surface.h @@ -49,6 +49,8 @@ int vlPutPicture int desty, int destw, int desth, + int drawable_w, + int drawable_h, enum vlPictureType picture_type ); -- cgit v1.2.3 From f3f449a49136ae2fd2dc3cf62d2c24dd42505e7d Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 10 Sep 2008 19:37:56 -0400 Subject: g3dvl: Implement surface sync functions. 
--- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 20 ++++---- src/gallium/state_trackers/g3dvl/vl_surface.c | 59 +++++++++++++++++++++- src/gallium/state_trackers/g3dvl/vl_surface.h | 30 ++++++++--- src/gallium/state_trackers/g3dvl/vl_types.h | 7 +++ 4 files changed, 97 insertions(+), 19 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index cfbad08038..75d326b36e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -591,6 +591,10 @@ static int vlFlush assert(render); mc = (struct vlR16SnormBufferedMC*)render; + + if (!mc->buffered_surface) + return 0; + pipe = mc->pipe; for (i = 0; i < mc->num_macroblocks; ++i) @@ -736,8 +740,12 @@ static int vlFlush vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24; } + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence); + for (i = 0; i < 3; ++i) mc->zero_block[i].x = -1.0f; + + mc->buffered_surface = NULL; mc->num_macroblocks = 0; mc->cur_buf++; @@ -760,12 +768,7 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered if (mc->buffered_surface) { - if - ( - mc->buffered_surface != surface /*|| - mc->past_surface != batch->past_surface || - mc->future_surface != batch->future_surface*/ - ) + if (mc->buffered_surface != surface) { vlFlush(&mc->base); mc->buffered_surface = surface; @@ -1027,7 +1030,6 @@ static int vlCreateFragmentShaderIMB inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } /* mul o0, t0, c0 ; Rescale texel to correct range */ @@ -1323,7 +1325,6 @@ static int vlCreateFragmentShaderFramePMB inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; 
inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } /* mul t0, t0, c0 ; Rescale texel to correct range */ @@ -1442,7 +1443,6 @@ static int vlCreateFragmentShaderFieldPMB inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } /* mul t0, t0, c0 ; Rescale texel to correct range */ @@ -1818,7 +1818,6 @@ static int vlCreateFragmentShaderFrameBMB inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } /* mul t0, t0, c0 ; Rescale texel to correct range */ @@ -1955,7 +1954,6 @@ static int vlCreateFragmentShaderFieldBMB inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } /* mul t0, t0, c0 ; Rescale texel to correct range */ diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 07a9be2a99..6648133ef8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -77,6 +77,7 @@ int vlRenderMacroBlocksMpeg2 { assert(batch); assert(surface); + assert(surface->context); surface->context->render->vlBegin(surface->context->render); @@ -141,9 +142,10 @@ int vlPutPicture csc->vlEnd(csc); - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &surface->disp_fence); + bind_pipe_drawable(pipe, drawable); - /* TODO: Need to take destx, desty into consideration */ + pipe->winsys->flush_frontbuffer ( pipe->winsys, @@ -154,6 +156,59 @@ int 
vlPutPicture return 0; } +int vlSurfaceGetStatus +( + struct vlSurface *surface, + enum vlResourceStatus *status +) +{ + assert(surface); + assert(surface->context); + assert(status); + + if (surface->render_fence && !surface->context->pipe->winsys->fence_signalled(surface->context->pipe->winsys, surface->render_fence, 0)) + { + *status = vlResourceStatusRendering; + return 0; + } + + if (surface->disp_fence && !surface->context->pipe->winsys->fence_signalled(surface->context->pipe->winsys, surface->disp_fence, 0)) + { + *status = vlResourceStatusDisplaying; + return 0; + } + + *status = vlResourceStatusFree; + return 0; +} + +int vlSurfaceFlush +( + struct vlSurface *surface +) +{ + assert(surface); + assert(surface->context); + + surface->context->render->vlFlush(surface->context->render); + + return 0; +} + +int vlSurfaceSync +( + struct vlSurface *surface +) +{ + assert(surface); + assert(surface->context); + assert(surface->render_fence); + + surface->context->pipe->winsys->fence_finish(surface->context->pipe->winsys, surface->render_fence, 0); + + return 0; +} + struct vlScreen* vlSurfaceGetScreen ( struct vlSurface *surface diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h index 3e05050345..133e1515ef 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.h +++ b/src/gallium/state_trackers/g3dvl/vl_surface.h @@ -8,12 +8,14 @@ struct pipe_texture; struct vlSurface { - struct vlScreen *screen; - struct vlContext *context; - unsigned int width; - unsigned int height; - enum vlFormat format; - struct pipe_texture *texture; + struct vlScreen *screen; + struct vlContext *context; + unsigned int width; + unsigned int height; + enum vlFormat format; + struct pipe_texture *texture; + struct pipe_fence_handle *render_fence; + struct pipe_fence_handle *disp_fence; }; #endif @@ -54,6 +56,22 @@ int vlPutPicture enum vlPictureType picture_type ); +int vlSurfaceGetStatus +( + struct vlSurface *surface, + enum 
vlResourceStatus *status +); + +int vlSurfaceFlush +( + struct vlSurface *surface +); + +int vlSurfaceSync +( + struct vlSurface *surface +); + struct vlScreen* vlSurfaceGetScreen ( struct vlSurface *surface diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h index b432bfde93..274e1f7437 100644 --- a/src/gallium/state_trackers/g3dvl/vl_types.h +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -13,6 +13,13 @@ struct vlScreen; struct vlContext; struct vlSurface; +enum vlResourceStatus +{ + vlResourceStatusFree, + vlResourceStatusRendering, + vlResourceStatusDisplaying +}; + enum vlProfile { vlProfileMpeg2Simple, -- cgit v1.2.3 From 3122f2bebe8d76568916b8cddff542f52466055e Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 13 Sep 2008 01:35:14 -0400 Subject: g3dvl: Fix field coded block copy. --- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 75d326b36e..888f0040bf 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -108,17 +108,7 @@ static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst { unsigned int y; - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) memcpy ( dst + y * dst_pitch * 2, @@ -1181,7 +1171,7 @@ static int vlCreateVertexShaderFieldPMB ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* decl c0 ; Texcoord denorm coefficients */ + /* decl c0 ; Render target dimensions */ decl = 
vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); @@ -1661,8 +1651,8 @@ static int vlCreateVertexShaderFieldBMB ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } - /* decl c0 ; Denorm coefficients */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); + /* decl c0 ; Render target dimensions */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* -- cgit v1.2.3 From 111b8f6dd9c97cd30979c8d5f56244e1e6ed60a2 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 22 Sep 2008 12:13:23 -0400 Subject: g3dvl: Bad semantic index in shader decl. --- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 888f0040bf..62107803ac 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -974,7 +974,7 @@ static int vlCreateFragmentShaderIMB */ for (i = 0; i < 3; ++i) { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, i, i, TGSI_INTERPOLATE_LINEAR); + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -1186,7 +1186,7 @@ static int vlCreateVertexShaderFieldPMB */ for (i = 0; i < 7; i++) { - decl = vl_decl_output((i == 0 || i == 6) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } @@ -1668,7 +1668,7 @@ static int vlCreateVertexShaderFieldBMB */ for (i = 0; i < 9; i++) { - decl = vl_decl_output((i == 0 || i == 8) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); } -- cgit v1.2.3 From d008fb178631aecacc07aeec66299748470fd8c7 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 22 Sep 2008 12:26:13 -0400 Subject: g3dvl: Ignore client flush requests unless entire frame is buffered. --- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 10 ++++++++++ src/gallium/state_trackers/g3dvl/vl_surface.c | 1 + 2 files changed, 11 insertions(+) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 62107803ac..e7a070ef4d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -576,6 +576,9 @@ static int vlFlush unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0}; unsigned int offset[vlNumMacroBlockExTypes]; unsigned int vb_start = 0; + unsigned int mbw; + unsigned int mbh; + unsigned int num_mb_per_frame; unsigned int i; assert(render); @@ -585,6 +588,13 @@ static int vlFlush if (!mc->buffered_surface) return 0; + mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; + mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; + num_mb_per_frame = mbw * mbh; + + if (mc->num_macroblocks < num_mb_per_frame) + return 0; + pipe = mc->pipe; for (i = 0; i < mc->num_macroblocks; ++i) diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 
6648133ef8..076bd40d41 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -179,6 +179,7 @@ int vlSurfaceGetStatus } *status = vlResourceStatusFree; + return 0; } -- cgit v1.2.3 From 3354e668f0d4eb7ad1d92607031c1fc2e785e8d1 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Fri, 17 Oct 2008 17:20:03 -0400 Subject: g3dvl: Get rid of state tracker unit tests, stale and not useful atm. --- src/gallium/state_trackers/g3dvl/tests/.gitignore | 6 - src/gallium/state_trackers/g3dvl/tests/Makefile | 45 ---- .../state_trackers/g3dvl/tests/test_b_rendering.c | 226 --------------------- .../state_trackers/g3dvl/tests/test_context.c | 22 -- .../state_trackers/g3dvl/tests/test_i_rendering.c | 137 ------------- .../state_trackers/g3dvl/tests/test_p_rendering.c | 214 ------------------- .../state_trackers/g3dvl/tests/test_pf_rendering.c | 214 ------------------- .../state_trackers/g3dvl/tests/test_surface.c | 26 --- 8 files changed, 890 deletions(-) delete mode 100644 src/gallium/state_trackers/g3dvl/tests/.gitignore delete mode 100644 src/gallium/state_trackers/g3dvl/tests/Makefile delete mode 100644 src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c delete mode 100644 src/gallium/state_trackers/g3dvl/tests/test_context.c delete mode 100644 src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c delete mode 100644 src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c delete mode 100644 src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c delete mode 100644 src/gallium/state_trackers/g3dvl/tests/test_surface.c (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/tests/.gitignore b/src/gallium/state_trackers/g3dvl/tests/.gitignore deleted file mode 100644 index 9b1ec4e212..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -test_context -test_surface -test_i_rendering -test_p_rendering -test_pf_rendering 
-test_b_rendering diff --git a/src/gallium/state_trackers/g3dvl/tests/Makefile b/src/gallium/state_trackers/g3dvl/tests/Makefile deleted file mode 100644 index 45cefa2e57..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/Makefile +++ /dev/null @@ -1,45 +0,0 @@ -GALLIUMDIR = ../../.. - -CFLAGS += -g -Wall -Werror \ - -I${GALLIUMDIR}/state_trackers/g3dvl \ - -I${GALLIUMDIR}/winsys/g3dvl \ - -I${GALLIUMDIR}/include \ - -I${GALLIUMDIR}/auxiliary \ - -I${GALLIUMDIR}/drivers -LDFLAGS += -L${GALLIUMDIR}/state_trackers/g3dvl \ - -L${GALLIUMDIR}/drivers/softpipe \ - -L${GALLIUMDIR}/auxiliary/tgsi \ - -L${GALLIUMDIR}/auxiliary/draw \ - -L${GALLIUMDIR}/auxiliary/util \ - -L${GALLIUMDIR}/auxiliary/translate \ - -L${GALLIUMDIR}/auxiliary/cso_cache \ - -L${GALLIUMDIR}/auxiliary/rtasm -LIBS += -lg3dvl -lsoftpipe -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lutil -lX11 -lm - -############################################# - -.PHONY = all clean - -all: test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering - -test_context: test_context.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o - $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} - -test_surface: test_surface.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o - $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} - -test_i_rendering: test_i_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o - $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} - -test_p_rendering: test_p_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o - $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} - -test_pf_rendering: test_pf_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o - $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} - -test_b_rendering: test_b_rendering.o ${GALLIUMDIR}/winsys/g3dvl/xsp_winsys.o - $(CC) ${LDFLAGS} -o $@ $^ ${LIBS} - -clean: - rm -rf *.o test_context test_surface test_i_rendering test_p_rendering test_pf_rendering test_b_rendering - diff --git a/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c deleted file 
mode 100644 index b78cc851ae..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/test_b_rendering.c +++ /dev/null @@ -1,226 +0,0 @@ -#include -#include -#include -#include -#include - -static const unsigned short ycbcr16x16_420[8*8*6] = -{ - 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, - 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, - 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, - 
0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, - - 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, - - 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E -}; - -static const signed short ycbcr16x16_420_2[8*8*6] = -{ - -0x00A5,-0x00A5,-0x00A5,-0x0072,-0x00A5,-0x0072,-0x0072,-0x0072, - -0x0072,-0x00A5,-0x0072,-0x0072,-0x00A5,-0x0072,-0x0072,-0x0072, - -0x0072,-0x00A5,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - 
-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5, - -0x0072,-0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x00A5, - -0x0072,-0x0072,-0x0072,-0x0072,-0x00A5,-0x00A5,-0x00A5,-0x00A5, - - -0x004F,-0x004F,-0x004F,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, - -0x00B2,-0x00B2,-0x004F,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, - -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, - -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, - -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, - -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2, - -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x004F, - -0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x00B2,-0x004F,-0x004F, - - -0x003E,-0x003E,-0x003E,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, - 
-0x0060,-0x0060,-0x003E,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, - -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, - -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, - -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, - -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060, - -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x003E, - -0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x0060,-0x003E,-0x003E -}; - -int main(int argc, char **argv) -{ - const unsigned int video_width = 32, video_height = 32; - const unsigned int window_width = video_width * 2, window_height = video_height * 2; - int quit = 0; - Display *display; - Window root, window; - Pixmap framebuffer; - XEvent event; - struct pipe_context *pipe; - struct VL_CONTEXT *ctx; - struct VL_SURFACE *sfc, *past_sfc, *future_sfc; - struct VL_MOTION_VECTOR motion_vector[2] = - { - { - {0, 0}, {0, 0} - }, - { - {0, 0}, {0, 0} - } - }; - - display = XOpenDisplay(NULL); - root = XDefaultRootWindow(display); - window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); - framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); - - XSelectInput(display, window, ExposureMask | KeyPressMask); - XMapWindow(display, window); - XSync(display, 0); - - pipe = create_pipe_context(display); - vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); - vlCreateSurface(ctx, &sfc); - vlCreateSurface(ctx, &past_sfc); - vlCreateSurface(ctx, &future_sfc); - - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, 
VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, past_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, future_sfc); - vlRenderBMacroBlock - ( - VL_FRAME_PICTURE, - VL_FIELD_FIRST, - 0, - 0, - VL_FRAME_MC, - motion_vector, - 0x3F, - VL_DCT_FRAME_CODED, - (short*)ycbcr16x16_420_2, - past_sfc, - future_sfc, - sfc - ); - vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); - - puts("Press any key to continue..."); - - while (!quit) - { - XNextEvent(display, &event); - switch (event.type) - { - case Expose: - { - XCopyArea - ( - display, - framebuffer, - window, - XDefaultGC(display, XDefaultScreen(display)), - 0, - 0, - window_width, - window_height, - 0, - 0 - ); - break; - } - case KeyPress: - { - quit = 1; - break; - } - } - } - - vlDestroySurface(sfc); - vlDestroySurface(past_sfc); - vlDestroySurface(future_sfc); - vlDestroyContext(ctx); - - XFreePixmap(display, framebuffer); - XDestroyWindow(display, window); - XCloseDisplay(display); - - return 0; -} - diff --git a/src/gallium/state_trackers/g3dvl/tests/test_context.c b/src/gallium/state_trackers/g3dvl/tests/test_context.c deleted file mode 100644 index 2002977ee2..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/test_context.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -int main(int argc, char **argv) -{ - const unsigned int video_width = 32, video_height = 32; - - Display *display; - struct pipe_context *pipe; - struct VL_CONTEXT *ctx; - - display = XOpenDisplay(NULL); - pipe = 
create_pipe_context(display); - - vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); - vlDestroyContext(ctx); - - XCloseDisplay(display); - - return 0; -} - diff --git a/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c deleted file mode 100644 index 1f96471130..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/test_i_rendering.c +++ /dev/null @@ -1,137 +0,0 @@ -#include -#include -#include -#include -#include - -static const unsigned short ycbcr16x16_420[8*8*6] = -{ - 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, - 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, - 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 
0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, - - 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, - - 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E -}; - -int main(int argc, char **argv) -{ - const unsigned int video_width = 32, video_height = 32; - const unsigned int window_width = video_width * 2, window_height = video_height * 2; - int quit = 0; - Display *display; - Window root, window; - Pixmap framebuffer; - XEvent event; - struct pipe_context *pipe; - struct VL_CONTEXT *ctx; - struct VL_SURFACE *sfc; - - display = XOpenDisplay(NULL); - root = XDefaultRootWindow(display); - window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 
0, 0); - framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); - - XSelectInput(display, window, ExposureMask | KeyPressMask); - XMapWindow(display, window); - XSync(display, 0); - - pipe = create_pipe_context(display); - vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); - vlCreateSurface(ctx, &sfc); - - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, sfc); - vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); - - puts("Press any key to continue..."); - - while (!quit) - { - XNextEvent(display, &event); - switch (event.type) - { - case Expose: - { - XCopyArea - ( - display, - framebuffer, - window, - XDefaultGC(display, XDefaultScreen(display)), - 0, - 0, - window_width, - window_height, - 0, - 0 - ); - break; - } - case KeyPress: - { - quit = 1; - break; - } - } - } - - vlDestroySurface(sfc); - vlDestroyContext(ctx); - - XFreePixmap(display, framebuffer); - XDestroyWindow(display, window); - XCloseDisplay(display); - - return 0; -} - diff --git a/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c deleted file mode 100644 index 2203349784..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/test_p_rendering.c +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include -#include -#include - -static const unsigned short ycbcr16x16_420[8*8*6] = -{ - 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, - 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, - 
0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, - - 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 
0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, - - 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E -}; - -static const signed short ycbcr16x16_420_2[8*8*6] = -{ - -51,-51,-51, 0,-51, 0, 0, 0, - 0,-51, 0, 0,-51, 0, 0, 0, - 0,-51, 0, 0,-51,-51, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 99, 99, 99, 0, 0, 0, 0, 0, - 0, 0, 99, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 33, 33, 33, 0, 0, 0, 0, 0, - 0, 0, 33, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 
- 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -int main(int argc, char **argv) -{ - const unsigned int video_width = 32, video_height = 32; - const unsigned int window_width = video_width * 2, window_height = video_height * 2; - int quit = 0; - Display *display; - Window root, window; - Pixmap framebuffer; - XEvent event; - struct pipe_context *pipe; - struct VL_CONTEXT *ctx; - struct VL_SURFACE *sfc, *ref_sfc; - struct VL_MOTION_VECTOR motion_vector = - { - {0, 0}, {0, 0} - }; - - display = XOpenDisplay(NULL); - root = XDefaultRootWindow(display); - window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); - framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); - - XSelectInput(display, window, ExposureMask | KeyPressMask); - XMapWindow(display, window); - XSync(display, 0); - - pipe = create_pipe_context(display); - vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); - vlCreateSurface(ctx, &sfc); - vlCreateSurface(ctx, &ref_sfc); - - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderPMacroBlock - ( - VL_FRAME_PICTURE, - VL_FIELD_FIRST, - 0, - 0, - VL_FRAME_MC, - &motion_vector, - 0x3F, - VL_DCT_FRAME_CODED, - (short*)ycbcr16x16_420_2, - ref_sfc, - sfc - ); - vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); - - puts("Press any key to continue..."); - - while (!quit) - { - XNextEvent(display, &event); - switch 
(event.type) - { - case Expose: - { - XCopyArea - ( - display, - framebuffer, - window, - XDefaultGC(display, XDefaultScreen(display)), - 0, - 0, - window_width, - window_height, - 0, - 0 - ); - break; - } - case KeyPress: - { - quit = 1; - break; - } - } - } - - vlDestroySurface(sfc); - vlDestroySurface(ref_sfc); - vlDestroyContext(ctx); - - XFreePixmap(display, framebuffer); - XDestroyWindow(display, window); - XCloseDisplay(display); - - return 0; -} - diff --git a/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c b/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c deleted file mode 100644 index 43586fc553..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/test_pf_rendering.c +++ /dev/null @@ -1,214 +0,0 @@ -#include -#include -#include -#include -#include - -static const unsigned short ycbcr16x16_420[8*8*6] = -{ - 0x00A5,0x00A5,0x00A5,0x0072,0x00A5,0x0072,0x0072,0x0072, - 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x0072,0x0072,0x0072, - 0x0072,0x00A5,0x0072,0x0072,0x00A5,0x00A5,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 
0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5, - 0x0072,0x0072,0x0072,0x0072,0x00A5,0x00A5,0x00A5,0x00A5, - - 0x004F,0x004F,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x004F,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F, - 0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x00B2,0x004F,0x004F, - - 0x003E,0x003E,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x003E,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E, - 0x0060,0x0060,0x0060,0x0060,0x0060,0x0060,0x003E,0x003E -}; - -static const signed short ycbcr16x16_420_2[8*8*6] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 
0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -int main(int argc, char **argv) -{ - const unsigned int video_width = 32, video_height = 32; - const unsigned int window_width = video_width * 2, window_height = video_height * 2; - int quit = 0; - Display *display; - Window root, window; - Pixmap framebuffer; - XEvent event; - struct pipe_context *pipe; - struct VL_CONTEXT *ctx; - struct VL_SURFACE *sfc, *ref_sfc; - struct VL_MOTION_VECTOR motion_vector = - { - {0, 0}, {32, 32} - }; - - display = XOpenDisplay(NULL); - root = XDefaultRootWindow(display); - window = XCreateSimpleWindow(display, root, 0, 0, window_width, window_height, 0, 0, 0); - framebuffer = XCreatePixmap(display, root, window_width, window_height, 24); - - XSelectInput(display, window, ExposureMask | KeyPressMask); - XMapWindow(display, window); - XSync(display, 0); - - pipe = create_pipe_context(display); - vlCreateContext(display, pipe, video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); - vlCreateSurface(ctx, &sfc); - 
vlCreateSurface(ctx, &ref_sfc); - - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 0, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 0, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderIMacroBlock(VL_FRAME_PICTURE, VL_FIELD_FIRST, 1, 1, 0x3F, VL_DCT_FRAME_CODED, (short*)ycbcr16x16_420, ref_sfc); - vlRenderPMacroBlock - ( - VL_FRAME_PICTURE, - VL_FIELD_FIRST, - 0, - 0, - VL_FIELD_MC, - &motion_vector, - 0x3F, - VL_DCT_FRAME_CODED, - (short*)ycbcr16x16_420_2, - ref_sfc, - sfc - ); - vlPutSurface(sfc, framebuffer, 0, 0, video_width, video_height, 0, 0, window_width, window_height, VL_FRAME_PICTURE); - - puts("Press any key to continue..."); - - while (!quit) - { - XNextEvent(display, &event); - switch (event.type) - { - case Expose: - { - XCopyArea - ( - display, - framebuffer, - window, - XDefaultGC(display, XDefaultScreen(display)), - 0, - 0, - window_width, - window_height, - 0, - 0 - ); - break; - } - case KeyPress: - { - quit = 1; - break; - } - } - } - - vlDestroySurface(sfc); - vlDestroySurface(ref_sfc); - vlDestroyContext(ctx); - - XFreePixmap(display, framebuffer); - XDestroyWindow(display, window); - XCloseDisplay(display); - - return 0; -} - diff --git a/src/gallium/state_trackers/g3dvl/tests/test_surface.c b/src/gallium/state_trackers/g3dvl/tests/test_surface.c deleted file mode 100644 index 4d1946396a..0000000000 --- a/src/gallium/state_trackers/g3dvl/tests/test_surface.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include -#include - -int main(int argc, char **argv) -{ - const unsigned int video_width = 32, video_height = 32; - - Display *display; - struct pipe_context *pipe; - struct VL_CONTEXT *ctx; - struct VL_SURFACE *sfc; - - display = XOpenDisplay(NULL); - pipe = create_pipe_context(display); - - vlCreateContext(display, pipe, 
video_width, video_height, VL_FORMAT_YCBCR_420, &ctx); - vlCreateSurface(ctx, &sfc); - vlDestroySurface(sfc); - vlDestroyContext(ctx); - - XCloseDisplay(display); - - return 0; -} - -- cgit v1.2.3 From bacacd5adacc883757f615589fa4062ba2920f07 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 23 Nov 2008 13:28:01 -0500 Subject: g3dvl: Move MC shaders to a seperate file, #included in the original. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 1186 +------------------- .../g3dvl/vl_r16snorm_mc_buf_shaders.inc | 1185 +++++++++++++++++++ 2 files changed, 1186 insertions(+), 1185 deletions(-) create mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index e7a070ef4d..81ec35be0e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -864,1191 +864,7 @@ static const struct vlFragmentShaderConsts fs_consts = {0.5f, 2.0f, 0.0f, 0.0f} }; -static int vlCreateVertexShaderIMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma 
Cr texcoords - */ - for (i = 0; i < 4; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - */ - for (i = 0; i < 4; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderIMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; 
Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = 
TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFramePMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma Cr texcoords - * decl i4 ; Ref surface top field texcoords - * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream) - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - * decl o4 ; Ref macroblock texcoords - */ - for (i = 0; i < 5; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldPMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - 
/* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma Cr texcoords - * decl i4 ; Ref macroblock top field texcoords - * decl i5 ; Ref macroblock bottom field texcoords - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Render target dimensions */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - * decl o4 ; Ref macroblock top field texcoords - * decl o5 ; Ref macroblock bottom field texcoords - * decl o6 ; Denormalized vertex pos - */ - for (i = 0; i < 7; i++) - { - decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords - * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords - */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o6, i0, c0 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFramePMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); 
- /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - * decl i3 ; Ref macroblock texcoords - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = 
vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldPMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct 
tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - * decl i3 ; Ref macroblock top field texcoords - * decl i4 ; Ref macroblock bottom field texcoords - * decl i5 ; Denormalized vertex pos - */ - for (i = 0; i < 6; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t4 */ - decl = vl_decl_temps(0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, 
TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i3, s3 ; Read texel from ref macroblock top field - * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* XXX: Pos values off by 0.5? 
*/ - /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFrameBMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma Cr 
texcoords - * decl i4 ; First ref macroblock top field texcoords - * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream) - * decl i6 ; Second ref macroblock top field texcoords - * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) - */ - for (i = 0; i < 8; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - * decl o4 ; First ref macroblock texcoords - * decl o5 ; Second ref macroblock texcoords - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords - * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords - */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); - 
free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldBMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma Cb texcoords - * decl i3 ; Chroma Cr texcoords - * decl i4 ; First ref macroblock top field texcoords - * decl i5 ; First ref macroblock bottom field texcoords - * decl i6 ; Second ref macroblock top field texcoords - * decl i7 ; Second ref macroblock bottom field texcoords - */ - for (i = 0; i < 8; i++) - { - decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Render target dimensions */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma Cb texcoords - * decl o3 ; Chroma Cr texcoords - * decl o4 ; First ref macroblock top field texcoords - * decl o5 ; First ref macroblock Bottom field texcoords - * decl o6 ; Second ref macroblock top field texcoords - * decl o7 ; Second ref macroblock Bottom field texcoords - * decl o8 ; Denormalized vertex pos - */ - for (i = 0; i < 9; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * mov o0, i0 ; Move input vertex pos to output - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma Cb texcoords to output - * mov o3, i3 ; Move input chroma Cr texcoords to output - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords - * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords - * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords - * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, 
TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o8, i0, c0 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameBMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - * decl i3 ; First ref macroblock texcoords - * decl i4 ; Second ref macroblock texcoords - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels - */ - decl = 
vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t2 */ - decl = vl_decl_temps(0, 2); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for first ref surface texture - * decl s4 ; Sampler for second ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti 
+= tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i3, s3 ; Read texel from first ref macroblock - * tex2d t2, i4, s4 ; Read texel from second ref macroblock - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldBMB -( - struct vlR16SnormBufferedMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct 
tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Luma texcoords - * decl i1 ; Chroma Cb texcoords - * decl i2 ; Chroma Cr texcoords - * decl i3 ; First ref macroblock top field texcoords - * decl i4 ; First ref macroblock bottom field texcoords - * decl i5 ; Second ref macroblock top field texcoords - * decl i6 ; Second ref macroblock bottom field texcoords - * decl i7 ; Denormalized vertex pos - */ - for (i = 0; i < 8; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels - * ; and for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t5 */ - decl = vl_decl_temps(0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for first ref surface texture - * decl s4 ; Sampler for second ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, 
t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i2, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* XXX: Pos values off by 0.5? 
*/ - /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field - * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field - * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = 
TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} +#include "vl_r16snorm_mc_buf_shaders.inc" static int vlCreateDataBufs ( diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc new file mode 100644 index 0000000000..ef4a4b2add --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc @@ -0,0 +1,1185 @@ +static int vlCreateVertexShaderIMB +( + struct vlR16SnormBufferedMC *mc +) +{ /* Builds the token stream for the IMB (presumably intra-coded macroblock) vertex shader: position and luma/Cb/Cr texcoords are passed through unchanged. The result of pipe->create_vs_state is stored in mc->i_vs and 0 is always returned. */ + const unsigned int max_tokens = 50; /* Capacity of the token buffer; each build call below receives the remaining room, max_tokens minus ti */ + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* NOTE(review): the result is written through below without a NULL check */ + + /* Version token goes into tokens[0] */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header token lives in tokens[1]; its address is passed to every later build call */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor token goes into tokens[2] and is built with TGSI_PROCESSOR_VERTEX */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; /* First slot after the three fixed tokens above */ + + /* + * decl i0 ; Vertex pos (declared with TGSI_SEMANTIC_POSITION, the rest with TGSI_SEMANTIC_GENERIC) + * decl i1 ; Luma texcoords + * decl i2 ; 
Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* ti advances by the returned value, presumably the number of tokens written (TODO confirm) */ + } + + /* + * decl o0 ; Vertex pos (declared with TGSI_SEMANTIC_POSITION, the rest with TGSI_SEMANTIC_GENERIC) + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; /* Only the tokens member of vs is assigned before the struct is handed over */ + mc->i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); /* NOTE(review): buffer is released right after state creation, so create_vs_state presumably copies or fully consumes the tokens (confirm) */ + + return 0; /* The value stored in mc->i_vs is never inspected, so failure is not reported */ +} + +static int vlCreateFragmentShaderIMB +( + struct vlR16SnormBufferedMC *mc +) +{ /* Builds the token stream for the IMB (presumably intra-coded macroblock) fragment shader: samples the luma, Cb and Cr textures into t0.xyz, multiplies by constant c0 and writes o0. The result of pipe->create_fs_state is stored in mc->i_fs and 0 is always returned. */ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* NOTE(review): the result is written through below without a NULL check */ + + /* Version token goes into tokens[0] */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header token lives in tokens[1]; its address is passed to every later build call */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor token goes into tokens[2] and is built with TGSI_PROCESSOR_FRAGMENT */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, 
header); + + ti = 3; /* tokens[0] to tokens[2] already hold the version, header and processor tokens */ + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + */ + for (i = 0; i < 3; ++i) /* Register i is declared with second argument i plus 1, presumably the generic semantic index that pairs it with a vertex output (TODO confirm against vl_decl_interpolated_input) */ + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); /* ti advances by the returned value, presumably the number of tokens written (TODO confirm) */ + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 ; t1 receives each fetched texel, t0 collects the three samples */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); /* The x, y and z source swizzles are all pointed at the .x channel of t1 below; the w swizzle is left as vl_inst2 set it */ + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + 
inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; /* Mask is shifted left by i so passes 0, 1, 2 write t0.x, t0.y, t0.z as listed above (relies on the X, Y, Z mask bits being consecutive) */ + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; /* Only the tokens member of fs is assigned before the struct is handed over */ + mc->i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); /* NOTE(review): buffer is released right after state creation, so create_fs_state presumably copies or fully consumes the tokens (confirm) */ + + return 0; /* The value stored in mc->i_fs is never inspected, so failure is not reported */ +} + +static int vlCreateVertexShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ /* Builds the token stream for the frame PMB (presumably frame-predicted macroblock) vertex shader: position and luma/Cb/Cr texcoords are passed through and o4 is formed by adding i4 to the vertex position. The result of pipe->create_vs_state is stored in mc->p_vs[0] and 0 is always returned. */ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); /* NOTE(review): the result is written through below without a NULL check */ + + /* Version token goes into tokens[0] */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header token lives in tokens[1]; its address is passed to every later build call */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor token goes into tokens[2] and is built with TGSI_PROCESSOR_VERTEX */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; /* First slot after the three fixed tokens above */ + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref surface top field texcoords + * decl i5 ; Ref surface bottom field texcoords (unused, packed in the same stream) + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + 
/* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref macroblock top field texcoords + * decl i5 ; Ref macroblock bottom field texcoords + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Render target dimensions */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock top field texcoords + * decl o5 ; Ref macroblock bottom field texcoords + * decl o6 ; Denormalized vertex pos + */ + for (i = 0; i < 7; i++) + { + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords + * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o6, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); 
+ /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = 
vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct 
tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock top field texcoords + * decl i4 ; Ref macroblock bottom field texcoords + * decl i5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, 
TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i3, s3 ; Read texel from ref macroblock top field + * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? 
*/ + /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFrameBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr 
texcoords + * decl i4 ; First ref macroblock top field texcoords + * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream) + * decl i6 ; Second ref macroblock top field texcoords + * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) + */ + for (i = 0; i < 8; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock texcoords + * decl o5 ; Second ref macroblock texcoords + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); + 
free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; First ref macroblock top field texcoords + * decl i5 ; First ref macroblock bottom field texcoords + * decl i6 ; Second ref macroblock top field texcoords + * decl i7 ; Second ref macroblock bottom field texcoords + */ + for (i = 0; i < 8; i++) + { + decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Render target dimensions */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock top field texcoords + * decl o5 ; First ref macroblock Bottom field texcoords + * decl o6 ; Second ref macroblock top field texcoords + * decl o7 ; Second ref macroblock Bottom field texcoords + * decl o8 ; Denormalized vertex pos + */ + for (i = 0; i < 9; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords + * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords + * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords + * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, 
TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o8, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFrameBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; First ref macroblock texcoords + * decl i4 ; Second ref macroblock texcoords + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels + */ + decl = 
vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti 
+= tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i3, s3 ; Read texel from first ref macroblock + * tex2d t2, i4, s4 ; Read texel from second ref macroblock + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldBMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct 
tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; First ref macroblock top field texcoords + * decl i4 ; First ref macroblock bottom field texcoords + * decl i5 ; Second ref macroblock top field texcoords + * decl i6 ; Second ref macroblock bottom field texcoords + * decl i7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, 
t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* XXX: Pos values off by 0.5? 
*/ + /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = 
TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} -- cgit v1.2.3 From c064d5a1baef7d227e83ecd902575dce16ca20bd Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 23 Nov 2008 14:06:20 -0500 Subject: g3dvl: Use texture instead of surface for backbuffer. --- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 55 ++++++++++++++----------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 9f9dafc8a9..e3b3d03256 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include "vl_csc.h" @@ -33,6 +34,7 @@ struct vlBasicCSC struct pipe_context *pipe; struct pipe_viewport_state viewport; struct pipe_framebuffer_state framebuffer; + struct pipe_texture *framebuffer_tex; void *sampler; void *vertex_shader, *fragment_shader; struct pipe_vertex_buffer vertex_bufs[2]; @@ -49,6 +51,7 @@ static int vlResizeFrameBuffer { struct vlBasicCSC *basic_csc; struct pipe_context *pipe; + struct pipe_texture template; assert(csc); @@ -58,13 +61,6 @@ static int 
vlResizeFrameBuffer if (basic_csc->framebuffer.width == width && basic_csc->framebuffer.height == height) return 0; - if (basic_csc->framebuffer.cbufs[0]) - pipe->winsys->surface_release - ( - pipe->winsys, - &basic_csc->framebuffer.cbufs[0] - ); - basic_csc->viewport.scale[0] = width; basic_csc->viewport.scale[1] = height; basic_csc->viewport.scale[2] = 1; @@ -73,20 +69,30 @@ static int vlResizeFrameBuffer basic_csc->viewport.translate[1] = 0; basic_csc->viewport.translate[2] = 0; basic_csc->viewport.translate[3] = 0; + + if (basic_csc->framebuffer_tex) + pipe_texture_release(&basic_csc->framebuffer_tex); + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.last_level = 0; + template.width[0] = width; + template.height[0] = height; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + + basic_csc->framebuffer_tex = pipe->screen->texture_create(pipe->screen, &template); basic_csc->framebuffer.width = width; basic_csc->framebuffer.height = height; - basic_csc->framebuffer.cbufs[0] = pipe->winsys->surface_alloc(pipe->winsys); - pipe->winsys->surface_alloc_storage + basic_csc->framebuffer.cbufs[0] = pipe->screen->get_tex_surface ( - pipe->winsys, - basic_csc->framebuffer.cbufs[0], - width, - height, - PIPE_FORMAT_A8R8G8B8_UNORM, - /* XXX: SoftPipe doesn't change GPU usage to CPU like it does for textures */ - PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE, - 0 + pipe->screen, + basic_csc->framebuffer_tex, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ); /* Clear to black, in case video doesn't fill the entire window */ @@ -111,7 +117,7 @@ static int vlBegin pipe->set_framebuffer_state(pipe, &basic_csc->framebuffer); pipe->set_viewport_state(pipe, &basic_csc->viewport); pipe->bind_sampler_states(pipe, 1, (void**)&basic_csc->sampler); - 
/* Source texture set in vlPutSurface() */ + /* Source texture set in vlPutPictureCSC() */ pipe->bind_vs_state(pipe, basic_csc->vertex_shader); pipe->bind_fs_state(pipe, basic_csc->fragment_shader); pipe->set_vertex_buffers(pipe, 2, basic_csc->vertex_bufs); @@ -218,12 +224,8 @@ static int vlDestroy basic_csc = (struct vlBasicCSC*)csc; pipe = basic_csc->pipe; - if (basic_csc->framebuffer.cbufs[0]) - pipe->winsys->surface_release - ( - pipe->winsys, - &basic_csc->framebuffer.cbufs[0] - ); + if (basic_csc->framebuffer_tex) + pipe_texture_release(&basic_csc->framebuffer_tex); pipe->delete_sampler_state(pipe, basic_csc->sampler); pipe->delete_vs_state(pipe, basic_csc->vertex_shader); @@ -645,7 +647,10 @@ static int vlInit pipe = csc->pipe; - /* Delay creating the FB until vlPutSurface() so we know window size */ + /* Delay creating the FB until vlPutPictureCSC() so we know window size */ + csc->framebuffer_tex = NULL; + csc->framebuffer.width = 0; + csc->framebuffer.height = 0; csc->framebuffer.num_cbufs = 1; csc->framebuffer.cbufs[0] = NULL; csc->framebuffer.zsbuf = NULL; -- cgit v1.2.3 From d585fdf318e92a1a25bb244db3e8dc374203f43e Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Mon, 24 Nov 2008 17:31:25 -0500 Subject: g3dvl: Map/unmap incoming block texture once per frame. (Technically once per flush, but we flush once per frame.) 
--- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 64 +++++++++++++--------- 1 file changed, 37 insertions(+), 27 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 81ec35be0e..650528ed8f 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -72,6 +72,8 @@ struct vlR16SnormBufferedMC struct pipe_framebuffer_state render_target; struct pipe_sampler_state *samplers[5]; struct pipe_texture *textures[NUM_BUF_SETS][5]; + struct pipe_surface *tex_surface[3]; + short *texels[3]; void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3]; @@ -144,7 +146,6 @@ static inline int vlGrabBlocks short *blocks ) { - struct pipe_surface *tex_surface; short *texels; unsigned int tex_pitch; unsigned int x, y, tb = 0, sb = 0; @@ -153,17 +154,8 @@ static inline int vlGrabBlocks assert(mc); assert(blocks); - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUF_SETS][0], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - texels += mbpy * tex_pitch + mbpx; + tex_pitch = mc->tex_surface[0]->stride / mc->tex_surface[0]->block.size; + texels = mc->texels[0] + mbpy * tex_pitch + mbpx; for (y = 0; y < 2; ++y) { @@ -204,25 +196,14 @@ static inline int vlGrabBlocks } } - pipe_surface_unmap(tex_surface); - /* TODO: Implement 422, 444 */ mbpx >>= 1; mbpy >>= 1; for (tb = 0; tb < 2; ++tb) { - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUF_SETS][tb + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = 
tex_surface->stride / tex_surface->block.size; - - texels += mbpy * tex_pitch + mbpx; + tex_pitch = mc->tex_surface[tb + 1]->stride / mc->tex_surface[tb + 1]->block.size; + texels = mc->texels[tb + 1] + mbpy * tex_pitch + mbpx; if ((coded_block_pattern >> (1 - tb)) & 1) { @@ -244,8 +225,6 @@ static inline int vlGrabBlocks mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x; mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y; } - - pipe_surface_unmap(tex_surface); } return 0; @@ -617,6 +596,12 @@ static int vlFlush offset[mb_type_ex]++; } + + for (i = 0; i < 3; ++i) + { + pipe_surface_unmap(mc->tex_surface[i]); + mc->pipe->screen->tex_surface_release(mc->pipe->screen, &mc->tex_surface[i]); + } mc->render_target.cbufs[0] = pipe->screen->get_tex_surface ( @@ -776,6 +761,18 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered mc->future_surface = batch->future_surface; mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; + + for (i = 0; i < 3; ++i) + { + mc->tex_surface[i] = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[mc->cur_buf % NUM_BUF_SETS][i], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + mc->texels[i] = pipe_surface_map(mc->tex_surface[i], PIPE_BUFFER_USAGE_CPU_WRITE); + } } } else @@ -785,6 +782,18 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered mc->future_surface = batch->future_surface; mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; + + for (i = 0; i < 3; ++i) + { + mc->tex_surface[i] = mc->pipe->screen->get_tex_surface + ( + mc->pipe->screen, + mc->textures[mc->cur_buf % NUM_BUF_SETS][i], + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + ); + + mc->texels[i] = pipe_surface_map(mc->tex_surface[i], PIPE_BUFFER_USAGE_CPU_WRITE); + } } for (i = 0; i < batch->num_macroblocks; ++i) @@ -1060,6 +1069,7 @@ static int vlInit template.depth[0] = 1; 
template.compressed = 0; pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; for (i = 0; i < NUM_BUF_SETS; ++i) mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); -- cgit v1.2.3 From ebd38dd0d63151d6ee89f98af3450b3b9c4fa1f4 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Fri, 5 Dec 2008 02:26:47 -0500 Subject: g3dvl: Flag textures holding incoming data as dynamic. --- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 650528ed8f..1583673c5d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -1069,7 +1069,7 @@ static int vlInit template.depth[0] = 1; template.compressed = 0; pf_get_block(template.format, &template.block); - template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER; + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; for (i = 0; i < NUM_BUF_SETS; ++i) mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); -- cgit v1.2.3 From 0e1301ec8f7bc865b8a81214928e5267393cb8e7 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Fri, 5 Dec 2008 02:27:35 -0500 Subject: g3dvl: Expand YCbCr to full RGB range by default. 
--- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index e3b3d03256..626d23cd46 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -626,7 +626,7 @@ static int vlCreateDataBufs memcpy ( pipe->winsys->buffer_map(pipe->winsys, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &bt_601, + &bt_601_full, sizeof(struct vlFragmentShaderConsts) ); -- cgit v1.2.3 From 87e39466dc49e033c4075f99343856637611b438 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Thu, 11 Dec 2008 18:05:59 -0500 Subject: g3dvl: Allocate one set of bufs, let winsys rename them as necessary. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 272 ++++++++++----------- 1 file changed, 132 insertions(+), 140 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 1583673c5d..337680a306 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -17,16 +17,7 @@ #include "vl_types.h" #include "vl_defs.h" -/* - * TODO: Dynamically determine number of buf sets to use, based on - * video size and available mem, since we can easily run out of memory - * for high res videos. - * Note: Destroying previous frame's buffers and creating new ones - * doesn't work, since the buffer are not actually destroyed until their - * fence is signalled, and if we render fast enough we will create faster - * than we destroy. 
- */ -#define NUM_BUF_SETS 4 /* Number of rotating buffer sets to use */ +const unsigned int DEFAULT_BUF_ALIGNMENT = 256; enum vlMacroBlockTypeEx { @@ -56,32 +47,67 @@ struct vlR16SnormBufferedMC { struct vlRender base; - unsigned int picture_width, picture_height; + unsigned int picture_width; + unsigned int picture_height; enum vlFormat picture_format; + unsigned int macroblocks_per_picture; - unsigned int cur_buf; struct vlSurface *buffered_surface; - struct vlSurface *past_surface, *future_surface; + struct vlSurface *past_surface; + struct vlSurface *future_surface; struct vlVertex2f surface_tex_inv_size; struct vlVertex2f zero_block[3]; unsigned int num_macroblocks; struct vlMpeg2MacroBlock *macroblocks; + struct pipe_surface *tex_surface[3]; + short *texels[3]; struct pipe_context *pipe; struct pipe_viewport_state viewport; struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[NUM_BUF_SETS][5]; - struct pipe_surface *tex_surface[3]; - short *texels[3]; + + union + { + void *all[5]; + struct + { + void *y; + void *cb; + void *cr; + void *ref[2]; + }; + } samplers; + + union + { + struct pipe_texture *all[5]; + struct + { + struct pipe_texture *y; + struct pipe_texture *cb; + struct pipe_texture *cr; + struct pipe_texture *ref[2]; + }; + } textures; + + union + { + struct pipe_vertex_buffer all[3]; + struct + { + struct pipe_vertex_buffer ycbcr; + struct pipe_vertex_buffer ref[2]; + }; + } vertex_bufs; + void *i_vs, *p_vs[2], *b_vs[2]; void *i_fs, *p_fs[2], *b_fs[2]; - struct pipe_vertex_buffer vertex_bufs[NUM_BUF_SETS][3]; struct pipe_vertex_element vertex_elems[8]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; + struct pipe_constant_buffer vs_const_buf; + struct pipe_constant_buffer fs_const_buf; }; -static int vlBegin +static inline int vlBegin ( struct vlRender *render ) @@ -382,7 +408,7 @@ static inline int vlGrabMacroBlockVB vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map 
( mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer, + mc->vertex_bufs.ref[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE ) + pos * 2 * 24; @@ -411,7 +437,7 @@ static inline int vlGrabMacroBlockVB } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][2].buffer); + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[1].buffer); /* fall-through */ } @@ -423,7 +449,7 @@ static inline int vlGrabMacroBlockVB vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer, + mc->vertex_bufs.ref[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE ) + pos * 2 * 24; @@ -469,7 +495,7 @@ static inline int vlGrabMacroBlockVB } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][1].buffer); + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[0].buffer); /* fall-through */ } @@ -497,7 +523,7 @@ static inline int vlGrabMacroBlockVB vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map ( mc->pipe->winsys, - mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer, + mc->vertex_bufs.ycbcr.buffer, PIPE_BUFFER_USAGE_CPU_WRITE ) + pos * 24; @@ -533,7 +559,7 @@ static inline int vlGrabMacroBlockVB 4, 2, 1, mc->zero_block ); - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS][0].buffer); + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ycbcr.buffer); break; } @@ -555,9 +581,6 @@ static int vlFlush unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0}; unsigned int offset[vlNumMacroBlockExTypes]; unsigned int vb_start = 0; - unsigned int mbw; - unsigned int mbh; - unsigned int num_mb_per_frame; unsigned int i; assert(render); @@ -567,11 +590,7 @@ static int vlFlush if (!mc->buffered_surface) return 0; - mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; - mbh = align(mc->picture_height, 
VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; - num_mb_per_frame = mbw * mbh; - - if (mc->num_macroblocks < num_mb_per_frame) + if (mc->num_macroblocks < mc->macroblocks_per_picture) return 0; pipe = mc->pipe; @@ -628,10 +647,10 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeIntra] > 0) { - pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); - pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); + pipe->set_sampler_textures(pipe, 3, mc->textures.all); + pipe->bind_sampler_states(pipe, 3, mc->samplers.all); pipe->bind_vs_state(pipe, mc->i_vs); pipe->bind_fs_state(pipe, mc->i_fs); @@ -641,11 +660,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[0]); pipe->bind_fs_state(pipe, mc->p_fs[0]); @@ -655,11 +674,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - pipe->set_sampler_textures(pipe, 4, 
mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[1]); pipe->bind_fs_state(pipe, mc->p_fs[1]); @@ -669,11 +688,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[0]); pipe->bind_fs_state(pipe, mc->p_fs[0]); @@ -683,11 +702,11 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); + mc->textures.ref[0] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); pipe->bind_vs_state(pipe, mc->p_vs[1]); pipe->bind_fs_state(pipe, mc->p_fs[1]); @@ -697,12 +716,12 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) { - 
pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + mc->textures.ref[1] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures.all); + pipe->bind_sampler_states(pipe, 5, mc->samplers.all); pipe->bind_vs_state(pipe, mc->b_vs[0]); pipe->bind_fs_state(pipe, mc->b_fs[0]); @@ -712,12 +731,12 @@ static int vlFlush if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) { - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs[mc->cur_buf % NUM_BUF_SETS]); + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all); pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); - mc->textures[mc->cur_buf % NUM_BUF_SETS][3] = mc->past_surface->texture; - mc->textures[mc->cur_buf % NUM_BUF_SETS][4] = mc->future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUF_SETS]); - pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); + mc->textures.ref[0] = mc->past_surface->texture; + mc->textures.ref[1] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures.all); + pipe->bind_sampler_states(pipe, 5, mc->samplers.all); pipe->bind_vs_state(pipe, mc->b_vs[1]); pipe->bind_fs_state(pipe, mc->b_fs[1]); @@ -732,7 +751,6 @@ static int vlFlush mc->buffered_surface = NULL; mc->num_macroblocks = 0; - mc->cur_buf++; return 0; } @@ -745,6 +763,7 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered ) { struct vlR16SnormBufferedMC *mc; + bool new_surface = false; unsigned int i; assert(render); @@ -756,39 +775,26 @@ static int 
vlRenderMacroBlocksMpeg2R16SnormBuffered if (mc->buffered_surface != surface) { vlFlush(&mc->base); - mc->buffered_surface = surface; - mc->past_surface = batch->past_surface; - mc->future_surface = batch->future_surface; - mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; - mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; - - for (i = 0; i < 3; ++i) - { - mc->tex_surface[i] = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUF_SETS][i], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - mc->texels[i] = pipe_surface_map(mc->tex_surface[i], PIPE_BUFFER_USAGE_CPU_WRITE); - } + new_surface = true; } } else + new_surface = true; + + if (new_surface) { mc->buffered_surface = surface; mc->past_surface = batch->past_surface; mc->future_surface = batch->future_surface; mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; - + for (i = 0; i < 3; ++i) { mc->tex_surface[i] = mc->pipe->screen->get_tex_surface ( mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUF_SETS][i], + mc->textures.all[i], 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -802,7 +808,7 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered return 0; } -static int vlEnd +static inline int vlEnd ( struct vlRender *render ) @@ -819,7 +825,7 @@ static int vlDestroy { struct vlR16SnormBufferedMC *mc; struct pipe_context *pipe; - unsigned int h, i; + unsigned int i; assert(render); @@ -827,19 +833,14 @@ static int vlDestroy pipe = mc->pipe; for (i = 0; i < 5; ++i) - pipe->delete_sampler_state(pipe, mc->samplers[i]); + pipe->delete_sampler_state(pipe, mc->samplers.all[i]); - for (h = 0; h < NUM_BUF_SETS; ++h) - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[h][i].buffer); + for (i = 0; i < 3; ++i) + pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs.all[i].buffer); /* Textures 3 & 4 are not created directly, no need to release 
them here */ - for (i = 0; i < NUM_BUF_SETS; ++i) - { - pipe_texture_release(&mc->textures[i][0]); - pipe_texture_release(&mc->textures[i][1]); - pipe_texture_release(&mc->textures[i][2]); - } + for (i = 0; i < 3; ++i) + pipe_texture_release(&mc->textures.all[i]); pipe->delete_vs_state(pipe, mc->i_vs); pipe->delete_fs_state(pipe, mc->i_fs); @@ -882,42 +883,39 @@ static int vlCreateDataBufs { const unsigned int mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; - const unsigned int num_mb_per_frame = mbw * mbh; struct pipe_context *pipe; - unsigned int h, i; + unsigned int i; assert(mc); pipe = mc->pipe; + mc->macroblocks_per_picture = mbw * mbh; /* Create our vertex buffers */ - for (h = 0; h < NUM_BUF_SETS; ++h) + mc->vertex_bufs.ycbcr.pitch = sizeof(struct vlVertex2f) * 4; + mc->vertex_bufs.ycbcr.max_index = 24 * mc->macroblocks_per_picture - 1; + mc->vertex_bufs.ycbcr.buffer_offset = 0; + mc->vertex_bufs.ycbcr.buffer = pipe->winsys->buffer_create + ( + pipe->winsys, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 * 24 * mc->macroblocks_per_picture + ); + + for (i = 1; i < 3; ++i) { - mc->vertex_bufs[h][0].pitch = sizeof(struct vlVertex2f) * 4; - mc->vertex_bufs[h][0].max_index = 24 * num_mb_per_frame - 1; - mc->vertex_bufs[h][0].buffer_offset = 0; - mc->vertex_bufs[h][0].buffer = pipe->winsys->buffer_create + mc->vertex_bufs.all[i].pitch = sizeof(struct vlVertex2f) * 2; + mc->vertex_bufs.all[i].max_index = 24 * mc->macroblocks_per_picture - 1; + mc->vertex_bufs.all[i].buffer_offset = 0; + mc->vertex_bufs.all[i].buffer = pipe->winsys->buffer_create ( pipe->winsys, - 1, + DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 4 * 24 * num_mb_per_frame + sizeof(struct vlVertex2f) * 2 * 24 * mc->macroblocks_per_picture ); - - for (i = 1; i < 3; ++i) - { - mc->vertex_bufs[h][i].pitch = 
sizeof(struct vlVertex2f) * 2; - mc->vertex_bufs[h][i].max_index = 24 * num_mb_per_frame - 1; - mc->vertex_bufs[h][i].buffer_offset = 0; - mc->vertex_bufs[h][i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 2 * 24 * num_mb_per_frame - ); - } } /* Position element */ @@ -973,7 +971,7 @@ static int vlCreateDataBufs mc->vs_const_buf.buffer = pipe->winsys->buffer_create ( pipe->winsys, - 1, + DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT, mc->vs_const_buf.size ); @@ -982,7 +980,7 @@ static int vlCreateDataBufs mc->fs_const_buf.buffer = pipe->winsys->buffer_create ( pipe->winsys, - 1, + DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT, mc->fs_const_buf.size ); @@ -996,7 +994,7 @@ static int vlCreateDataBufs pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); - mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * num_mb_per_frame); + mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); return 0; } @@ -1016,6 +1014,13 @@ static int vlInit pipe = mc->pipe; + mc->buffered_surface = NULL; + mc->past_surface = NULL; + mc->future_surface = NULL; + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; + mc->num_macroblocks = 0; + /* For MC we render to textures, which are rounded up to nearest POT */ mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width); mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height); @@ -1057,7 +1062,7 @@ static int vlInit /*sampler.max_lod = ;*/ /*sampler.border_color[i] = ;*/ /*sampler.max_anisotropy = ;*/ - mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); + mc->samplers.all[i] = pipe->create_sampler_state(pipe, &sampler); } memset(&template, 0, sizeof(struct pipe_texture)); @@ -1071,8 +1076,7 @@ static int vlInit pf_get_block(template.format, &template.block); template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; - for (i = 0; i < NUM_BUF_SETS; ++i) - mc->textures[i][0] = 
pipe->screen->texture_create(pipe->screen, &template); + mc->textures.y = pipe->screen->texture_create(pipe->screen, &template); if (mc->picture_format == vlFormatYCbCr420) { @@ -1082,13 +1086,10 @@ static int vlInit else if (mc->picture_format == vlFormatYCbCr422) template.height[0] = vlRoundUpPOT(mc->picture_height / 2); - for (i = 0; i < NUM_BUF_SETS; ++i) - { - mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); - mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); - } + mc->textures.cb = pipe->screen->texture_create(pipe->screen, &template); + mc->textures.cr = pipe->screen->texture_create(pipe->screen, &template); - /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ + /* textures.all[3] & textures.all[4] are assigned from vlSurfaces for P and B macroblocks at render time */ vlCreateVertexShaderIMB(mc); vlCreateFragmentShaderIMB(mc); @@ -1114,8 +1115,7 @@ int vlCreateR16SNormBufferedMC struct vlRender **render ) { - struct vlR16SnormBufferedMC *mc; - unsigned int i; + struct vlR16SnormBufferedMC *mc; assert(pipe); assert(render); @@ -1131,14 +1131,6 @@ int vlCreateR16SNormBufferedMC mc->picture_width = picture_width; mc->picture_height = picture_height; - mc->cur_buf = 0; - mc->buffered_surface = NULL; - mc->past_surface = NULL; - mc->future_surface = NULL; - for (i = 0; i < 3; ++i) - mc->zero_block[i].x = -1.0f; - mc->num_macroblocks = 0; - vlInit(mc); *render = &mc->base; -- cgit v1.2.3 From 9beb004885ab5be652bcb733a5fd9ee729f89921 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 20 Dec 2008 14:42:29 -0500 Subject: nouveau: Catch some more leaks. 
--- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 337680a306..426e5ba065 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -745,6 +745,7 @@ static int vlFlush } pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence); + pipe->screen->tex_surface_release(pipe->screen, mc->render_target.cbufs[0]); for (i = 0; i < 3; ++i) mc->zero_block[i].x = -1.0f; -- cgit v1.2.3 From 3c1b790c313b46e16640d25a93d165646454d3d6 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 20 Dec 2008 16:30:33 -0500 Subject: g3dvl: Map vertex bufs once per frame/flush. --- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 84 ++++++++++++---------- 1 file changed, 48 insertions(+), 36 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 426e5ba065..a31f5c58f4 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -43,6 +43,14 @@ struct vlFragmentShaderConsts struct vlVertex4f div; }; +struct vlMacroBlockVertexStream0 +{ + struct vlVertex2f pos; + struct vlVertex2f luma_tc; + struct vlVertex2f cb_tc; + struct vlVertex2f cr_tc; +}; + struct vlR16SnormBufferedMC { struct vlRender base; @@ -390,7 +398,9 @@ static inline int vlGrabMacroBlockVB ( struct vlR16SnormBufferedMC *mc, struct vlMpeg2MacroBlock *macroblock, - unsigned int pos + unsigned int pos, + struct vlMacroBlockVertexStream0 *ycbcr_vb, + struct vlVertex2f **ref_vb ) { struct vlVertex2f mo_vec[2]; @@ -398,6 +408,7 @@ static inline int vlGrabMacroBlockVB assert(mc); assert(macroblock); + assert(ycbcr_vb); switch 
(macroblock->mb_type) { @@ -405,12 +416,9 @@ static inline int vlGrabMacroBlockVB { struct vlVertex2f *vb; - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs.ref[1].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 2 * 24; + assert(ref_vb && ref_vb[1]); + + vb = ref_vb[1] + pos * 2 * 24; mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; @@ -437,8 +445,6 @@ static inline int vlGrabMacroBlockVB } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[1].buffer); - /* fall-through */ } case vlMacroBlockTypeFwdPredicted: @@ -446,12 +452,9 @@ static inline int vlGrabMacroBlockVB { struct vlVertex2f *vb; - vb = (struct vlVertex2f*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs.ref[0].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 2 * 24; + assert(ref_vb && ref_vb[0]); + + vb = ref_vb[0] + pos * 2 * 24; if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted) { @@ -495,8 +498,6 @@ static inline int vlGrabMacroBlockVB } } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[0].buffer); - /* fall-through */ } case vlMacroBlockTypeIntra: @@ -512,20 +513,9 @@ static inline int vlGrabMacroBlockVB mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2) }; - struct vlMacroBlockVertexStream0 - { - struct vlVertex2f pos; - struct vlVertex2f luma_tc; - struct vlVertex2f cb_tc; - struct vlVertex2f cr_tc; - } *vb; + struct vlMacroBlockVertexStream0 *vb; - vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map - ( - mc->pipe->winsys, - mc->vertex_bufs.ycbcr.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ) + pos * 24; + vb = ycbcr_vb + pos * 24; SET_BLOCK ( @@ -559,8 +549,6 @@ static inline int vlGrabMacroBlockVB 4, 2, 1, mc->zero_block ); - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ycbcr.buffer); - break; } default: @@ -607,13 +595,37 @@ static 
int vlFlush for (i = 1; i < vlNumMacroBlockExTypes; ++i) offset[i] = offset[i - 1] + num_macroblocks[i - 1]; - for (i = 0; i < mc->num_macroblocks; ++i) { - enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + struct vlMacroBlockVertexStream0 *ycbcr_vb; + struct vlVertex2f *ref_vb[2]; + + ycbcr_vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs.ycbcr.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); + + for (i = 0; i < 2; ++i) + ref_vb[i] = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ( + mc->pipe->winsys, + mc->vertex_bufs.ref[i].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE + ); - vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex]); + for (i = 0; i < mc->num_macroblocks; ++i) + { + enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + + vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex], ycbcr_vb, ref_vb); + + offset[mb_type_ex]++; + } - offset[mb_type_ex]++; + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ycbcr.buffer); + for (i = 0; i < 2; ++i) + mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[i].buffer); } for (i = 0; i < 3; ++i) -- cgit v1.2.3 From db1021a37c29a60c70ce294077680ca3e98a6460 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 20 Dec 2008 16:31:29 -0500 Subject: g3dvl: Get rid of old unbuffered motion compensation code. 
--- src/gallium/state_trackers/g3dvl/Makefile | 2 +- src/gallium/state_trackers/g3dvl/vl_context.c | 2 - src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c | 2344 --------------------- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h | 18 - 4 files changed, 1 insertion(+), 2365 deletions(-) delete mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c delete mode 100644 src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 4f7a953484..84a0b2c6d8 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -1,6 +1,6 @@ TARGET = libg3dvl.a OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \ - vl_r16snorm_mc.o vl_r16snorm_mc_buf.o + vl_r16snorm_mc_buf.o GALLIUMDIR = ../.. CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index fe107e406d..5b7bb73b39 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -5,7 +5,6 @@ #include #include #include "vl_render.h" -#include "vl_r16snorm_mc.h" #include "vl_r16snorm_mc_buf.h" #include "vl_csc.h" #include "vl_basic_csc.h" @@ -127,7 +126,6 @@ int vlCreateContext vlInitCommon(ctx); - /*vlCreateR16SNormMC(pipe, picture_width, picture_height, picture_format, &ctx->render);*/ vlCreateR16SNormBufferedMC(pipe, picture_width, picture_height, picture_format, &ctx->render); vlCreateBasicCSC(pipe, &ctx->csc); diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c deleted file mode 100644 index 3272220ef8..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.c +++ /dev/null @@ -1,2344 +0,0 @@ -#define 
VL_INTERNAL -#include "vl_r16snorm_mc.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include "vl_render.h" -#include "vl_shader_build.h" -#include "vl_surface.h" -#include "vl_util.h" -#include "vl_types.h" -#include "vl_defs.h" - -#define NUM_BUFS 4 /* Number of rotating buffers to use */ - -struct vlVertexShaderConsts -{ - /*struct vlVertex4f scale; - struct vlVertex4f denorm;*/ - struct vlVertex4f scale; - struct vlVertex4f mb_pos_trans; - struct vlVertex4f denorm; - struct - { - struct vlVertex4f top_field; - struct vlVertex4f bottom_field; - } mb_tc_trans[2]; -}; - -struct vlFragmentShaderConsts -{ - struct vlVertex4f multiplier; - struct vlVertex4f div; -}; - -struct vlR16SnormMC -{ - struct vlRender base; - - unsigned int video_width, video_height; - enum vlFormat video_format; - unsigned int cur_buf; - - struct pipe_context *pipe; - struct pipe_viewport_state viewport; - struct pipe_framebuffer_state render_target; - struct pipe_sampler_state *samplers[5]; - struct pipe_texture *textures[NUM_BUFS][5]; - void *i_vs, *p_vs[2], *b_vs[2]; - void *i_fs, *p_fs[2], *b_fs[2]; - struct pipe_vertex_buffer vertex_bufs[3]; - struct pipe_vertex_element vertex_elems[3]; - struct pipe_constant_buffer vs_const_buf, fs_const_buf; -}; - -static int vlBegin -( - struct vlRender *render -) -{ - struct vlR16SnormMC *mc; - struct pipe_context *pipe; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - pipe = mc->pipe; - - /* Frame buffer set in vlRender*Macroblock() */ - /* Shaders, samplers, textures set in vlRender*Macroblock() */ - pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs); - pipe->set_vertex_elements(pipe, 3, mc->vertex_elems); - pipe->set_viewport_state(pipe, &mc->viewport); - pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); - pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); - - return 0; -} - -/*static int vlGrabMacroBlock -( - struct vlR16SnormMC *mc, - struct 
vlMpeg2MacroBlock *macroblock -) -{ - assert(mc); - assert(macroblock); - - - - return 0; -}*/ - -/*#define DO_IDCT*/ - -#ifdef DO_IDCT -static int vlTransformBlock(short *src, short *dst, short bias) -{ - static const float basis[8][8] = - { - {0.3536, 0.4904, 0.4619, 0.4157, 0.3536, 0.2778, 0.1913, 0.0975}, - {0.3536, 0.4157, 0.1913, -0.0975, -0.3536, -0.4904, -0.4619, -0.2778}, - {0.3536, 0.2778, -0.1913, -0.4904, -0.3536, 0.0975, 0.4619, 0.4157}, - {0.3536, 0.0975, -0.4619, -0.2778, 0.3536, 0.4157, -0.1913, -0.4904}, - {0.3536, -0.0975, -0.4619, 0.2778, 0.3536, -0.4157, -0.1913, 0.4904}, - {0.3536, -0.2778, -0.1913, 0.4904, -0.3536, -0.0975, 0.4619, -0.4157}, - {0.3536, -0.4157, 0.1913, 0.0975, -0.3536, 0.4904, -0.4619, 0.2778}, - {0.3536, -0.4904, 0.4619, -0.4157, 0.3536, -0.2778, 0.1913, -0.0975} - }; - - unsigned int x, y; - short tmp[64]; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - tmp[y * VL_BLOCK_WIDTH + x] = (short) - ( - src[y * VL_BLOCK_WIDTH + 0] * basis[x][0] + - src[y * VL_BLOCK_WIDTH + 1] * basis[x][1] + - src[y * VL_BLOCK_WIDTH + 2] * basis[x][2] + - src[y * VL_BLOCK_WIDTH + 3] * basis[x][3] + - src[y * VL_BLOCK_WIDTH + 4] * basis[x][4] + - src[y * VL_BLOCK_WIDTH + 5] * basis[x][5] + - src[y * VL_BLOCK_WIDTH + 6] * basis[x][6] + - src[y * VL_BLOCK_WIDTH + 7] * basis[x][7] - ); - - for (x = 0; x < VL_BLOCK_WIDTH; ++x) - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - { - dst[y * VL_BLOCK_WIDTH + x] = bias + (short) - ( - tmp[0 * VL_BLOCK_WIDTH + x] * basis[y][0] + - tmp[1 * VL_BLOCK_WIDTH + x] * basis[y][1] + - tmp[2 * VL_BLOCK_WIDTH + x] * basis[y][2] + - tmp[3 * VL_BLOCK_WIDTH + x] * basis[y][3] + - tmp[4 * VL_BLOCK_WIDTH + x] * basis[y][4] + - tmp[5 * VL_BLOCK_WIDTH + x] * basis[y][5] + - tmp[6 * VL_BLOCK_WIDTH + x] * basis[y][6] + - tmp[7 * VL_BLOCK_WIDTH + x] * basis[y][7] - ); - if (dst[y * VL_BLOCK_WIDTH + x] > 255) - dst[y * VL_BLOCK_WIDTH + x] = 255; - else if (bias > 0 && dst[y * VL_BLOCK_WIDTH + x] < 0) - 
dst[y * VL_BLOCK_WIDTH + x] = 0; - } - return 0; -} -#endif - -static int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -static int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT / 2; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - dst += VL_BLOCK_HEIGHT * dst_pitch; - - for (; y < VL_BLOCK_HEIGHT; ++y) - memcpy - ( - dst + y * dst_pitch * 2, - src + y * VL_BLOCK_WIDTH, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -static int vlGrabNoBlock(short *dst, unsigned int dst_pitch) -{ - unsigned int y; - - for (y = 0; y < VL_BLOCK_HEIGHT; ++y) - memset - ( - dst + y * dst_pitch, - 0, - VL_BLOCK_WIDTH * 2 - ); - - return 0; -} - -enum vlSampleType -{ - vlSampleTypeFull, - vlSampleTypeDiff -}; - -static int vlGrabBlocks -( - struct vlR16SnormMC *mc, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - enum vlSampleType sample_type, - short *blocks -) -{ - struct pipe_surface *tex_surface; - short *texels; - unsigned int tex_pitch; - unsigned int tb, sb = 0; - - assert(mc); - assert(blocks); - - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUFS][0], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - for (tb = 0; tb < 4; ++tb) - { - if ((coded_block_pattern >> (5 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 
128 : 0); -#endif - - if (dct_type == vlDCTTypeFrameCoded) - vlGrabFrameCodedBlock - ( - cur_block, - texels + tb * tex_pitch * VL_BLOCK_HEIGHT, - tex_pitch - ); - else - vlGrabFieldCodedBlock - ( - cur_block, - texels + (tb % 2) * tex_pitch * VL_BLOCK_HEIGHT + (tb / 2) * tex_pitch, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels + tb * tex_pitch * VL_BLOCK_HEIGHT, tex_pitch); - } - - pipe_surface_unmap(tex_surface); - - /* TODO: Implement 422, 444 */ - for (tb = 0; tb < 2; ++tb) - { - tex_surface = mc->pipe->screen->get_tex_surface - ( - mc->pipe->screen, - mc->textures[mc->cur_buf % NUM_BUFS][tb + 1], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE - ); - - texels = pipe_surface_map(tex_surface, PIPE_BUFFER_USAGE_CPU_WRITE); - tex_pitch = tex_surface->stride / tex_surface->block.size; - - if ((coded_block_pattern >> (1 - tb)) & 1) - { - short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; - -#ifdef DO_IDCT - vlTransformBlock(cur_block, cur_block, sample_type == vlSampleTypeFull ? 
128 : 0); -#endif - - vlGrabFrameCodedBlock - ( - cur_block, - texels, - tex_pitch - ); - - ++sb; - } - else - vlGrabNoBlock(texels, tex_pitch); - - pipe_surface_unmap(tex_surface); - } - - return 0; -} - -static int vlRenderIMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(blocks); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeFull, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - pipe->set_sampler_textures(pipe, 3, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 3, (void**)mc->samplers); - pipe->bind_vs_state(pipe, mc->i_vs); - pipe->bind_fs_state(pipe, mc->i_fs); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - 
mc->cur_buf++; - - return 0; -} - -static int vlRenderPMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - enum vlMotionType mc_type, - short top_x, - short top_y, - short bottom_x, - short bottom_y, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *ref_surface, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != vlPictureTypeFrame) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - - if (mc_type == vlMotionTypeField) - { - vs_consts->denorm.x = (float)surface->texture->width[0]; - vs_consts->denorm.y 
= (float)surface->texture->height[0]; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, mc->p_vs[1]); - pipe->bind_fs_state(pipe, mc->p_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, mc->p_vs[0]); - pipe->bind_fs_state(pipe, mc->p_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - - mc->textures[mc->cur_buf % NUM_BUFS][3] = ref_surface->texture; - pipe->set_sampler_textures(pipe, 4, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 4, (void**)mc->samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderBMacroBlock -( - struct vlR16SnormMC *mc, - enum vlPictureType picture_type, - enum vlFieldOrder field_order, - unsigned int mbx, - unsigned int mby, - enum vlMotionType mc_type, - short top_past_x, - short top_past_y, - short bottom_past_x, - short bottom_past_y, - short top_future_x, - short top_future_y, - short bottom_future_x, - short bottom_future_y, - unsigned int coded_block_pattern, - enum vlDCTType dct_type, - short *blocks, - struct vlSurface *past_surface, - struct vlSurface *future_surface, - struct vlSurface *surface -) -{ - struct pipe_context *pipe; - struct vlVertexShaderConsts *vs_consts; - - assert(motion_vectors); - assert(blocks); - assert(ref_surface); - assert(surface); - - /* TODO: Implement interlaced rendering */ - if (picture_type != 
vlPictureTypeFrame) - return 0; - /* TODO: Implement other MC types */ - if (mc_type != vlMotionTypeFrame && mc_type != vlMotionTypeField) - return 0; - - vlGrabBlocks(mc, coded_block_pattern, dct_type, vlSampleTypeDiff, blocks); - - pipe = mc->pipe; - - vs_consts = pipe->winsys->buffer_map - ( - pipe->winsys, - mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE - ); - - vs_consts->scale.x = VL_MACROBLOCK_WIDTH / (float)surface->texture->width[0]; - vs_consts->scale.y = VL_MACROBLOCK_HEIGHT / (float)surface->texture->height[0]; - vs_consts->scale.z = 1.0f; - vs_consts->scale.w = 1.0f; - vs_consts->mb_pos_trans.x = (mbx * VL_MACROBLOCK_WIDTH) / (float)surface->texture->width[0]; - vs_consts->mb_pos_trans.y = (mby * VL_MACROBLOCK_HEIGHT) / (float)surface->texture->height[0]; - vs_consts->mb_pos_trans.z = 0.0f; - vs_consts->mb_pos_trans.w = 0.0f; - vs_consts->mb_tc_trans[0].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_past_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_past_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[0].top_field.z = 0.0f; - vs_consts->mb_tc_trans[0].top_field.w = 0.0f; - vs_consts->mb_tc_trans[1].top_field.x = (mbx * VL_MACROBLOCK_WIDTH + top_future_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[1].top_field.y = (mby * VL_MACROBLOCK_HEIGHT + top_future_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[1].top_field.z = 0.0f; - vs_consts->mb_tc_trans[1].top_field.w = 0.0f; - - if (mc_type == vlMotionTypeField) - { - vs_consts->denorm.x = (float)surface->texture->width[0]; - vs_consts->denorm.y = (float)surface->texture->height[0]; - - vs_consts->mb_tc_trans[0].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_past_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[0].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_past_y * 0.5f) / (float)surface->texture->height[0]; 
- vs_consts->mb_tc_trans[0].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[0].bottom_field.w = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.x = (mbx * VL_MACROBLOCK_WIDTH + bottom_future_x * 0.5f) / (float)surface->texture->width[0]; - vs_consts->mb_tc_trans[1].bottom_field.y = (mby * VL_MACROBLOCK_HEIGHT + bottom_future_y * 0.5f) / (float)surface->texture->height[0]; - vs_consts->mb_tc_trans[1].bottom_field.z = 0.0f; - vs_consts->mb_tc_trans[1].bottom_field.w = 0.0f; - - pipe->bind_vs_state(pipe, mc->b_vs[1]); - pipe->bind_fs_state(pipe, mc->b_fs[1]); - } - else - { - pipe->bind_vs_state(pipe, mc->b_vs[0]); - pipe->bind_fs_state(pipe, mc->b_fs[0]); - } - - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); - - mc->render_target.cbufs[0] = pipe->screen->get_tex_surface - ( - pipe->screen, - surface->texture, - 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE - ); - pipe->set_framebuffer_state(pipe, &mc->render_target); - - mc->textures[mc->cur_buf % NUM_BUFS][3] = past_surface->texture; - mc->textures[mc->cur_buf % NUM_BUFS][4] = future_surface->texture; - pipe->set_sampler_textures(pipe, 5, mc->textures[mc->cur_buf % NUM_BUFS]); - pipe->bind_sampler_states(pipe, 5, (void**)mc->samplers); - - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, 0, 24); - - mc->cur_buf++; - - return 0; -} - -static int vlRenderMacroBlocksMpeg2R16Snorm -( - struct vlRender *render, - struct vlMpeg2MacroBlockBatch *batch, - struct vlSurface *surface -) -{ - struct vlR16SnormMC *mc; - unsigned int i; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - - /*for (i = 0; i < batch->num_macroblocks; ++i) - vlGrabMacroBlock(batch->macroblocks[i]);*/ - - for (i = 0; i < batch->num_macroblocks; ++i) - { - switch (batch->macroblocks[i].mb_type) - { - case vlMacroBlockTypeIntra: - { - vlRenderIMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].cbp, - 
batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - surface - ); - break; - } - case vlMacroBlockTypeFwdPredicted: - { - vlRenderPMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][0][0], - batch->macroblocks[i].PMV[0][0][1], - batch->macroblocks[i].PMV[1][0][0], - batch->macroblocks[i].PMV[1][0][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->past_surface, - surface - ); - break; - } - case vlMacroBlockTypeBkwdPredicted: - { - vlRenderPMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][1][0], - batch->macroblocks[i].PMV[0][1][1], - batch->macroblocks[i].PMV[1][1][0], - batch->macroblocks[i].PMV[1][1][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->future_surface, - surface - ); - break; - } - case vlMacroBlockTypeBiPredicted: - { - vlRenderBMacroBlock - ( - mc, - batch->picture_type, - batch->field_order, - batch->macroblocks[i].mbx, - batch->macroblocks[i].mby, - batch->macroblocks[i].mo_type, - batch->macroblocks[i].PMV[0][0][0], - batch->macroblocks[i].PMV[0][0][1], - batch->macroblocks[i].PMV[1][0][0], - batch->macroblocks[i].PMV[1][0][1], - batch->macroblocks[i].PMV[0][1][0], - batch->macroblocks[i].PMV[0][1][1], - batch->macroblocks[i].PMV[1][1][0], - batch->macroblocks[i].PMV[1][1][1], - batch->macroblocks[i].cbp, - batch->macroblocks[i].dct_type, - batch->macroblocks[i].blocks, - batch->past_surface, - batch->future_surface, - surface - ); - break; - } - default: - assert(0); - } - } - - return 0; -} - -static int vlEnd -( - struct vlRender *render -) -{ - assert(render); - - return 0; -} - -static int vlFlush -( - struct vlRender *render -) -{ - 
assert(render); - - return 0; -} - -static int vlDestroy -( - struct vlRender *render -) -{ - struct vlR16SnormMC *mc; - struct pipe_context *pipe; - unsigned int i; - - assert(render); - - mc = (struct vlR16SnormMC*)render; - pipe = mc->pipe; - - for (i = 0; i < 5; ++i) - pipe->delete_sampler_state(pipe, mc->samplers[i]); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs[i].buffer); - - /* Textures 3 & 4 are not created directly, no need to release them here */ - for (i = 0; i < NUM_BUFS; ++i) - { - pipe_texture_release(&mc->textures[i][0]); - pipe_texture_release(&mc->textures[i][1]); - pipe_texture_release(&mc->textures[i][2]); - } - - pipe->delete_vs_state(pipe, mc->i_vs); - pipe->delete_fs_state(pipe, mc->i_fs); - - for (i = 0; i < 2; ++i) - { - pipe->delete_vs_state(pipe, mc->p_vs[i]); - pipe->delete_fs_state(pipe, mc->p_fs[i]); - pipe->delete_vs_state(pipe, mc->b_vs[i]); - pipe->delete_fs_state(pipe, mc->b_fs[i]); - } - - pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); - pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); - - free(mc); - - return 0; -} - -/* - * Represents 8 triangles (4 quads, 1 per block) in noormalized coords - * that render a macroblock. - * Need to be scaled to cover mbW*mbH macroblock pixels and translated into - * position on target surface. - */ -static const struct vlVertex2f macroblock_verts[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f}, - {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f}, - - {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f}, - {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f}, - - {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 4 luma blocks arranged - * in a bW*(bH*4) texture. 
First luma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH; third at 0,2bH->bW,3bH; fourth at 0,3bH->bW,4bH. - */ -static const struct vlVertex2f macroblock_luma_texcoords[24] = -{ - {0.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.0f}, - {1.0f, 0.0f}, {0.0f, 0.25f}, {1.0f, 0.25f}, - - {0.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.25f}, - {1.0f, 0.25f}, {0.0f, 0.5f}, {1.0f, 0.5f}, - - {0.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.5f}, - {1.0f, 0.5f}, {0.0f, 0.75f}, {1.0f, 0.75f}, - - {0.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 0.75f}, - {1.0f, 0.75f}, {0.0f, 1.0f}, {1.0f, 1.0f} -}; - -/* - * Represents texcoords for the above for rendering 1 chroma block. - * Straight forward 0,0->1,1 mapping so we can reuse the MB pos vectors. - */ -static const struct vlVertex2f *macroblock_chroma_420_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 2 chroma blocks arranged - * in a bW*(bH*2) texture. First chroma block located at 0,0->bW,bH; second at - * 0,bH->bW,2bH. We can render this with 0,0->1,1 mapping. - * Straight forward 0,0->1,1 mapping so we can reuse MB pos vectors. - */ -static const struct vlVertex2f *macroblock_chroma_422_texcoords = macroblock_verts; - -/* - * Represents texcoords for the above for rendering 4 chroma blocks. - * Same case as 4 luma blocks. - */ -static const struct vlVertex2f *macroblock_chroma_444_texcoords = macroblock_luma_texcoords; - -/* - * Used when rendering P and B macroblocks, multiplier is applied to the A channel, - * which is then added to the L channel, then the bias is subtracted from that to - * get back the differential. The differential is then added to the samples from the - * reference surface(s). 
- */ -static const struct vlFragmentShaderConsts fs_consts = -{ - {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, - {0.5f, 2.0f, 0.0f, 0.0f} -}; - -static int vlCreateVertexShaderIMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 50; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->i_vs = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderIMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = 
tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - */ - for (i = 0; i < 2; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - */ - for (i = 0; i < 3; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 
1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul o0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->i_fs = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFramePMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Ref macroblock texcoords - */ - for (i = 0; i < 4; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate rect into position on ref macroblock */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 3); - ti += 
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldPMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration - ( - &decl, - &tokens[ti], - header, - max_tokens - ti - ); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field ref macroblock texcoords - * decl o4 ; Bottom field ref macroblock texcoords - * decl o5 ; Denormalized vertex pos - */ - for (i = 0; i < 6; i++) - { - decl = vl_decl_output((i == 0 || i == 5) ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - mov o1, i1 ; Move input luma 
texcoords to output - mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* add o3, t0, c3 ; Translate top field rect into position on ref macroblock - add o4, t0, c4 ; Translate bottom field rect into position on ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o5, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 5, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFramePMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - */ - for (i 
= 0; i < 3; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 
1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* tex2d t1, i2, s3 ; Read texel from ref macroblock */ - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldPMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); 
- - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Denormalized vertex pos - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t4 */ - decl = vl_decl_temps(0, 4); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for ref surface texture - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; 
++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* XXX: Pos values off by 0.5? 
*/ - /* sub t4, i4.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->p_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFrameBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - 
decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Unused - * decl c3 ; Translation vector to move past ref macroblock texcoords into position - * decl c4 ; Unused - * decl c5 ; Translation vector to move future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Past ref macroblock texcoords - * decl o4 ; Future ref macroblock texcoords - */ - for (i = 0; i < 5; i++) - { - decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0 */ - decl = vl_decl_temps(0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); 
- } - - /* add o3, t0, c3 ; Translate rect into position on past ref macroblock - add o4, t0, c5 ; Translate rect into position on future ref macroblock */ - for (i = 0; i < 2; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i * 2 + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateVertexShaderFieldBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state vs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); - - ti = 3; - - /* - * decl i0 ; Vertex pos - * decl i1 ; Luma texcoords - * decl i2 ; Chroma texcoords - */ - for (i = 0; i < 3; i++) - { - decl = vl_decl_input(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling vector to scale unit rect to macroblock size - * decl c1 ; Translation vector to move macroblock into position - * decl c2 ; Denorm coefficients - * decl c3 ; Translation vector to move top field past ref macroblock texcoords into position - * decl c4 ; Translation vector to move bottom field past ref macroblock texcoords into position - * decl c5 ; Translation vector to move top field future ref macroblock texcoords into position - * decl c6 ; Translation vector to move bottom field future ref macroblock texcoords into position - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 6); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl o0 ; Vertex pos - * decl o1 ; Luma texcoords - * decl o2 ; Chroma texcoords - * decl o3 ; Top field past ref macroblock texcoords - * decl o4 ; Bottom field past ref macroblock texcoords - * decl o5 ; Top field future ref macroblock texcoords - * decl o6 ; Bottom field future ref macroblock texcoords - * decl o7 ; Denormalized vertex pos - */ - for (i = 0; i < 8; i++) - { - decl = vl_decl_output((i == 0 || i == 7) ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* decl t0, t1 */ - decl = vl_decl_temps(0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* mul t0, i0, c0 ; Scale unit rect to normalized MB size */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add t1, t0, c1 ; Translate rect into position */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mov o0, t1 ; Move vertex pos to output */ - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * mov o1, i1 ; Move input luma texcoords to output - * mov o2, i2 ; Move input chroma texcoords to output - */ - for (i = 1; i < 3; ++i) - { - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* - * add o3, t0, c3 ; Translate top field rect into position on past ref macroblock - * add o4, t0, c4 ; Translate bottom field rect into position on past ref macroblock - * add o5, t0, c5 ; Translate top field rect into position on future ref macroblock - * add o6, t0, c6 ; Translate bottom field rect into position on future ref macroblock - */ - for (i = 0; i < 4; ++i) - { - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 3, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* mul o7, t1, c2 ; Denorm vertex pos */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 7, 
TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - vs.tokens = tokens; - mc->b_vs[1] = pipe->create_vs_state(pipe, &vs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFrameBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 100; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s4 - */ - for (i = 0; i < 4; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t2 */ - decl = 
vl_decl_temps(0, 2); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock - * tex2d t2, i3, s4 ; Read texel from future ref macroblock - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 
2, TGSI_FILE_SAMPLER, i + 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[0] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateFragmentShaderFieldBMB -( - struct vlR16SnormMC *mc -) -{ - const unsigned int max_tokens = 200; - - struct pipe_context *pipe; - struct pipe_shader_state fs; - struct tgsi_token *tokens; - struct tgsi_header *header; - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - - unsigned int ti; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); - - /* Version */ - *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); - /* Header */ - header = (struct tgsi_header*)&tokens[1]; - *header = tgsi_build_header(); - /* Processor */ - *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); - - ti = 3; - - /* - * decl i0 ; Texcoords for s0 - * decl i1 ; Texcoords for s1, s2 - * decl 
i2 ; Texcoords for s3 - * decl i3 ; Texcoords for s3 - * decl i4 ; Texcoords for s4 - * decl i5 ; Texcoords for s4 - * decl i6 ; Denormalized vertex pos - */ - for (i = 0; i < 7; ++i) - { - decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm - * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels - * ; and for Y-mod-2 top/bottom field selection - */ - decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl o0 ; Fragment color */ - decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* decl t0-t5 */ - decl = vl_decl_temps(0, 5); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - - /* - * decl s0 ; Sampler for luma texture - * decl s1 ; Sampler for chroma Cb texture - * decl s2 ; Sampler for chroma Cr texture - * decl s3 ; Sampler for past ref surface texture - * decl s4 ; Sampler for future ref surface texture - */ - for (i = 0; i < 5; ++i) - { - decl = vl_decl_samplers(i, i); - ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); - } - - /* - * tex2d t1, i0, s0 ; Read texel from luma texture - * mov t0.x, t1.x ; Move luma sample into .x component - * tex2d t1, i1, s1 ; Read texel from chroma Cb texture - * mov t0.y, t1.x ; Move Cb sample into .y component - * tex2d t1, i1, s2 ; Read texel from chroma Cr texture - * mov t0.z, t1.x ; Move Cr sample into .z component - */ - for (i = 0; i < 3; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i > 0 ? 
1 : 0, TGSI_FILE_SAMPLER, i); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - } - - /* mul t0, t0, c0 ; Rescale texel to correct range */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* XXX: Pos values off by 0.5? */ - /* sub t4, i6.y, c1.x ; Sub 0.5 from denormalized pos */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 6, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ - inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - 
inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* floor t3, t3 ; Get rid of fractional part */ - inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* mul t3, t3, c1.y ; Multiply by 2 */ - inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); - inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ - inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t1, i2, s3 ; Read texel from past ref macroblock top field - * tex2d t2, i3, s3 ; Read texel from past ref macroblock bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 2, TGSI_FILE_SAMPLER, 3); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* - * tex2d t4, i4, s4 ; Read texel from future ref macroblock top field - * tex2d t5, i5, s4 ; Read texel from future ref macroblock 
bottom field - */ - for (i = 0; i < 2; ++i) - { - inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 4, TGSI_FILE_SAMPLER, 4); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - } - - /* TODO: Move to conditional tex fetch on t3 instead of lerp */ - /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ - inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); - inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; - inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* add o0, t0, t1 ; Add past/future ref and differential to form final output */ - inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - /* end */ - inst = vl_end(); - ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); - - fs.tokens = tokens; - mc->b_fs[1] = pipe->create_fs_state(pipe, &fs); - free(tokens); - - return 0; -} - -static int vlCreateDataBufs -( - struct vlR16SnormMC *mc -) -{ - struct pipe_context *pipe; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - - /* Create our vertex buffer and vertex buffer element */ - mc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[0].max_index = 23; - mc->vertex_bufs[0].buffer_offset = 0; - 
mc->vertex_bufs[0].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 - ); - - mc->vertex_elems[0].src_offset = 0; - mc->vertex_elems[0].vertex_buffer_index = 0; - mc->vertex_elems[0].nr_components = 2; - mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; - - /* Create our texcoord buffers and texcoord buffer elements */ - for (i = 1; i < 3; ++i) - { - mc->vertex_bufs[i].pitch = sizeof(struct vlVertex2f); - mc->vertex_bufs[i].max_index = 23; - mc->vertex_bufs[i].buffer_offset = 0; - mc->vertex_bufs[i].buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(struct vlVertex2f) * 24 - ); - - mc->vertex_elems[i].src_offset = 0; - mc->vertex_elems[i].vertex_buffer_index = i; - mc->vertex_elems[i].nr_components = 2; - mc->vertex_elems[i].src_format = PIPE_FORMAT_R32G32_FLOAT; - } - - /* Fill buffers */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_verts, - sizeof(struct vlVertex2f) * 24 - ); - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_luma_texcoords, - sizeof(struct vlVertex2f) * 24 - ); - /* TODO: Accomodate 422, 444 */ - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->vertex_bufs[2].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - macroblock_chroma_420_texcoords, - sizeof(struct vlVertex2f) * 24 - ); - - for (i = 0; i < 3; ++i) - pipe->winsys->buffer_unmap(pipe->winsys, mc->vertex_bufs[i].buffer); - - /* Create our constant buffer */ - mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); - mc->vs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - PIPE_BUFFER_USAGE_CONSTANT, - mc->vs_const_buf.size - ); - - mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); - mc->fs_const_buf.buffer = pipe->winsys->buffer_create - ( - pipe->winsys, - 1, - 
PIPE_BUFFER_USAGE_CONSTANT, - mc->fs_const_buf.size - ); - - memcpy - ( - pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), - &fs_consts, - sizeof(struct vlFragmentShaderConsts) - ); - - pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); - - return 0; -} - -static int vlInit -( - struct vlR16SnormMC *mc -) -{ - struct pipe_context *pipe; - struct pipe_sampler_state sampler; - struct pipe_texture template; - unsigned int filters[5]; - unsigned int i; - - assert(mc); - - pipe = mc->pipe; - - /* For MC we render to textures, which are rounded up to nearest POT */ - mc->viewport.scale[0] = vlRoundUpPOT(mc->video_width); - mc->viewport.scale[1] = vlRoundUpPOT(mc->video_height); - mc->viewport.scale[2] = 1; - mc->viewport.scale[3] = 1; - mc->viewport.translate[0] = 0; - mc->viewport.translate[1] = 0; - mc->viewport.translate[2] = 0; - mc->viewport.translate[3] = 0; - - mc->render_target.width = vlRoundUpPOT(mc->video_width); - mc->render_target.height = vlRoundUpPOT(mc->video_height); - mc->render_target.num_cbufs = 1; - /* FB for MC stage is a vlSurface, set in vlSetRenderSurface() */ - mc->render_target.zsbuf = NULL; - - filters[0] = PIPE_TEX_FILTER_NEAREST; - filters[1] = mc->video_format == vlFormatYCbCr444 ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[2] = mc->video_format == vlFormatYCbCr444 ? 
PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR; - filters[3] = PIPE_TEX_FILTER_LINEAR; - filters[4] = PIPE_TEX_FILTER_LINEAR; - - for (i = 0; i < 5; ++i) - { - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_img_filter = filters[i]; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = filters[i]; - sampler.compare_mode = PIPE_TEX_COMPARE_NONE; - sampler.compare_func = PIPE_FUNC_ALWAYS; - sampler.normalized_coords = 1; - /*sampler.prefilter = ;*/ - /*sampler.shadow_ambient = ;*/ - /*sampler.lod_bias = ;*/ - sampler.min_lod = 0; - /*sampler.max_lod = ;*/ - /*sampler.border_color[i] = ;*/ - /*sampler.max_anisotropy = ;*/ - mc->samplers[i] = pipe->create_sampler_state(pipe, &sampler); - } - - memset(&template, 0, sizeof(struct pipe_texture)); - template.target = PIPE_TEXTURE_2D; - template.format = PIPE_FORMAT_R16_SNORM; - template.last_level = 0; - template.width[0] = 8; - template.height[0] = 8 * 4; - template.depth[0] = 1; - template.compressed = 0; - pf_get_block(template.format, &template.block); - - for (i = 0; i < NUM_BUFS; ++i) - mc->textures[i][0] = pipe->screen->texture_create(pipe->screen, &template); - - if (mc->video_format == vlFormatYCbCr420) - template.height[0] = 8; - else if (mc->video_format == vlFormatYCbCr422) - template.height[0] = 8 * 2; - else if (mc->video_format == vlFormatYCbCr444) - template.height[0] = 8 * 4; - else - assert(0); - - for (i = 0; i < NUM_BUFS; ++i) - { - mc->textures[i][1] = pipe->screen->texture_create(pipe->screen, &template); - mc->textures[i][2] = pipe->screen->texture_create(pipe->screen, &template); - } - - /* textures[3] & textures[4] are assigned from vlSurfaces for P and B macroblocks at render time */ - - vlCreateVertexShaderIMB(mc); - vlCreateFragmentShaderIMB(mc); - vlCreateVertexShaderFramePMB(mc); - vlCreateVertexShaderFieldPMB(mc); - vlCreateFragmentShaderFramePMB(mc); - 
vlCreateFragmentShaderFieldPMB(mc); - vlCreateVertexShaderFrameBMB(mc); - vlCreateVertexShaderFieldBMB(mc); - vlCreateFragmentShaderFrameBMB(mc); - vlCreateFragmentShaderFieldBMB(mc); - vlCreateDataBufs(mc); - - return 0; -} - -int vlCreateR16SNormMC -( - struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, - struct vlRender **render -) -{ - struct vlR16SnormMC *mc; - - assert(pipe); - assert(render); - - mc = calloc(1, sizeof(struct vlR16SnormMC)); - - mc->base.vlBegin = &vlBegin; - mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16Snorm; - mc->base.vlEnd = &vlEnd; - mc->base.vlFlush = &vlFlush; - mc->base.vlDestroy = &vlDestroy; - mc->pipe = pipe; - mc->video_width = video_width; - mc->video_height = video_height; - mc->cur_buf = 0; - - vlInit(mc); - - *render = &mc->base; - - return 0; -} diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h deleted file mode 100644 index 9842926bf7..0000000000 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef vl_r16snorm_mc_h -#define vl_r16snorm_mc_h - -#include "vl_types.h" - -struct pipe_context; -struct vlRender; - -int vlCreateR16SNormMC -( - struct pipe_context *pipe, - unsigned int video_width, - unsigned int video_height, - enum vlFormat video_format, - struct vlRender **render -); - -#endif -- cgit v1.2.3 From 1e9c3efcc783cee46268cc227234ed118f0cc08b Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 20 Dec 2008 17:09:55 -0500 Subject: g3dvl: Use Gallium MALLOC wrappers. 
--- src/gallium/state_trackers/g3dvl/Makefile | 5 ++++- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 14 +++++++------- src/gallium/state_trackers/g3dvl/vl_context.c | 6 +++--- src/gallium/state_trackers/g3dvl/vl_display.c | 6 +++--- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 10 +++++----- src/gallium/state_trackers/g3dvl/vl_screen.c | 6 +++--- src/gallium/state_trackers/g3dvl/vl_surface.c | 6 +++--- 7 files changed, 28 insertions(+), 25 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile index 84a0b2c6d8..cddfca54fe 100644 --- a/src/gallium/state_trackers/g3dvl/Makefile +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -3,7 +3,10 @@ OBJECTS = vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_r16snorm_mc_buf.o GALLIUMDIR = ../.. -CFLAGS += -g -Wall -fPIC -I${GALLIUMDIR}/include -I${GALLIUMDIR}/auxiliary -I${GALLIUMDIR}/winsys/g3dvl +CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/auxiliary \ + -I${GALLIUMDIR}/winsys/g3dvl \ ############################################# diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 626d23cd46..3ce93cf49d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -1,13 +1,13 @@ #define VL_INTERNAL #include "vl_basic_csc.h" #include -#include #include #include #include #include #include #include +#include #include "vl_csc.h" #include "vl_surface.h" #include "vl_shader_build.h" @@ -237,7 +237,7 @@ static int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vs_const_buf.buffer); pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->fs_const_buf.buffer); - free(basic_csc); + FREE(basic_csc); return 0; } @@ -369,7 +369,7 @@ static int vlCreateVertexShader assert(context); pipe = csc->pipe; 
- tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); @@ -430,7 +430,7 @@ static int vlCreateVertexShader vs.tokens = tokens; csc->vertex_shader = pipe->create_vs_state(pipe, &vs); - free(tokens); + FREE(tokens); return 0; } @@ -456,7 +456,7 @@ static int vlCreateFragmentShader assert(context); pipe = csc->pipe; - tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); /* Version */ *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); @@ -517,7 +517,7 @@ static int vlCreateFragmentShader fs.tokens = tokens; csc->fragment_shader = pipe->create_fs_state(pipe, &fs); - free(tokens); + FREE(tokens); return 0; } @@ -691,7 +691,7 @@ int vlCreateBasicCSC assert(pipe); assert(csc); - basic_csc = calloc(1, sizeof(struct vlBasicCSC)); + basic_csc = CALLOC_STRUCT(vlBasicCSC); if (!basic_csc) return 1; diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 5b7bb73b39..fbea1363d8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -1,9 +1,9 @@ #define VL_INTERNAL #include "vl_context.h" #include -#include #include #include +#include #include "vl_render.h" #include "vl_r16snorm_mc_buf.h" #include "vl_csc.h" @@ -111,7 +111,7 @@ int vlCreateContext assert(context); assert(pipe); - ctx = calloc(1, sizeof(struct vlContext)); + ctx = CALLOC_STRUCT(vlContext); if (!ctx) return 1; @@ -152,7 +152,7 @@ int vlDestroyContext context->pipe->delete_rasterizer_state(context->pipe, context->raster); context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->dsa); - free(context); + FREE(context); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c 
b/src/gallium/state_trackers/g3dvl/vl_display.c index af80faa7f5..dce06de758 100644 --- a/src/gallium/state_trackers/g3dvl/vl_display.c +++ b/src/gallium/state_trackers/g3dvl/vl_display.c @@ -1,7 +1,7 @@ #define VL_INTERNAL #include "vl_display.h" #include -#include +#include int vlCreateDisplay ( @@ -14,7 +14,7 @@ int vlCreateDisplay assert(native_display); assert(display); - dpy = calloc(1, sizeof(struct vlDisplay)); + dpy = CALLOC_STRUCT(vlDisplay); if (!dpy) return 1; @@ -32,7 +32,7 @@ int vlDestroyDisplay { assert(display); - free(display); + FREE(display); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index a31f5c58f4..fcea899ef9 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -1,7 +1,6 @@ #define VL_INTERNAL #include "vl_r16snorm_mc_buf.h" #include -#include #include #include #include @@ -10,6 +9,7 @@ #include #include #include +#include #include "vl_render.h" #include "vl_shader_build.h" #include "vl_surface.h" @@ -869,8 +869,8 @@ static int vlDestroy pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); - free(mc->macroblocks); - free(mc); + FREE(mc->macroblocks); + FREE(mc); return 0; } @@ -1007,7 +1007,7 @@ static int vlCreateDataBufs pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); - mc->macroblocks = malloc(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); + mc->macroblocks = MALLOC(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); return 0; } @@ -1133,7 +1133,7 @@ int vlCreateR16SNormBufferedMC assert(pipe); assert(render); - mc = calloc(1, sizeof(struct vlR16SnormBufferedMC)); + mc = CALLOC_STRUCT(vlR16SnormBufferedMC); mc->base.vlBegin = &vlBegin; mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered; diff --git 
a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c index 484f63b0d4..ade8643a66 100644 --- a/src/gallium/state_trackers/g3dvl/vl_screen.c +++ b/src/gallium/state_trackers/g3dvl/vl_screen.c @@ -1,7 +1,7 @@ #define VL_INTERNAL #include "vl_screen.h" #include -#include +#include int vlCreateScreen ( @@ -17,7 +17,7 @@ int vlCreateScreen assert(pscreen); assert(vl_screen); - scrn = calloc(1, sizeof(struct vlScreen)); + scrn = CALLOC_STRUCT(vlScreen); if (!scrn) return 1; @@ -37,7 +37,7 @@ int vlDestroyScreen { assert(screen); - free(screen); + FREE(screen); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 076bd40d41..911469f966 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -1,11 +1,11 @@ #define VL_INTERNAL #include "vl_surface.h" #include -#include #include #include #include #include +#include #include #include "vl_screen.h" #include "vl_context.h" @@ -28,7 +28,7 @@ int vlCreateSurface assert(screen); assert(surface); - sfc = calloc(1, sizeof(struct vlSurface)); + sfc = CALLOC_STRUCT(vlSurface); if (!sfc) return 1; @@ -64,7 +64,7 @@ int vlDestroySurface assert(surface); pipe_texture_release(&surface->texture); - free(surface); + FREE(surface); return 0; } -- cgit v1.2.3 From 8ee238be7587a232beeb56b1dc3b75e1b8fb903e Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 10 Jan 2009 13:30:29 -0500 Subject: nouveau: Factor out common winsys bits into libnouveaudrm.a --- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index fcea899ef9..c5a73b2bf2 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ 
b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -17,7 +17,7 @@ #include "vl_types.h" #include "vl_defs.h" -const unsigned int DEFAULT_BUF_ALIGNMENT = 256; +const unsigned int DEFAULT_BUF_ALIGNMENT = 1; enum vlMacroBlockTypeEx { @@ -394,7 +394,7 @@ static inline int vlGrabMacroBlock (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ } -static inline int vlGrabMacroBlockVB +static inline int vlGenMacroblockVerts ( struct vlR16SnormBufferedMC *mc, struct vlMpeg2MacroBlock *macroblock, @@ -618,7 +618,7 @@ static int vlFlush { enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); - vlGrabMacroBlockVB(mc, &mc->macroblocks[i], offset[mb_type_ex], ycbcr_vb, ref_vb); + vlGenMacroblockVerts(mc, &mc->macroblocks[i], offset[mb_type_ex], ycbcr_vb, ref_vb); offset[mb_type_ex]++; } @@ -627,7 +627,7 @@ static int vlFlush for (i = 0; i < 2; ++i) mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[i].buffer); } - + for (i = 0; i < 3; ++i) { pipe_surface_unmap(mc->tex_surface[i]); @@ -757,7 +757,7 @@ static int vlFlush } pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence); - pipe->screen->tex_surface_release(pipe->screen, mc->render_target.cbufs[0]); + pipe->screen->tex_surface_release(pipe->screen, &mc->render_target.cbufs[0]); for (i = 0; i < 3; ++i) mc->zero_block[i].x = -1.0f; -- cgit v1.2.3 From 11f91936f21c1ab0b38f0f84bb2cbf82f9cadece Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Tue, 13 Jan 2009 22:58:43 -0500 Subject: g3dvl: Return BadAlloc if we can't create an XvMC surface. 
--- src/gallium/state_trackers/g3dvl/vl_surface.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 911469f966..612438f2ac 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -51,6 +51,12 @@ int vlCreateSurface sfc->texture = vlGetPipeScreen(screen)->texture_create(vlGetPipeScreen(screen), &template); + if (!sfc->texture) + { + FREE(sfc); + return 1; + } + *surface = sfc; return 0; -- cgit v1.2.3 From 3933d338f7fd1a7709d7971036671920f65fcd86 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Wed, 14 Jan 2009 00:28:58 -0500 Subject: g3dvl: Mark all buffers for incoming frame data as discardable. --- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 4 ++-- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 3ce93cf49d..c685bc9c70 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -157,7 +157,7 @@ static int vlPutPictureCSC ( pipe->winsys, basic_csc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); vs_consts->dst_scale.x = destw / (float)basic_csc->framebuffer.cbufs[0]->width; @@ -602,7 +602,7 @@ static int vlCreateDataBufs ( pipe->winsys, 1, - PIPE_BUFFER_USAGE_CONSTANT, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, csc->vs_const_buf.size ); diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index c5a73b2bf2..2e790bb3af 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ 
b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -603,7 +603,7 @@ static int vlFlush ( mc->pipe->winsys, mc->vertex_bufs.ycbcr.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); for (i = 0; i < 2; ++i) @@ -611,7 +611,7 @@ static int vlFlush ( mc->pipe->winsys, mc->vertex_bufs.ref[i].buffer, - PIPE_BUFFER_USAGE_CPU_WRITE + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); for (i = 0; i < mc->num_macroblocks; ++i) @@ -647,7 +647,7 @@ static int vlFlush ( pipe->winsys, mc->vs_const_buf.buffer, - PIPE_BUFFER_USAGE_CPU_WRITE + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); vs_consts->denorm.x = mc->buffered_surface->texture->width[0]; @@ -808,10 +808,10 @@ static int vlRenderMacroBlocksMpeg2R16SnormBuffered ( mc->pipe->screen, mc->textures.all[i], - 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE + 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); - mc->texels[i] = pipe_surface_map(mc->tex_surface[i], PIPE_BUFFER_USAGE_CPU_WRITE); + mc->texels[i] = pipe_surface_map(mc->tex_surface[i], PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD); } } @@ -913,7 +913,7 @@ static int vlCreateDataBufs ( pipe->winsys, DEFAULT_BUF_ALIGNMENT, - PIPE_BUFFER_USAGE_VERTEX, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, sizeof(struct vlVertex2f) * 4 * 24 * mc->macroblocks_per_picture ); @@ -926,7 +926,7 @@ static int vlCreateDataBufs ( pipe->winsys, DEFAULT_BUF_ALIGNMENT, - PIPE_BUFFER_USAGE_VERTEX, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, sizeof(struct vlVertex2f) * 2 * 24 * mc->macroblocks_per_picture ); } @@ -985,7 +985,7 @@ static int vlCreateDataBufs ( pipe->winsys, DEFAULT_BUF_ALIGNMENT, - PIPE_BUFFER_USAGE_CONSTANT, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, mc->vs_const_buf.size ); -- cgit v1.2.3 From 9ddca0b41d16a68beebddc7765fc2e354b6bc6fe Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 18 Jan 2009 18:11:18 -0500 Subject: g3dvl: Ref count 
everywhere. --- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 50 ++++++++++++---------- .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 46 ++++++++++---------- src/gallium/state_trackers/g3dvl/vl_surface.c | 2 +- 3 files changed, 52 insertions(+), 46 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index c685bc9c70..da119ff1bd 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -71,7 +71,10 @@ static int vlResizeFrameBuffer basic_csc->viewport.translate[3] = 0; if (basic_csc->framebuffer_tex) - pipe_texture_release(&basic_csc->framebuffer_tex); + { + pipe_surface_reference(&basic_csc->framebuffer.cbufs[0], NULL); + pipe_texture_reference(&basic_csc->framebuffer_tex, NULL); + } memset(&template, 0, sizeof(struct pipe_texture)); template.target = PIPE_TEXTURE_2D; @@ -153,9 +156,9 @@ static int vlPutPictureCSC basic_csc = (struct vlBasicCSC*)csc; pipe = basic_csc->pipe; - vs_consts = pipe->winsys->buffer_map + vs_consts = pipe_buffer_map ( - pipe->winsys, + pipe->screen, basic_csc->vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); @@ -178,7 +181,7 @@ static int vlPutPictureCSC vs_consts->src_trans.z = 0; vs_consts->src_trans.w = 0; - pipe->winsys->buffer_unmap(pipe->winsys, basic_csc->vs_const_buf.buffer); + pipe_buffer_unmap(pipe->screen, basic_csc->vs_const_buf.buffer); pipe->set_sampler_textures(pipe, 1, &surface->texture); pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); @@ -225,17 +228,20 @@ static int vlDestroy pipe = basic_csc->pipe; if (basic_csc->framebuffer_tex) - pipe_texture_release(&basic_csc->framebuffer_tex); + { + pipe_surface_reference(&basic_csc->framebuffer.cbufs[0], NULL); + pipe_texture_reference(&basic_csc->framebuffer_tex, NULL); + } pipe->delete_sampler_state(pipe, basic_csc->sampler); pipe->delete_vs_state(pipe, 
basic_csc->vertex_shader); pipe->delete_fs_state(pipe, basic_csc->fragment_shader); for (i = 0; i < 2; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vertex_bufs[i].buffer); + pipe_buffer_reference(pipe->screen, &basic_csc->vertex_bufs[i].buffer, NULL); - pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->vs_const_buf.buffer); - pipe->winsys->buffer_destroy(pipe->winsys, basic_csc->fs_const_buf.buffer); + pipe_buffer_reference(pipe->screen, &basic_csc->vs_const_buf.buffer, NULL); + pipe_buffer_reference(pipe->screen, &basic_csc->fs_const_buf.buffer, NULL); FREE(basic_csc); @@ -542,9 +548,9 @@ static int vlCreateDataBufs csc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); csc->vertex_bufs[0].max_index = 3; csc->vertex_bufs[0].buffer_offset = 0; - csc->vertex_bufs[0].buffer = pipe->winsys->buffer_create + csc->vertex_bufs[0].buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, 1, PIPE_BUFFER_USAGE_VERTEX, sizeof(struct vlVertex2f) * 4 @@ -552,12 +558,12 @@ static int vlCreateDataBufs memcpy ( - pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(pipe->screen, csc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), surface_verts, sizeof(struct vlVertex2f) * 4 ); - pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[0].buffer); + pipe_buffer_unmap(pipe->screen, csc->vertex_bufs[0].buffer); csc->vertex_elems[0].src_offset = 0; csc->vertex_elems[0].vertex_buffer_index = 0; @@ -571,9 +577,9 @@ static int vlCreateDataBufs csc->vertex_bufs[1].pitch = sizeof(struct vlVertex2f); csc->vertex_bufs[1].max_index = 3; csc->vertex_bufs[1].buffer_offset = 0; - csc->vertex_bufs[1].buffer = pipe->winsys->buffer_create + csc->vertex_bufs[1].buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, 1, PIPE_BUFFER_USAGE_VERTEX, sizeof(struct vlVertex2f) * 4 @@ -581,12 +587,12 @@ static int vlCreateDataBufs memcpy ( - pipe->winsys->buffer_map(pipe->winsys, csc->vertex_bufs[1].buffer, 
PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(pipe->screen, csc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), surface_texcoords, sizeof(struct vlVertex2f) * 4 ); - pipe->winsys->buffer_unmap(pipe->winsys, csc->vertex_bufs[1].buffer); + pipe_buffer_unmap(pipe->screen, csc->vertex_bufs[1].buffer); csc->vertex_elems[1].src_offset = 0; csc->vertex_elems[1].vertex_buffer_index = 1; @@ -598,9 +604,9 @@ static int vlCreateDataBufs * Const buffer contains scaling and translation vectors */ csc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); - csc->vs_const_buf.buffer = pipe->winsys->buffer_create + csc->vs_const_buf.buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, 1, PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, csc->vs_const_buf.size @@ -611,9 +617,9 @@ static int vlCreateDataBufs * Const buffer contains the color conversion matrix and bias vectors */ csc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); - csc->fs_const_buf.buffer = pipe->winsys->buffer_create + csc->fs_const_buf.buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, 1, PIPE_BUFFER_USAGE_CONSTANT, csc->fs_const_buf.size @@ -625,12 +631,12 @@ static int vlCreateDataBufs */ memcpy ( - pipe->winsys->buffer_map(pipe->winsys, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(pipe->screen, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), &bt_601_full, sizeof(struct vlFragmentShaderConsts) ); - pipe->winsys->buffer_unmap(pipe->winsys, csc->fs_const_buf.buffer); + pipe_buffer_unmap(pipe->screen, csc->fs_const_buf.buffer); return 0; } diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 2e790bb3af..0c1ce3cd8d 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -599,17 +599,17 @@ static int vlFlush struct vlMacroBlockVertexStream0 *ycbcr_vb; struct vlVertex2f *ref_vb[2]; - 
ycbcr_vb = (struct vlMacroBlockVertexStream0*)mc->pipe->winsys->buffer_map + ycbcr_vb = (struct vlMacroBlockVertexStream0*)pipe_buffer_map ( - mc->pipe->winsys, + pipe->screen, mc->vertex_bufs.ycbcr.buffer, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); for (i = 0; i < 2; ++i) - ref_vb[i] = (struct vlVertex2f*)mc->pipe->winsys->buffer_map + ref_vb[i] = (struct vlVertex2f*)pipe_buffer_map ( - mc->pipe->winsys, + pipe->screen, mc->vertex_bufs.ref[i].buffer, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); @@ -623,15 +623,15 @@ static int vlFlush offset[mb_type_ex]++; } - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ycbcr.buffer); + pipe_buffer_unmap(pipe->screen, mc->vertex_bufs.ycbcr.buffer); for (i = 0; i < 2; ++i) - mc->pipe->winsys->buffer_unmap(mc->pipe->winsys, mc->vertex_bufs.ref[i].buffer); + pipe_buffer_unmap(pipe->screen, mc->vertex_bufs.ref[i].buffer); } for (i = 0; i < 3; ++i) { pipe_surface_unmap(mc->tex_surface[i]); - mc->pipe->screen->tex_surface_release(mc->pipe->screen, &mc->tex_surface[i]); + pipe_surface_reference(&mc->tex_surface[i], NULL); } mc->render_target.cbufs[0] = pipe->screen->get_tex_surface @@ -653,7 +653,7 @@ static int vlFlush vs_consts->denorm.x = mc->buffered_surface->texture->width[0]; vs_consts->denorm.y = mc->buffered_surface->texture->height[0]; - pipe->winsys->buffer_unmap(pipe->winsys, mc->vs_const_buf.buffer); + pipe_buffer_unmap(pipe->screen, mc->vs_const_buf.buffer); pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); @@ -757,7 +757,7 @@ static int vlFlush } pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence); - pipe->screen->tex_surface_release(pipe->screen, &mc->render_target.cbufs[0]); + pipe_surface_reference(&mc->render_target.cbufs[0], NULL); for (i = 0; i < 3; ++i) mc->zero_block[i].x = -1.0f; @@ -849,11 +849,11 @@ static int vlDestroy 
pipe->delete_sampler_state(pipe, mc->samplers.all[i]); for (i = 0; i < 3; ++i) - pipe->winsys->buffer_destroy(pipe->winsys, mc->vertex_bufs.all[i].buffer); + pipe_buffer_reference(pipe->screen, &mc->vertex_bufs.all[i].buffer, NULL); /* Textures 3 & 4 are not created directly, no need to release them here */ for (i = 0; i < 3; ++i) - pipe_texture_release(&mc->textures.all[i]); + pipe_texture_reference(&mc->textures.all[i], NULL); pipe->delete_vs_state(pipe, mc->i_vs); pipe->delete_fs_state(pipe, mc->i_fs); @@ -866,8 +866,8 @@ static int vlDestroy pipe->delete_fs_state(pipe, mc->b_fs[i]); } - pipe->winsys->buffer_destroy(pipe->winsys, mc->vs_const_buf.buffer); - pipe->winsys->buffer_destroy(pipe->winsys, mc->fs_const_buf.buffer); + pipe_buffer_reference(pipe->screen, &mc->vs_const_buf.buffer, NULL); + pipe_buffer_reference(pipe->screen, &mc->fs_const_buf.buffer, NULL); FREE(mc->macroblocks); FREE(mc); @@ -909,9 +909,9 @@ static int vlCreateDataBufs mc->vertex_bufs.ycbcr.pitch = sizeof(struct vlVertex2f) * 4; mc->vertex_bufs.ycbcr.max_index = 24 * mc->macroblocks_per_picture - 1; mc->vertex_bufs.ycbcr.buffer_offset = 0; - mc->vertex_bufs.ycbcr.buffer = pipe->winsys->buffer_create + mc->vertex_bufs.ycbcr.buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, sizeof(struct vlVertex2f) * 4 * 24 * mc->macroblocks_per_picture @@ -922,9 +922,9 @@ static int vlCreateDataBufs mc->vertex_bufs.all[i].pitch = sizeof(struct vlVertex2f) * 2; mc->vertex_bufs.all[i].max_index = 24 * mc->macroblocks_per_picture - 1; mc->vertex_bufs.all[i].buffer_offset = 0; - mc->vertex_bufs.all[i].buffer = pipe->winsys->buffer_create + mc->vertex_bufs.all[i].buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, sizeof(struct vlVertex2f) * 2 * 24 * mc->macroblocks_per_picture @@ -981,18 +981,18 @@ static int vlCreateDataBufs /* Create 
our constant buffer */ mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); - mc->vs_const_buf.buffer = pipe->winsys->buffer_create + mc->vs_const_buf.buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, mc->vs_const_buf.size ); mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); - mc->fs_const_buf.buffer = pipe->winsys->buffer_create + mc->fs_const_buf.buffer = pipe_buffer_create ( - pipe->winsys, + pipe->screen, DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT, mc->fs_const_buf.size @@ -1000,12 +1000,12 @@ static int vlCreateDataBufs memcpy ( - pipe->winsys->buffer_map(pipe->winsys, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + pipe_buffer_map(pipe->screen, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), &fs_consts, sizeof(struct vlFragmentShaderConsts) ); - pipe->winsys->buffer_unmap(pipe->winsys, mc->fs_const_buf.buffer); + pipe_buffer_unmap(pipe->screen, mc->fs_const_buf.buffer); mc->macroblocks = MALLOC(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 612438f2ac..0fa7b25b92 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -69,7 +69,7 @@ int vlDestroySurface { assert(surface); - pipe_texture_release(&surface->texture); + pipe_texture_reference(&surface->texture, NULL); FREE(surface); return 0; -- cgit v1.2.3 From 76753e30781e88912c0465642616ab16bbc1b4f3 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sun, 18 Jan 2009 21:38:48 -0500 Subject: g3dvl: Some cleanups. 
--- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 0c1ce3cd8d..f0f8294473 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -297,6 +297,7 @@ static inline int vlGrabMacroBlock { assert(mc); assert(macroblock); + assert(mc->num_macroblocks < mc->macroblocks_per_picture); mc->macroblocks[mc->num_macroblocks].mbx = macroblock->mbx; mc->macroblocks[mc->num_macroblocks].mby = macroblock->mby; @@ -330,6 +331,7 @@ static inline int vlGrabMacroBlock } #define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \ + do { \ (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ @@ -392,7 +394,8 @@ static inline int vlGrabMacroBlock (vb)[3].cr_tc.x = (zb)[2].x + (hx); (vb)[3].cr_tc.y = (zb)[2].y; \ (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \ (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ - } + } \ + } while (0) static inline int vlGenMacroblockVerts ( @@ -409,6 +412,7 @@ static inline int vlGenMacroblockVerts assert(mc); assert(macroblock); assert(ycbcr_vb); + assert(pos < mc->macroblocks_per_picture); switch (macroblock->mb_type) { @@ -581,6 +585,8 @@ static int vlFlush if (mc->num_macroblocks < mc->macroblocks_per_picture) return 0; + assert(mc->num_macroblocks <= mc->macroblocks_per_picture); + pipe = mc->pipe; for (i = 0; i < mc->num_macroblocks; ++i) -- cgit v1.2.3 From a7e72231e3c76a9410d192441da309002ea6422d Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 
26 Jan 2009 14:37:21 -0500 Subject: gallium: standardize naming of masks --- src/gallium/state_trackers/g3dvl/vl_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index fbea1363d8..c4c4e23c15 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -81,8 +81,8 @@ static int vlInitCommon(struct vlContext *context) dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; dsa.stencil[i].ref_value = 0; - dsa.stencil[i].value_mask = 0; - dsa.stencil[i].write_mask = 0; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; } dsa.alpha.enabled = 0; dsa.alpha.func = PIPE_FUNC_ALWAYS; -- cgit v1.2.3 From 2299f21f8da816fc4588492965e7dac422da1a96 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 26 Jan 2009 14:49:54 -0500 Subject: gallium: standardize api on the prefix "nr" --- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 2 +- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index da119ff1bd..53ef275349 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -657,7 +657,7 @@ static int vlInit csc->framebuffer_tex = NULL; csc->framebuffer.width = 0; csc->framebuffer.height = 0; - csc->framebuffer.num_cbufs = 1; + csc->framebuffer.nr_cbufs = 1; csc->framebuffer.cbufs[0] = NULL; csc->framebuffer.zsbuf = NULL; diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index f0f8294473..789042f6f2 100644 --- 
a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -1052,7 +1052,7 @@ static int vlInit mc->render_target.width = vlRoundUpPOT(mc->picture_width); mc->render_target.height = vlRoundUpPOT(mc->picture_height); - mc->render_target.num_cbufs = 1; + mc->render_target.nr_cbufs = 1; /* FB for MC stage is a vlSurface created by the user, set at render time */ mc->render_target.zsbuf = NULL; -- cgit v1.2.3 From adfbba476db1fc55006efb748656ebb1a481d143 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 30 Jan 2009 15:56:00 -0500 Subject: gallium: make p_winsys internal move it to pipe/internal/p_winsys_screen.h and start converting the state trackers to the screen usage --- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 1 - src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 53ef275349..122c42ed0e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -2,7 +2,6 @@ #include "vl_basic_csc.h" #include #include -#include #include #include #include diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 789042f6f2..d53482f579 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -2,7 +2,6 @@ #include "vl_r16snorm_mc_buf.h" #include #include -#include #include #include #include @@ -649,9 +648,9 @@ static int vlFlush pipe->set_framebuffer_state(pipe, &mc->render_target); pipe->set_viewport_state(pipe, &mc->viewport); - vs_consts = pipe->winsys->buffer_map + vs_consts = pipe_buffer_map ( - pipe->winsys, + pipe->screen, mc->vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE | 
PIPE_BUFFER_USAGE_DISCARD ); -- cgit v1.2.3 From 776d86606cd8b250802730410d5e55a41944cf0a Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Fri, 6 Feb 2009 14:37:35 -0500 Subject: g3dvl: Catch up to gallium changes, fix build. --- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 10 ++++------ src/gallium/state_trackers/g3dvl/vl_context.c | 2 +- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 10 ++++------ src/gallium/state_trackers/g3dvl/vl_surface.c | 10 +++++----- 4 files changed, 14 insertions(+), 18 deletions(-) (limited to 'src/gallium/state_trackers/g3dvl') diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 122c42ed0e..187a13a560 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -544,7 +544,7 @@ static int vlCreateDataBufs * to display a rendered surface * Quad is rendered as a tri strip */ - csc->vertex_bufs[0].pitch = sizeof(struct vlVertex2f); + csc->vertex_bufs[0].stride = sizeof(struct vlVertex2f); csc->vertex_bufs[0].max_index = 3; csc->vertex_bufs[0].buffer_offset = 0; csc->vertex_bufs[0].buffer = pipe_buffer_create @@ -573,7 +573,7 @@ static int vlCreateDataBufs * Create our texcoord buffer and texcoord buffer element * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices */ - csc->vertex_bufs[1].pitch = sizeof(struct vlVertex2f); + csc->vertex_bufs[1].stride = sizeof(struct vlVertex2f); csc->vertex_bufs[1].max_index = 3; csc->vertex_bufs[1].buffer_offset = 0; csc->vertex_bufs[1].buffer = pipe_buffer_create @@ -602,26 +602,24 @@ static int vlCreateDataBufs * Create our vertex shader's constant buffer * Const buffer contains scaling and translation vectors */ - csc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); csc->vs_const_buf.buffer = pipe_buffer_create ( pipe->screen, 1, PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, - csc->vs_const_buf.size + sizeof(struct 
vlVertexShaderConsts) ); /* * Create our fragment shader's constant buffer * Const buffer contains the color conversion matrix and bias vectors */ - csc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); csc->fs_const_buf.buffer = pipe_buffer_create ( pipe->screen, 1, PIPE_BUFFER_USAGE_CONSTANT, - csc->fs_const_buf.size + sizeof(struct vlFragmentShaderConsts) ); /* diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index c4c4e23c15..65ddb9f01e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -86,7 +86,7 @@ static int vlInitCommon(struct vlContext *context) } dsa.alpha.enabled = 0; dsa.alpha.func = PIPE_FUNC_ALWAYS; - dsa.alpha.ref = 0; + dsa.alpha.ref_value = 0; context->dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); pipe->bind_depth_stencil_alpha_state(pipe, context->dsa); diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index d53482f579..2176bb86d8 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -911,7 +911,7 @@ static int vlCreateDataBufs mc->macroblocks_per_picture = mbw * mbh; /* Create our vertex buffers */ - mc->vertex_bufs.ycbcr.pitch = sizeof(struct vlVertex2f) * 4; + mc->vertex_bufs.ycbcr.stride = sizeof(struct vlVertex2f) * 4; mc->vertex_bufs.ycbcr.max_index = 24 * mc->macroblocks_per_picture - 1; mc->vertex_bufs.ycbcr.buffer_offset = 0; mc->vertex_bufs.ycbcr.buffer = pipe_buffer_create @@ -924,7 +924,7 @@ static int vlCreateDataBufs for (i = 1; i < 3; ++i) { - mc->vertex_bufs.all[i].pitch = sizeof(struct vlVertex2f) * 2; + mc->vertex_bufs.all[i].stride = sizeof(struct vlVertex2f) * 2; mc->vertex_bufs.all[i].max_index = 24 * mc->macroblocks_per_picture - 1; mc->vertex_bufs.all[i].buffer_offset = 0; mc->vertex_bufs.all[i].buffer = pipe_buffer_create @@ -985,22 
+985,20 @@ static int vlCreateDataBufs mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; /* Create our constant buffer */ - mc->vs_const_buf.size = sizeof(struct vlVertexShaderConsts); mc->vs_const_buf.buffer = pipe_buffer_create ( pipe->screen, DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, - mc->vs_const_buf.size + sizeof(struct vlVertexShaderConsts) ); - mc->fs_const_buf.size = sizeof(struct vlFragmentShaderConsts); mc->fs_const_buf.buffer = pipe_buffer_create ( pipe->screen, DEFAULT_BUF_ALIGNMENT, PIPE_BUFFER_USAGE_CONSTANT, - mc->fs_const_buf.size + sizeof(struct vlFragmentShaderConsts) ); memcpy diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c index 0fa7b25b92..92388f7978 100644 --- a/src/gallium/state_trackers/g3dvl/vl_surface.c +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -152,9 +152,9 @@ int vlPutPicture bind_pipe_drawable(pipe, drawable); - pipe->winsys->flush_frontbuffer + pipe->screen->flush_frontbuffer ( - pipe->winsys, + pipe->screen, csc->vlGetFrameBuffer(csc), pipe->priv ); @@ -172,13 +172,13 @@ int vlSurfaceGetStatus assert(surface->context); assert(status); - if (surface->render_fence && !surface->context->pipe->winsys->fence_signalled(surface->context->pipe->winsys, surface->render_fence, 0)) + if (surface->render_fence && !surface->context->pipe->screen->fence_signalled(surface->context->pipe->screen, surface->render_fence, 0)) { *status = vlResourceStatusRendering; return 0; } - if (surface->disp_fence && !surface->context->pipe->winsys->fence_signalled(surface->context->pipe->winsys, surface->disp_fence, 0)) + if (surface->disp_fence && !surface->context->pipe->screen->fence_signalled(surface->context->pipe->screen, surface->disp_fence, 0)) { *status = vlResourceStatusDisplaying; return 0; @@ -211,7 +211,7 @@ int vlSurfaceSync assert(surface->context); assert(surface->render_fence); - 
surface->context->pipe->winsys->fence_finish(surface->context->pipe->winsys, surface->render_fence, 0); + surface->context->pipe->screen->fence_finish(surface->context->pipe->screen, surface->render_fence, 0); return 0; } -- cgit v1.2.3