summaryrefslogtreecommitdiff
path: root/src/mesa/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/common/driverfuncs.c21
-rw-r--r--src/mesa/drivers/common/meta.c1459
-rw-r--r--src/mesa/drivers/common/meta.h109
-rw-r--r--src/mesa/drivers/dri/common/extension_helper.h68
-rw-r--r--src/mesa/drivers/dri/ffb/ffb_tex.c18
-rw-r--r--src/mesa/drivers/dri/glcore/Makefile84
-rw-r--r--src/mesa/drivers/dri/i915/Makefile1
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.c5
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.h15
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c235
-rw-r--r--src/mesa/drivers/dri/i915/i915_program.c31
-rw-r--r--src/mesa/drivers/dri/i915/i915_program.h5
-rw-r--r--src/mesa/drivers/dri/i915/i915_vtbl.c3
l---------src/mesa/drivers/dri/i915/intel_generatemipmap.c1
-rw-r--r--src/mesa/drivers/dri/i915/intel_tris.c2
-rw-r--r--src/mesa/drivers/dri/i965/Makefile1
-rw-r--r--src/mesa/drivers/dri/i965/brw_cc.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_clip_state.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw.c13
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_gs_state.c5
-rw-r--r--src/mesa/drivers/dri/i965/brw_tex.c32
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c55
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c101
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c38
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c94
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_surface_state.c28
l---------src/mesa/drivers/dri/i965/intel_generatemipmap.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_batchbuffer.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_blit.c5
-rw-r--r--src/mesa/drivers/dri/intel/intel_clear.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c9
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h4
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions.c26
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_generatemipmap.c304
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel.c14
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c8
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_copy.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_draw.c8
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_read.c16
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c10
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.c92
-rw-r--r--src/mesa/drivers/dri/intel/intel_span.h2
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex.c28
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex.h3
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_copy.c25
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_image.c17
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_subimage.c5
-rw-r--r--src/mesa/drivers/dri/r200/Makefile3
-rw-r--r--src/mesa/drivers/dri/r300/Makefile4
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile9
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript37
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.c170
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c161
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c107
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c122
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c413
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c250
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c321
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.c14
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h25
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c175
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h57
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c162
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h59
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c295
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c102
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c294
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h91
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c429
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h235
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c350
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c501
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c253
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c153
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h166
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c641
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h10
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h147
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c896
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h129
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c300
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h57
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.c14
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h5
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c7
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h10
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c43
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c26
-rw-r--r--src/mesa/drivers/dri/r300/radeon_context.h14
-rw-r--r--src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c223
-rw-r--r--src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h36
-rw-r--r--src/mesa/drivers/dri/r600/Makefile4
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c10
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.h30
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c43
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c133
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h9
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c113
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c5
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c703
-rw-r--r--src/mesa/drivers/dri/r600/r700_shader.c90
-rw-r--r--src/mesa/drivers/dri/r600/r700_shader.h1
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.c38
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.h1
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c235
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.h17
l---------src/mesa/drivers/dri/r600/radeon_buffer_objects.c1
l---------src/mesa/drivers/dri/r600/radeon_buffer_objects.h1
-rw-r--r--src/mesa/drivers/dri/radeon/Makefile3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_fbo.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c62
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_span.c62
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texstate.c10
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c19
-rw-r--r--src/mesa/drivers/dri/s3v/s3v_tex.c11
-rw-r--r--src/mesa/drivers/dri/swrast/swrast.c9
-rw-r--r--src/mesa/drivers/dri/tdfx/tdfx_tex.c136
-rw-r--r--src/mesa/drivers/dri/unichrome/via_tex.c5
-rw-r--r--src/mesa/drivers/osmesa/osmesa.c5
-rw-r--r--src/mesa/drivers/windows/gdi/mesa.def18
-rw-r--r--src/mesa/drivers/windows/gdi/wmesa.c5
-rw-r--r--src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c26
-rw-r--r--src/mesa/drivers/windows/icd/mesa.def18
-rw-r--r--src/mesa/drivers/x11/xm_api.c10
-rw-r--r--src/mesa/drivers/x11/xm_dd.c11
140 files changed, 8563 insertions, 3901 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index a9f3c8e727..0f8447cb70 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -56,6 +56,7 @@
#include "swrast/swrast.h"
#include "driverfuncs.h"
+#include "meta.h"
@@ -100,12 +101,12 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->TexSubImage2D = _mesa_store_texsubimage2d;
driver->TexSubImage3D = _mesa_store_texsubimage3d;
driver->GetTexImage = _mesa_get_teximage;
- driver->CopyTexImage1D = _swrast_copy_teximage1d;
- driver->CopyTexImage2D = _swrast_copy_teximage2d;
- driver->CopyTexSubImage1D = _swrast_copy_texsubimage1d;
- driver->CopyTexSubImage2D = _swrast_copy_texsubimage2d;
- driver->CopyTexSubImage3D = _swrast_copy_texsubimage3d;
- driver->GenerateMipmap = _mesa_generate_mipmap;
+ driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D;
+ driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D;
+ driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D;
+ driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D;
+ driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D;
+ driver->GenerateMipmap = _mesa_meta_GenerateMipmap;
driver->TestProxyTexImage = _mesa_test_proxy_teximage;
driver->CompressedTexImage1D = _mesa_store_compressed_teximage1d;
driver->CompressedTexImage2D = _mesa_store_compressed_teximage2d;
@@ -129,10 +130,10 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->UpdateTexturePalette = NULL;
/* imaging */
- driver->CopyColorTable = _swrast_CopyColorTable;
- driver->CopyColorSubTable = _swrast_CopyColorSubTable;
- driver->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
- driver->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
+ driver->CopyColorTable = _mesa_meta_CopyColorTable;
+ driver->CopyColorSubTable = _mesa_meta_CopyColorSubTable;
+ driver->CopyConvolutionFilter1D = _mesa_meta_CopyConvolutionFilter1D;
+ driver->CopyConvolutionFilter2D = _mesa_meta_CopyConvolutionFilter2D;
/* Vertex/fragment programs */
driver->BindProgram = NULL;
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index b6c6ef70fd..02e771c8d8 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -37,18 +37,25 @@
#include "main/arrayobj.h"
#include "main/blend.h"
#include "main/bufferobj.h"
+#include "main/buffers.h"
+#include "main/colortab.h"
+#include "main/convolve.h"
#include "main/depth.h"
#include "main/enable.h"
+#include "main/fbobject.h"
#include "main/image.h"
#include "main/macros.h"
#include "main/matrix.h"
+#include "main/mipmap.h"
#include "main/polygon.h"
#include "main/readpix.h"
#include "main/scissor.h"
#include "main/shaders.h"
+#include "main/state.h"
#include "main/stencil.h"
#include "main/texobj.h"
#include "main/texenv.h"
+#include "main/texformat.h"
#include "main/teximage.h"
#include "main/texparam.h"
#include "main/texstate.h"
@@ -60,6 +67,33 @@
#include "drivers/common/meta.h"
+/**
+ * Return offset in bytes of the field within a vertex struct, cast to a
+ * pointer so that it can be passed as the pointer argument of
+ * _mesa_VertexPointer(), _mesa_TexCoordPointer() and _mesa_ColorPointer().
+ * The macro refers to "struct vertex" by name, so it can only be used in a
+ * scope where such a struct is declared (each meta op below declares its
+ * own local one).
+ */
+#define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
+
+
+/**
+ * Flags passed to _mesa_meta_begin().
+ * Each bit selects one group of GL state for _mesa_meta_begin() to save;
+ * _mesa_meta_end() restores the groups that were saved.
+ */
+/*@{*/
+#define META_ALL ~0x0 /**< all bits set; callers subtract flags to exclude groups */
+#define META_ALPHA_TEST 0x1
+#define META_BLEND 0x2 /**< includes logicop */
+#define META_COLOR_MASK 0x4
+#define META_DEPTH_TEST 0x8
+#define META_FOG 0x10
+#define META_PIXEL_STORE 0x20 /**< pixel pack and unpack attributes */
+#define META_PIXEL_TRANSFER 0x40 /**< RGBA scale/bias, map-color flag, convolution enables */
+#define META_RASTERIZATION 0x80
+#define META_SCISSOR 0x100
+#define META_SHADER 0x200
+#define META_STENCIL_TEST 0x400
+#define META_TRANSFORM 0x800 /**< modelview, projection, clip planes */
+#define META_TEXTURE 0x1000
+#define META_VERTEX 0x2000
+#define META_VIEWPORT 0x4000
+/*@}*/
+
+
/**
* State which we may save/restore across meta ops.
* XXX this may be incomplete...
@@ -87,6 +121,17 @@ struct save_state
/** META_PIXEL_STORE */
struct gl_pixelstore_attrib Pack, Unpack;
+ /** META_PIXEL_TRANSFER */
+ GLfloat RedBias, RedScale;
+ GLfloat GreenBias, GreenScale;
+ GLfloat BlueBias, BlueScale;
+ GLfloat AlphaBias, AlphaScale;
+ GLfloat DepthBias, DepthScale;
+ GLboolean MapColorFlag;
+ GLboolean Convolution1DEnabled;
+ GLboolean Convolution2DEnabled;
+ GLboolean Separable2DEnabled;
+
/** META_RASTERIZATION */
GLenum FrontPolygonMode, BackPolygonMode;
GLboolean PolygonOffset;
@@ -138,6 +183,24 @@ struct save_state
/**
+ * Temporary texture used for glBlitFramebuffer, glDrawPixels, etc.
+ * The instance in gl_meta_state (TempTex) is currently shared by the meta
+ * ops, except that glBitmap keeps its own instance in bitmap_state. We could
+ * create a separate one for each of glDrawPixels, glBlitFramebuffer,
+ * glCopyPixels, etc.
+ */
+struct temp_texture
+{
+ GLuint TexObj; /**< Texture object name; 0 means one-time init not done yet */
+ GLenum Target; /**< GL_TEXTURE_2D or GL_TEXTURE_RECTANGLE */
+ GLsizei MinSize; /**< Min texture size to allocate */
+ GLsizei MaxSize; /**< Max possible texture size */
+ GLboolean NPOT; /**< Non-power of two size OK? */
+ GLsizei Width, Height; /**< Current texture size */
+ GLenum IntFormat; /**< Internal format of the current texture image */
+ GLfloat Sright, Ttop; /**< right, top texcoords */
+};
+
+
+/**
* State for glBlitFramebufer()
*/
struct blit_state
@@ -181,19 +244,24 @@ struct drawpix_state
/**
- * Temporary texture used for glBlitFramebuffer, glDrawPixels, etc.
- * This is currently shared by all the meta ops. But we could create a
- * separate one for each of glDrawPixel, glBlitFramebuffer, glCopyPixels, etc.
+ * State for glBitmap()
*/
-struct temp_texture
+struct bitmap_state
{
- GLuint TexObj;
- GLenum Target; /**< GL_TEXTURE_2D or GL_TEXTURE_RECTANGLE */
- GLsizei MaxSize; /**< Max possible texture size */
- GLboolean NPOT; /**< Non-power of two size OK? */
- GLsizei Width, Height; /**< Current texture size */
- GLenum IntFormat;
- GLfloat Sright, Ttop; /**< right, top texcoords */
+ GLuint ArrayObj;
+ GLuint VBO;
+ struct temp_texture Tex; /**< separate texture from other meta ops */
+};
+
+
+/**
+ * State for _mesa_meta_GenerateMipmap()
+ */
+struct gen_mipmap_state
+{
+ GLuint ArrayObj;
+ GLuint VBO;
+ GLuint FBO;
};
@@ -206,15 +274,12 @@ struct gl_meta_state
struct temp_texture TempTex;
- struct blit_state Blit; /**< For _mesa_meta_blit_framebuffer() */
- struct clear_state Clear; /**< For _mesa_meta_clear() */
- struct copypix_state CopyPix; /**< For _mesa_meta_copy_pixels() */
- struct drawpix_state DrawPix; /**< For _mesa_meta_draw_pixels() */
-
- /* other possible meta-ops:
- * glBitmap()
- * glGenerateMipmap()
- */
+ struct blit_state Blit; /**< For _mesa_meta_BlitFramebuffer() */
+ struct clear_state Clear; /**< For _mesa_meta_Clear() */
+ struct copypix_state CopyPix; /**< For _mesa_meta_CopyPixels() */
+ struct drawpix_state DrawPix; /**< For _mesa_meta_DrawPixels() */
+ struct bitmap_state Bitmap; /**< For _mesa_meta_Bitmap() */
+ struct gen_mipmap_state Mipmap; /**< For _mesa_meta_GenerateMipmap() */
};
@@ -264,16 +329,16 @@ _mesa_meta_begin(GLcontext *ctx, GLbitfield state)
if (state & META_ALPHA_TEST) {
save->AlphaEnabled = ctx->Color.AlphaEnabled;
if (ctx->Color.AlphaEnabled)
- _mesa_Disable(GL_ALPHA_TEST);
+ _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE);
}
if (state & META_BLEND) {
save->BlendEnabled = ctx->Color.BlendEnabled;
if (ctx->Color.BlendEnabled)
- _mesa_Disable(GL_BLEND);
+ _mesa_set_enable(ctx, GL_BLEND, GL_FALSE);
save->ColorLogicOpEnabled = ctx->Color.ColorLogicOpEnabled;
if (ctx->Color.ColorLogicOpEnabled)
- _mesa_Disable(GL_COLOR_LOGIC_OP);
+ _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE);
}
if (state & META_COLOR_MASK) {
@@ -288,7 +353,7 @@ _mesa_meta_begin(GLcontext *ctx, GLbitfield state)
if (state & META_DEPTH_TEST) {
save->Depth = ctx->Depth; /* struct copy */
if (ctx->Depth.Test)
- _mesa_Disable(GL_DEPTH_TEST);
+ _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
}
if (state & META_FOG) {
@@ -304,6 +369,35 @@ _mesa_meta_begin(GLcontext *ctx, GLbitfield state)
ctx->Unpack = ctx->DefaultPacking;
}
+ if (state & META_PIXEL_TRANSFER) {
+ save->RedScale = ctx->Pixel.RedScale;
+ save->RedBias = ctx->Pixel.RedBias;
+ save->GreenScale = ctx->Pixel.GreenScale;
+ save->GreenBias = ctx->Pixel.GreenBias;
+ save->BlueScale = ctx->Pixel.BlueScale;
+ save->BlueBias = ctx->Pixel.BlueBias;
+ save->AlphaScale = ctx->Pixel.AlphaScale;
+ save->AlphaBias = ctx->Pixel.AlphaBias;
+ save->MapColorFlag = ctx->Pixel.MapColorFlag;
+ save->Convolution1DEnabled = ctx->Pixel.Convolution1DEnabled;
+ save->Convolution2DEnabled = ctx->Pixel.Convolution2DEnabled;
+ save->Separable2DEnabled = ctx->Pixel.Separable2DEnabled;
+ ctx->Pixel.RedScale = 1.0F;
+ ctx->Pixel.RedBias = 0.0F;
+ ctx->Pixel.GreenScale = 1.0F;
+ ctx->Pixel.GreenBias = 0.0F;
+ ctx->Pixel.BlueScale = 1.0F;
+ ctx->Pixel.BlueBias = 0.0F;
+ ctx->Pixel.AlphaScale = 1.0F;
+ ctx->Pixel.AlphaBias = 0.0F;
+ ctx->Pixel.MapColorFlag = GL_FALSE;
+ ctx->Pixel.Convolution1DEnabled = GL_FALSE;
+ ctx->Pixel.Convolution2DEnabled = GL_FALSE;
+ ctx->Pixel.Separable2DEnabled = GL_FALSE;
+ /* XXX more state */
+ ctx->NewState |=_NEW_PIXEL;
+ }
+
if (state & META_RASTERIZATION) {
save->FrontPolygonMode = ctx->Polygon.FrontMode;
save->BackPolygonMode = ctx->Polygon.BackMode;
@@ -345,7 +439,7 @@ _mesa_meta_begin(GLcontext *ctx, GLbitfield state)
if (state & META_STENCIL_TEST) {
save->Stencil = ctx->Stencil; /* struct copy */
if (ctx->Stencil.Enabled)
- _mesa_Disable(GL_STENCIL_TEST);
+ _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
/* NOTE: other stencil state not reset */
}
@@ -499,6 +593,23 @@ _mesa_meta_end(GLcontext *ctx)
ctx->Unpack = save->Unpack;
}
+ if (state & META_PIXEL_TRANSFER) {
+ ctx->Pixel.RedScale = save->RedScale;
+ ctx->Pixel.RedBias = save->RedBias;
+ ctx->Pixel.GreenScale = save->GreenScale;
+ ctx->Pixel.GreenBias = save->GreenBias;
+ ctx->Pixel.BlueScale = save->BlueScale;
+ ctx->Pixel.BlueBias = save->BlueBias;
+ ctx->Pixel.AlphaScale = save->AlphaScale;
+ ctx->Pixel.AlphaBias = save->AlphaBias;
+ ctx->Pixel.MapColorFlag = save->MapColorFlag;
+ ctx->Pixel.Convolution1DEnabled = save->Convolution1DEnabled;
+ ctx->Pixel.Convolution2DEnabled = save->Convolution2DEnabled;
+ ctx->Pixel.Separable2DEnabled = save->Separable2DEnabled;
+ /* XXX more state */
+ ctx->NewState |=_NEW_PIXEL;
+ }
+
if (state & META_RASTERIZATION) {
_mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode);
_mesa_PolygonMode(GL_BACK, save->BackPolygonMode);
@@ -669,8 +780,35 @@ _mesa_meta_end(GLcontext *ctx)
/**
- * Return pointer to temp_texture info. This does some one-time init
- * if needed.
+ * One-time init for a temp_texture object.
+ * Choose tex target, compute max tex size, etc.
+ */
+static void
+init_temp_texture(GLcontext *ctx, struct temp_texture *tex)
+{
+ /* prefer texture rectangle: it is always treated as NPOT-capable and
+  * its size limit is taken from MaxTextureRectSize */
+ if (ctx->Extensions.NV_texture_rectangle) {
+ tex->Target = GL_TEXTURE_RECTANGLE;
+ tex->MaxSize = ctx->Const.MaxTextureRectSize;
+ tex->NPOT = GL_TRUE;
+ }
+ else {
+ /* use 2D texture, NPOT if possible; the size limit is derived from
+  * the number of mipmap levels: 2^(MaxTextureLevels - 1) */
+ tex->Target = GL_TEXTURE_2D;
+ tex->MaxSize = 1 << (ctx->Const.MaxTextureLevels - 1);
+ tex->NPOT = ctx->Extensions.ARB_texture_non_power_of_two;
+ }
+ tex->MinSize = 16; /* 16 x 16 at least */
+ assert(tex->MaxSize > 0);
+
+ _mesa_GenTextures(1, &tex->TexObj); /* callers test TexObj != 0 to skip this init next time */
+ _mesa_BindTexture(tex->Target, tex->TexObj); /* the new texture is left bound on return */
+}
+
+
+/**
+ * Return pointer to temp_texture info for non-bitmap ops.
+ * This does some one-time init if needed.
*/
static struct temp_texture *
get_temp_texture(GLcontext *ctx)
@@ -678,24 +816,25 @@ get_temp_texture(GLcontext *ctx)
struct temp_texture *tex = &ctx->Meta->TempTex;
if (!tex->TexObj) {
- /* do one-time init */
+ init_temp_texture(ctx, tex);
+ }
- /* prefer texture rectangle */
- if (ctx->Extensions.NV_texture_rectangle) {
- tex->Target = GL_TEXTURE_RECTANGLE;
- tex->MaxSize = ctx->Const.MaxTextureRectSize;
- tex->NPOT = GL_TRUE;
- }
- else {
- /* use 2D texture, NPOT if possible */
- tex->Target = GL_TEXTURE_2D;
- tex->MaxSize = 1 << (ctx->Const.MaxTextureLevels - 1);
- tex->NPOT = ctx->Extensions.ARB_texture_non_power_of_two;
- }
- assert(tex->MaxSize > 0);
+ return tex;
+}
- _mesa_GenTextures(1, &tex->TexObj);
- _mesa_BindTexture(tex->Target, tex->TexObj);
+
+/**
+ * Return pointer to temp_texture info for _mesa_meta_Bitmap().
+ * We use a separate texture for bitmaps to reduce texture
+ * allocation/deallocation.
+ */
+static struct temp_texture *
+get_bitmap_temp_texture(GLcontext *ctx)
+{
+ struct temp_texture *tex = &ctx->Meta->Bitmap.Tex;
+
+ if (!tex->TexObj) {
+ init_temp_texture(ctx, tex);
}
return tex;
@@ -717,6 +856,9 @@ alloc_texture(struct temp_texture *tex,
{
GLboolean newTex = GL_FALSE;
+ ASSERT(width <= tex->MaxSize);
+ ASSERT(height <= tex->MaxSize);
+
if (width > tex->Width ||
height > tex->Height ||
intFormat != tex->IntFormat) {
@@ -724,13 +866,13 @@ alloc_texture(struct temp_texture *tex,
if (tex->NPOT) {
/* use non-power of two size */
- tex->Width = width;
- tex->Height = height;
+ tex->Width = MAX2(tex->MinSize, width);
+ tex->Height = MAX2(tex->MinSize, height);
}
else {
/* find power of two size */
GLsizei w, h;
- w = h = 16;
+ w = h = tex->MinSize;
while (w < width)
w *= 2;
while (h < height)
@@ -803,7 +945,8 @@ setup_copypix_texture(struct temp_texture *tex,
* Setup/load texture for glDrawPixels.
*/
static void
-setup_drawpix_texture(struct temp_texture *tex,
+setup_drawpix_texture(GLcontext *ctx,
+ struct temp_texture *tex,
GLboolean newTex,
GLenum texIntFormat,
GLsizei width, GLsizei height,
@@ -824,9 +967,17 @@ setup_drawpix_texture(struct temp_texture *tex,
tex->Width, tex->Height, 0, format, type, pixels);
}
else {
+ struct gl_buffer_object *save_unpack_obj = NULL;
+
+ _mesa_reference_buffer_object(ctx, &save_unpack_obj,
+ ctx->Unpack.BufferObj);
+ _mesa_BindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
/* create empty texture */
_mesa_TexImage2D(tex->Target, 0, tex->IntFormat,
tex->Width, tex->Height, 0, format, type, NULL);
+ if (save_unpack_obj != NULL)
+ _mesa_BindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB,
+ save_unpack_obj->Name);
/* load image */
_mesa_TexSubImage2D(tex->Target, 0,
0, 0, width, height, format, type, pixels);
@@ -874,14 +1025,132 @@ init_blit_depth_pixels(GLcontext *ctx)
/**
+ * Try to do a glBlitFramebuffer using no-copy texturing.
+ * We can do this when the src renderbuffer is actually a texture.
+ * But if the src buffer == dst buffer we cannot do this.
+ *
+ * Only the color buffer is handled, and only when the read attachment is
+ * a GL_TEXTURE_2D or GL_TEXTURE_RECTANGLE_ARB texture that is not also the
+ * draw attachment's texture; in every other case the mask is returned
+ * unchanged.  The caller must already have bound the blit VBO and set up
+ * the vertex arrays, since this function only uploads vertex data with
+ * _mesa_BufferSubDataARB() and then draws.
+ *
+ * NOTE(review): readAtt is the address of an array element, so the
+ * "readAtt &&" test below can never fail.  If _ColorReadBufferIndex or
+ * _ColorDrawBufferIndexes[0] can be negative (no buffer selected), the
+ * Attachment[] lookups would be out of range -- confirm that callers
+ * guarantee valid color read/draw buffers.
+ *
+ * NOTE(review): only the min/mag filters of the source texture are
+ * overridden; sampling presumably relies on readAtt->TextureLevel being
+ * the level that is actually sampled -- TODO confirm.
+ *
+ * \return new buffer mask indicating the buffers left to blit using the
+ * normal path.
+ */
+static GLbitfield
+blitframebuffer_texture(GLcontext *ctx,
+ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+ GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLbitfield mask, GLenum filter)
+{
+ if (mask & GL_COLOR_BUFFER_BIT) {
+ const struct gl_framebuffer *drawFb = ctx->DrawBuffer;
+ const struct gl_framebuffer *readFb = ctx->ReadBuffer;
+ const struct gl_renderbuffer_attachment *drawAtt =
+ &drawFb->Attachment[drawFb->_ColorDrawBufferIndexes[0]];
+ const struct gl_renderbuffer_attachment *readAtt =
+ &readFb->Attachment[readFb->_ColorReadBufferIndex];
+
+ if (readAtt && readAtt->Texture) {
+ const struct gl_texture_object *texObj = readAtt->Texture;
+ const GLenum minFilterSave = texObj->MinFilter;
+ const GLenum magFilterSave = texObj->MagFilter;
+ const GLenum target = texObj->Target;
+
+ if (drawAtt->Texture == readAtt->Texture) {
+ /* Can't use same texture as both the source and dest. We need
+ * to handle overlapping blits and besides, some hw may not
+ * support this.
+ */
+ return mask;
+ }
+
+ if (target != GL_TEXTURE_2D && target != GL_TEXTURE_RECTANGLE_ARB) {
+ /* Can't handle other texture types at this time */
+ return mask;
+ }
+
+ /*
+ printf("Blit from texture!\n");
+ printf(" srcAtt %p dstAtt %p\n", readAtt, drawAtt);
+ printf(" srcTex %p dstText %p\n", texObj, drawAtt->Texture);
+ */
+
+ /* Prepare src texture state */
+ _mesa_BindTexture(target, texObj->Name);
+ _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, filter);
+ _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, filter);
+ _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+ /*_mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_FALSE);*/
+ _mesa_set_enable(ctx, target, GL_TRUE);
+
+ /* Prepare vertex data (the VBO was previously created and bound) */
+ {
+ struct vertex {
+ GLfloat x, y, s, t;
+ };
+ struct vertex verts[4];
+ GLfloat s0, t0, s1, t1;
+
+ if (target == GL_TEXTURE_2D) { /* normalize src pixel coords by the image size */
+ const struct gl_texture_image *texImage
+ = _mesa_select_tex_image(ctx, texObj, target,
+ readAtt->TextureLevel);
+ s0 = srcX0 / (float) texImage->Width;
+ s1 = srcX1 / (float) texImage->Width;
+ t0 = srcY0 / (float) texImage->Height;
+ t1 = srcY1 / (float) texImage->Height;
+ }
+ else { /* unnormalized: src pixel coords are used directly */
+ assert(target == GL_TEXTURE_RECTANGLE_ARB);
+ s0 = srcX0;
+ s1 = srcX1;
+ t0 = srcY0;
+ t1 = srcY1;
+ }
+
+ verts[0].x = (GLfloat) dstX0;
+ verts[0].y = (GLfloat) dstY0;
+ verts[1].x = (GLfloat) dstX1;
+ verts[1].y = (GLfloat) dstY0;
+ verts[2].x = (GLfloat) dstX1;
+ verts[2].y = (GLfloat) dstY1;
+ verts[3].x = (GLfloat) dstX0;
+ verts[3].y = (GLfloat) dstY1;
+
+ verts[0].s = s0;
+ verts[0].t = t0;
+ verts[1].s = s1;
+ verts[1].t = t0;
+ verts[2].s = s1;
+ verts[2].t = t1;
+ verts[3].s = s0;
+ verts[3].t = t1;
+
+ _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+ }
+
+ _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+ /* Restore texture's filter state, the texture binding will
+ * be restored by _mesa_meta_end().
+ */
+ _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, minFilterSave);
+ _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, magFilterSave);
+
+ /* Done with color buffer */
+ mask &= ~GL_COLOR_BUFFER_BIT;
+ }
+ }
+
+ return mask;
+}
+
+
+/**
* Meta implementation of ctx->Driver.BlitFramebuffer() in terms
* of texture mapping and polygon rendering.
*/
void
-_mesa_meta_blit_framebuffer(GLcontext *ctx,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter)
+_mesa_meta_BlitFramebuffer(GLcontext *ctx,
+ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+ GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLbitfield mask, GLenum filter)
{
struct blit_state *blit = &ctx->Meta->Blit;
struct temp_texture *tex = get_temp_texture(ctx);
@@ -892,7 +1161,10 @@ _mesa_meta_blit_framebuffer(GLcontext *ctx,
const GLint srcH = abs(srcY1 - srcY0);
const GLboolean srcFlipX = srcX1 < srcX0;
const GLboolean srcFlipY = srcY1 < srcY0;
- GLfloat verts[4][4]; /* four verts of X,Y,S,T */
+ struct vertex {
+ GLfloat x, y, s, t;
+ };
+ struct vertex verts[4];
GLboolean newTex;
if (srcW > maxTexSize || srcH > maxTexSize) {
@@ -931,10 +1203,8 @@ _mesa_meta_blit_framebuffer(GLcontext *ctx,
NULL, GL_DYNAMIC_DRAW_ARB);
/* setup vertex arrays */
- _mesa_VertexPointer(2, GL_FLOAT, sizeof(verts[0]),
- (void *) (0 * sizeof(GLfloat)));
- _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(verts[0]),
- (void *) (2 * sizeof(GLfloat)));
+ _mesa_VertexPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+ _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
_mesa_EnableClientState(GL_VERTEX_ARRAY);
_mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
}
@@ -943,33 +1213,45 @@ _mesa_meta_blit_framebuffer(GLcontext *ctx,
_mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO);
}
+ /* Try faster, direct texture approach first */
+ mask = blitframebuffer_texture(ctx, srcX0, srcY0, srcX1, srcY1,
+ dstX0, dstY0, dstX1, dstY1, mask, filter);
+ if (mask == 0x0) {
+ _mesa_meta_end(ctx);
+ return;
+ }
+
+ /* Continue with "normal" approach: copy the src rect into a temporary
+ * texture, then "blit" it by drawing a textured quad.
+ */
+
newTex = alloc_texture(tex, srcW, srcH, GL_RGBA);
/* vertex positions/texcoords (after texture allocation!) */
{
- verts[0][0] = (GLfloat) dstX0;
- verts[0][1] = (GLfloat) dstY0;
- verts[1][0] = (GLfloat) dstX1;
- verts[1][1] = (GLfloat) dstY0;
- verts[2][0] = (GLfloat) dstX1;
- verts[2][1] = (GLfloat) dstY1;
- verts[3][0] = (GLfloat) dstX0;
- verts[3][1] = (GLfloat) dstY1;
-
- verts[0][2] = 0.0F;
- verts[0][3] = 0.0F;
- verts[1][2] = tex->Sright;
- verts[1][3] = 0.0F;
- verts[2][2] = tex->Sright;
- verts[2][3] = tex->Ttop;
- verts[3][2] = 0.0F;
- verts[3][3] = tex->Ttop;
+ verts[0].x = (GLfloat) dstX0;
+ verts[0].y = (GLfloat) dstY0;
+ verts[1].x = (GLfloat) dstX1;
+ verts[1].y = (GLfloat) dstY0;
+ verts[2].x = (GLfloat) dstX1;
+ verts[2].y = (GLfloat) dstY1;
+ verts[3].x = (GLfloat) dstX0;
+ verts[3].y = (GLfloat) dstY1;
+
+ verts[0].s = 0.0F;
+ verts[0].t = 0.0F;
+ verts[1].s = tex->Sright;
+ verts[1].t = 0.0F;
+ verts[2].s = tex->Sright;
+ verts[2].t = tex->Ttop;
+ verts[3].s = 0.0F;
+ verts[3].t = tex->Ttop;
/* upload new vertex data */
_mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
}
- _mesa_Enable(tex->Target);
+ _mesa_set_enable(ctx, tex->Target, GL_TRUE);
if (mask & GL_COLOR_BUFFER_BIT) {
setup_copypix_texture(tex, newTex, srcX, srcY, srcW, srcH,
@@ -990,7 +1272,7 @@ _mesa_meta_blit_framebuffer(GLcontext *ctx,
_mesa_ReadPixels(srcX, srcY, srcW, srcH,
GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
- setup_drawpix_texture(tex, newTex, GL_DEPTH_COMPONENT, srcW, srcH,
+ setup_drawpix_texture(ctx, tex, newTex, GL_DEPTH_COMPONENT, srcW, srcH,
GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
_mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP);
@@ -1011,7 +1293,7 @@ _mesa_meta_blit_framebuffer(GLcontext *ctx,
/* XXX can't easily do stencil */
}
- _mesa_Disable(tex->Target);
+ _mesa_set_enable(ctx, tex->Target, GL_FALSE);
_mesa_meta_end(ctx);
@@ -1026,10 +1308,13 @@ _mesa_meta_blit_framebuffer(GLcontext *ctx,
* Meta implementation of ctx->Driver.Clear() in terms of polygon rendering.
*/
void
-_mesa_meta_clear(GLcontext *ctx, GLbitfield buffers)
+_mesa_meta_Clear(GLcontext *ctx, GLbitfield buffers)
{
struct clear_state *clear = &ctx->Meta->Clear;
- GLfloat verts[4][7]; /* four verts of X,Y,Z,R,G,B,A */
+ struct vertex {
+ GLfloat x, y, z, r, g, b, a;
+ };
+ struct vertex verts[4];
/* save all state but scissor, pixel pack/unpack */
GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE;
@@ -1054,10 +1339,8 @@ _mesa_meta_clear(GLcontext *ctx, GLbitfield buffers)
NULL, GL_DYNAMIC_DRAW_ARB);
/* setup vertex arrays */
- _mesa_VertexPointer(3, GL_FLOAT, sizeof(verts[0]),
- (void *) (0 * sizeof(GLfloat)));
- _mesa_ColorPointer(4, GL_FLOAT, sizeof(verts[0]),
- (void *) (3 * sizeof(GLfloat)));
+ _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+ _mesa_ColorPointer(4, GL_FLOAT, sizeof(struct vertex), OFFSET(r));
_mesa_EnableClientState(GL_VERTEX_ARRAY);
_mesa_EnableClientState(GL_COLOR_ARRAY);
}
@@ -1107,22 +1390,25 @@ _mesa_meta_clear(GLcontext *ctx, GLbitfield buffers)
const GLfloat z = 1.0 - 2.0 * ctx->Depth.Clear;
GLuint i;
- verts[0][0] = x0;
- verts[0][1] = y0;
- verts[0][2] = z;
- verts[1][0] = x1;
- verts[1][1] = y0;
- verts[1][2] = z;
- verts[2][0] = x1;
- verts[2][1] = y1;
- verts[2][2] = z;
- verts[3][0] = x0;
- verts[3][1] = y1;
- verts[3][2] = z;
+ verts[0].x = x0;
+ verts[0].y = y0;
+ verts[0].z = z;
+ verts[1].x = x1;
+ verts[1].y = y0;
+ verts[1].z = z;
+ verts[2].x = x1;
+ verts[2].y = y1;
+ verts[2].z = z;
+ verts[3].x = x0;
+ verts[3].y = y1;
+ verts[3].z = z;
/* vertex colors */
for (i = 0; i < 4; i++) {
- COPY_4FV(&verts[i][3], ctx->Color.ClearColor);
+ verts[i].r = ctx->Color.ClearColor[0];
+ verts[i].g = ctx->Color.ClearColor[1];
+ verts[i].b = ctx->Color.ClearColor[2];
+ verts[i].a = ctx->Color.ClearColor[3];
}
/* upload new vertex data */
@@ -1141,13 +1427,16 @@ _mesa_meta_clear(GLcontext *ctx, GLbitfield buffers)
* of texture mapping and polygon rendering.
*/
void
-_mesa_meta_copy_pixels(GLcontext *ctx, GLint srcX, GLint srcY,
- GLsizei width, GLsizei height,
- GLint dstX, GLint dstY, GLenum type)
+_mesa_meta_CopyPixels(GLcontext *ctx, GLint srcX, GLint srcY,
+ GLsizei width, GLsizei height,
+ GLint dstX, GLint dstY, GLenum type)
{
struct copypix_state *copypix = &ctx->Meta->CopyPix;
struct temp_texture *tex = get_temp_texture(ctx);
- GLfloat verts[4][5]; /* four verts of X,Y,Z,S,T */
+ struct vertex {
+ GLfloat x, y, z, s, t;
+ };
+ struct vertex verts[4];
GLboolean newTex;
GLenum intFormat = GL_RGBA;
@@ -1185,10 +1474,8 @@ _mesa_meta_copy_pixels(GLcontext *ctx, GLint srcX, GLint srcY,
NULL, GL_DYNAMIC_DRAW_ARB);
/* setup vertex arrays */
- _mesa_VertexPointer(3, GL_FLOAT, sizeof(verts[0]),
- (void *) (0 * sizeof(GLfloat)));
- _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(verts[0]),
- (void *) (3 * sizeof(GLfloat)));
+ _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+ _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
_mesa_EnableClientState(GL_VERTEX_ARRAY);
_mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
}
@@ -1207,26 +1494,26 @@ _mesa_meta_copy_pixels(GLcontext *ctx, GLint srcX, GLint srcY,
const GLfloat dstY1 = dstY + height * ctx->Pixel.ZoomY;
const GLfloat z = ctx->Current.RasterPos[2];
- verts[0][0] = dstX0;
- verts[0][1] = dstY0;
- verts[0][2] = z;
- verts[0][3] = 0.0F;
- verts[0][4] = 0.0F;
- verts[1][0] = dstX1;
- verts[1][1] = dstY0;
- verts[1][2] = z;
- verts[1][3] = tex->Sright;
- verts[1][4] = 0.0F;
- verts[2][0] = dstX1;
- verts[2][1] = dstY1;
- verts[2][2] = z;
- verts[2][3] = tex->Sright;
- verts[2][4] = tex->Ttop;
- verts[3][0] = dstX0;
- verts[3][1] = dstY1;
- verts[3][2] = z;
- verts[3][3] = 0.0F;
- verts[3][4] = tex->Ttop;
+ verts[0].x = dstX0;
+ verts[0].y = dstY0;
+ verts[0].z = z;
+ verts[0].s = 0.0F;
+ verts[0].t = 0.0F;
+ verts[1].x = dstX1;
+ verts[1].y = dstY0;
+ verts[1].z = z;
+ verts[1].s = tex->Sright;
+ verts[1].t = 0.0F;
+ verts[2].x = dstX1;
+ verts[2].y = dstY1;
+ verts[2].z = z;
+ verts[2].s = tex->Sright;
+ verts[2].t = tex->Ttop;
+ verts[3].x = dstX0;
+ verts[3].y = dstY1;
+ verts[3].z = z;
+ verts[3].s = 0.0F;
+ verts[3].t = tex->Ttop;
/* upload new vertex data */
_mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
@@ -1236,12 +1523,12 @@ _mesa_meta_copy_pixels(GLcontext *ctx, GLint srcX, GLint srcY,
setup_copypix_texture(tex, newTex, srcX, srcY, width, height,
GL_RGBA, GL_NEAREST);
- _mesa_Enable(tex->Target);
+ _mesa_set_enable(ctx, tex->Target, GL_TRUE);
/* draw textured quad */
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
- _mesa_Disable(tex->Target);
+ _mesa_set_enable(ctx, tex->Target, GL_FALSE);
_mesa_meta_end(ctx);
}
@@ -1279,9 +1566,8 @@ tiled_draw_pixels(GLcontext *ctx,
tileUnpack.SkipRows = unpack->SkipRows + j;
- _mesa_meta_draw_pixels(ctx, tileX, tileY,
- tileWidth, tileHeight,
- format, type, &tileUnpack, pixels);
+ _mesa_meta_DrawPixels(ctx, tileX, tileY, tileWidth, tileHeight,
+ format, type, &tileUnpack, pixels);
}
}
}
@@ -1390,17 +1676,20 @@ init_draw_depth_pixels(GLcontext *ctx)
* of texture mapping and polygon rendering.
*/
void
-_mesa_meta_draw_pixels(GLcontext *ctx,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels)
+_mesa_meta_DrawPixels(GLcontext *ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels)
{
struct drawpix_state *drawpix = &ctx->Meta->DrawPix;
struct temp_texture *tex = get_temp_texture(ctx);
const struct gl_pixelstore_attrib unpackSave = ctx->Unpack;
const GLuint origStencilMask = ctx->Stencil.WriteMask[0];
- GLfloat verts[4][5]; /* four verts of X,Y,Z,S,T */
+ struct vertex {
+ GLfloat x, y, z, s, t;
+ };
+ struct vertex verts[4];
GLenum texIntFormat;
GLboolean fallback, newTex;
GLbitfield metaExtraSave = 0x0;
@@ -1494,26 +1783,26 @@ _mesa_meta_draw_pixels(GLcontext *ctx,
const GLfloat y1 = y + height * ctx->Pixel.ZoomY;
const GLfloat z = ctx->Current.RasterPos[2];
- verts[0][0] = x0;
- verts[0][1] = y0;
- verts[0][2] = z;
- verts[0][3] = 0.0F;
- verts[0][4] = 0.0F;
- verts[1][0] = x1;
- verts[1][1] = y0;
- verts[1][2] = z;
- verts[1][3] = tex->Sright;
- verts[1][4] = 0.0F;
- verts[2][0] = x1;
- verts[2][1] = y1;
- verts[2][2] = z;
- verts[2][3] = tex->Sright;
- verts[2][4] = tex->Ttop;
- verts[3][0] = x0;
- verts[3][1] = y1;
- verts[3][2] = z;
- verts[3][3] = 0.0F;
- verts[3][4] = tex->Ttop;
+ verts[0].x = x0;
+ verts[0].y = y0;
+ verts[0].z = z;
+ verts[0].s = 0.0F;
+ verts[0].t = 0.0F;
+ verts[1].x = x1;
+ verts[1].y = y0;
+ verts[1].z = z;
+ verts[1].s = tex->Sright;
+ verts[1].t = 0.0F;
+ verts[2].x = x1;
+ verts[2].y = y1;
+ verts[2].z = z;
+ verts[2].s = tex->Sright;
+ verts[2].t = tex->Ttop;
+ verts[3].x = x0;
+ verts[3].y = y1;
+ verts[3].z = z;
+ verts[3].s = 0.0F;
+ verts[3].t = tex->Ttop;
}
if (drawpix->ArrayObj == 0) {
@@ -1529,17 +1818,15 @@ _mesa_meta_draw_pixels(GLcontext *ctx,
verts, GL_DYNAMIC_DRAW_ARB);
/* setup vertex arrays */
- _mesa_VertexPointer(3, GL_FLOAT, sizeof(verts[0]),
- (void *) (0 * sizeof(GLfloat)));
- _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(verts[0]),
- (void *) (3 * sizeof(GLfloat)));
+ _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+ _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
_mesa_EnableClientState(GL_VERTEX_ARRAY);
_mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
/* set given unpack params */
ctx->Unpack = *unpack;
- _mesa_Enable(tex->Target);
+ _mesa_set_enable(ctx, tex->Target, GL_TRUE);
if (_mesa_is_stencil_format(format)) {
/* Drawing stencil */
@@ -1548,7 +1835,7 @@ _mesa_meta_draw_pixels(GLcontext *ctx,
if (!drawpix->StencilFP)
init_draw_stencil_pixels(ctx);
- setup_drawpix_texture(tex, newTex, texIntFormat, width, height,
+ setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
GL_ALPHA, type, pixels);
_mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
@@ -1591,19 +1878,19 @@ _mesa_meta_draw_pixels(GLcontext *ctx,
_mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0,
ctx->Current.RasterColor);
- setup_drawpix_texture(tex, newTex, texIntFormat, width, height,
+ setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
format, type, pixels);
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
}
else {
/* Drawing RGBA */
- setup_drawpix_texture(tex, newTex, texIntFormat, width, height,
+ setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
format, type, pixels);
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
}
- _mesa_Disable(tex->Target);
+ _mesa_set_enable(ctx, tex->Target, GL_FALSE);
_mesa_DeleteBuffersARB(1, &vbo);
@@ -1612,3 +1899,849 @@ _mesa_meta_draw_pixels(GLcontext *ctx,
_mesa_meta_end(ctx);
}
+
+
+/**
+ * Do glBitmap with an alpha texture quad.  Use the alpha test to
+ * cull the 'off' bits.  If alpha test is already enabled, fall back
+ * to swrast (should be a rare case).
+ * A bitmap cache as in the gallium/mesa state tracker would
+ * improve performance a lot.
+ *
+ * The swrast fallback is also taken when image-transfer ops, fog or
+ * texturing are enabled, or when the bitmap exceeds the temporary
+ * texture's MaxSize in either dimension.
+ *
+ * \param ctx  the GL context
+ * \param x, y  origin of the drawn quad; it spans x..x+width, y..y+height
+ * \param width, height  bitmap size in pixels
+ * \param unpack  pixel unpacking state used to map and expand the bitmap
+ * \param bitmap1  source bitmap; passed through _mesa_map_pbo_source()
+ *                 before being expanded to one byte per pixel
+ */
+void
+_mesa_meta_Bitmap(GLcontext *ctx,
+                  GLint x, GLint y, GLsizei width, GLsizei height,
+                  const struct gl_pixelstore_attrib *unpack,
+                  const GLubyte *bitmap1)
+{
+   struct bitmap_state *bitmap = &ctx->Meta->Bitmap;
+   struct temp_texture *tex = get_bitmap_temp_texture(ctx);
+   const GLenum texIntFormat = GL_ALPHA;
+   const struct gl_pixelstore_attrib unpackSave = *unpack;
+   struct vertex {
+      GLfloat x, y, z, s, t, r, g, b, a;
+   };
+   struct vertex verts[4];
+   GLboolean newTex;
+   GLubyte *bitmap8;
+
+   /*
+    * Check if swrast fallback is needed.
+    */
+   if (ctx->_ImageTransferState ||
+       ctx->Color.AlphaEnabled ||
+       ctx->Fog.Enabled ||
+       ctx->Texture._EnabledUnits ||
+       width > tex->MaxSize ||
+       height > tex->MaxSize) {
+      _swrast_Bitmap(ctx, x, y, width, height, unpack, bitmap1);
+      return;
+   }
+
+   /* Most GL state applies to glBitmap (like blending, stencil, etc),
+    * but there are a few things we need to override:
+    */
+   _mesa_meta_begin(ctx, (META_ALPHA_TEST |
+                          META_PIXEL_STORE |
+                          META_RASTERIZATION |
+                          META_SHADER |
+                          META_TEXTURE |
+                          META_TRANSFORM |
+                          META_VERTEX |
+                          META_VIEWPORT));
+
+   if (bitmap->ArrayObj == 0) {
+      /* one-time setup */
+
+      /* create vertex array object */
+      _mesa_GenVertexArraysAPPLE(1, &bitmap->ArrayObj);
+      _mesa_BindVertexArrayAPPLE(bitmap->ArrayObj);
+
+      /* create vertex array buffer */
+      _mesa_GenBuffersARB(1, &bitmap->VBO);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, bitmap->VBO);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+                          NULL, GL_DYNAMIC_DRAW_ARB);
+
+      /* setup vertex arrays */
+      _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+      _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
+      _mesa_ColorPointer(4, GL_FLOAT, sizeof(struct vertex), OFFSET(r));
+      _mesa_EnableClientState(GL_VERTEX_ARRAY);
+      _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+      _mesa_EnableClientState(GL_COLOR_ARRAY);
+   }
+   else {
+      _mesa_BindVertexArray(bitmap->ArrayObj);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, bitmap->VBO);
+   }
+
+   newTex = alloc_texture(tex, width, height, texIntFormat);
+
+   /* vertex positions, texcoords, colors (after texture allocation!) */
+   {
+      const GLfloat x0 = (GLfloat) x;
+      const GLfloat y0 = (GLfloat) y;
+      const GLfloat x1 = (GLfloat) (x + width);
+      const GLfloat y1 = (GLfloat) (y + height);
+      const GLfloat z = ctx->Current.RasterPos[2];
+      GLuint i;
+
+      verts[0].x = x0;
+      verts[0].y = y0;
+      verts[0].z = z;
+      verts[0].s = 0.0F;
+      verts[0].t = 0.0F;
+      verts[1].x = x1;
+      verts[1].y = y0;
+      verts[1].z = z;
+      verts[1].s = tex->Sright;
+      verts[1].t = 0.0F;
+      verts[2].x = x1;
+      verts[2].y = y1;
+      verts[2].z = z;
+      verts[2].s = tex->Sright;
+      verts[2].t = tex->Ttop;
+      verts[3].x = x0;
+      verts[3].y = y1;
+      verts[3].z = z;
+      verts[3].s = 0.0F;
+      verts[3].t = tex->Ttop;
+
+      /* every vertex carries the current raster color */
+      for (i = 0; i < 4; i++) {
+         verts[i].r = ctx->Current.RasterColor[0];
+         verts[i].g = ctx->Current.RasterColor[1];
+         verts[i].b = ctx->Current.RasterColor[2];
+         verts[i].a = ctx->Current.RasterColor[3];
+      }
+
+      /* upload new vertex data */
+      _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+   }
+
+   bitmap1 = _mesa_map_pbo_source(ctx, &unpackSave, bitmap1);
+   if (!bitmap1) {
+      /* Must still balance the _mesa_meta_begin() above, otherwise the
+       * saved GL state is never restored.
+       */
+      _mesa_meta_end(ctx);
+      return;
+   }
+
+   /* Expand the bitmap to one byte per pixel; 'on' bits become 0xff.
+    * If the allocation fails nothing is drawn.
+    */
+   bitmap8 = (GLubyte *) _mesa_calloc(width * height);
+   if (bitmap8) {
+      _mesa_expand_bitmap(width, height, &unpackSave, bitmap1,
+                          bitmap8, width, 0xff);
+
+      _mesa_set_enable(ctx, tex->Target, GL_TRUE);
+
+      /* discard fragments whose alpha is zero, i.e. the 'off' bits */
+      _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_TRUE);
+      _mesa_AlphaFunc(GL_GREATER, 0.0);
+
+      setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
+                            GL_ALPHA, GL_UNSIGNED_BYTE, bitmap8);
+
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+      _mesa_set_enable(ctx, tex->Target, GL_FALSE);
+
+      _mesa_free(bitmap8);
+   }
+
+   _mesa_unmap_pbo_source(ctx, &unpackSave);
+
+   _mesa_meta_end(ctx);
+}
+
+
+/**
+ * Tell whether _mesa_meta_GenerateMipmap() would have to take the
+ * software fallback for the given texture.  The fallback path will
+ * require that the texture images are mapped.
+ *
+ * \param ctx  the GL context
+ * \param target  texture target being mipmapped
+ * \param texObj  the texture object
+ * \return GL_TRUE if the software path is needed, GL_FALSE otherwise
+ */
+GLboolean
+_mesa_meta_check_generate_mipmap_fallback(GLcontext *ctx, GLenum target,
+                                          struct gl_texture_object *texObj)
+{
+   struct gl_texture_image *base =
+      _mesa_select_tex_image(ctx, texObj, target, texObj->BaseLevel);
+
+   /* rendering to texture levels needs framebuffer objects */
+   if (!ctx->Extensions.EXT_framebuffer_object)
+      return GL_TRUE;
+
+   /* 3D textures are not handled by the meta path */
+   if (target == GL_TEXTURE_3D)
+      return GL_TRUE;
+
+   /* no base-level image to start from */
+   if (!base)
+      return GL_TRUE;
+
+   /* compressed base images are not handled by the meta path */
+   if (base->IsCompressed)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+
+/**
+ * Called via ctx->Driver.GenerateMipmap()
+ * Note: texture borders and 3D texture support not yet complete.
+ *
+ * Each mipmap level is produced by attaching it to a framebuffer object
+ * and drawing a quad textured (GL_LINEAR) from the previous level.
+ * Falls back to _mesa_generate_mipmap() when
+ * _mesa_meta_check_generate_mipmap_fallback() says so.
+ *
+ * \param ctx  the GL context
+ * \param target  texture target; may be an individual cube map face
+ * \param texObj  the texture object; it is unlocked while rendering and
+ *                re-locked before returning
+ */
+void
+_mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target,
+                          struct gl_texture_object *texObj)
+{
+   struct gen_mipmap_state *mipmap = &ctx->Meta->Mipmap;
+   struct vertex {
+      GLfloat x, y, s, t, r;
+   };
+   struct vertex verts[4];
+   const GLuint baseLevel = texObj->BaseLevel;
+   const GLuint maxLevel = texObj->MaxLevel;
+   const GLenum minFilterSave = texObj->MinFilter;
+   const GLenum magFilterSave = texObj->MagFilter;
+   const GLint baseLevelSave = texObj->BaseLevel;
+   const GLint maxLevelSave = texObj->MaxLevel;
+   const GLboolean genMipmapSave = texObj->GenerateMipmap;
+   const GLenum wrapSSave = texObj->WrapS;
+   const GLenum wrapTSave = texObj->WrapT;
+   const GLenum wrapRSave = texObj->WrapR;
+   const GLuint fboSave = ctx->DrawBuffer->Name;
+   /* The target as passed in: a specific cube face, or the plain target. */
+   const GLenum faceTarget = target;
+   GLuint dstLevel;
+   GLuint border = 0;
+
+   if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) {
+      _mesa_generate_mipmap(ctx, target, texObj);
+      return;
+   }
+
+   /* Texture parameters and enables use the generic cube map target,
+    * while image specification and FBO attachment use the face.
+    */
+   if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X &&
+       target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z) {
+      target = GL_TEXTURE_CUBE_MAP;
+   }
+
+   _mesa_meta_begin(ctx, META_ALL);
+
+   if (mipmap->ArrayObj == 0) {
+      /* one-time setup */
+
+      /* create vertex array object */
+      _mesa_GenVertexArraysAPPLE(1, &mipmap->ArrayObj);
+      _mesa_BindVertexArrayAPPLE(mipmap->ArrayObj);
+
+      /* create vertex array buffer */
+      _mesa_GenBuffersARB(1, &mipmap->VBO);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, mipmap->VBO);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+                          NULL, GL_DYNAMIC_DRAW_ARB);
+
+      /* setup vertex arrays */
+      _mesa_VertexPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+      _mesa_TexCoordPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
+      _mesa_EnableClientState(GL_VERTEX_ARRAY);
+      _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+   }
+   else {
+      _mesa_BindVertexArray(mipmap->ArrayObj);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, mipmap->VBO);
+   }
+
+   if (!mipmap->FBO) {
+      /* one-time creation of the framebuffer object we render into */
+      _mesa_GenFramebuffersEXT(1, &mipmap->FBO);
+   }
+
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, mipmap->FBO);
+
+   /* Sampling state used while generating; originals restored at the end. */
+   _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+   _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+   _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, GL_FALSE);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
+
+   _mesa_set_enable(ctx, target, GL_TRUE);
+
+   /* setup texcoords once (XXX what about border?) */
+   switch (faceTarget) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+      verts[0].s = 0.0F;
+      verts[0].t = 0.0F;
+      verts[0].r = 0.0F;
+      verts[1].s = 1.0F;
+      verts[1].t = 0.0F;
+      verts[1].r = 0.0F;
+      verts[2].s = 1.0F;
+      verts[2].t = 1.0F;
+      verts[2].r = 0.0F;
+      verts[3].s = 0.0F;
+      verts[3].t = 1.0F;
+      verts[3].r = 0.0F;
+      break;
+   case GL_TEXTURE_3D:
+      /* 3D targets are diverted to the software path by the fallback check */
+      abort();
+      break;
+   default:
+      /* cube face */
+      {
+         static const GLfloat st[4][2] = {
+            {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
+         };
+         GLuint i;
+
+         /* loop over quad verts */
+         for (i = 0; i < 4; i++) {
+            /* Compute sc = +/-scale and tc = +/-scale.
+             * Not +/-1 to avoid cube face selection ambiguity near the edges,
+             * though that can still sometimes happen with this scale factor...
+             */
+            const GLfloat scale = 0.9999f;
+            const GLfloat sc = (2.0f * st[i][0] - 1.0f) * scale;
+            const GLfloat tc = (2.0f * st[i][1] - 1.0f) * scale;
+
+            switch (faceTarget) {
+            case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+               verts[i].s = 1.0f;
+               verts[i].t = -tc;
+               verts[i].r = -sc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+               verts[i].s = -1.0f;
+               verts[i].t = -tc;
+               verts[i].r = sc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+               verts[i].s = sc;
+               verts[i].t = 1.0f;
+               verts[i].r = tc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+               verts[i].s = sc;
+               verts[i].t = -1.0f;
+               verts[i].r = -tc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+               verts[i].s = sc;
+               verts[i].t = -tc;
+               verts[i].r = 1.0f;
+               break;
+            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+               verts[i].s = -sc;
+               verts[i].t = -tc;
+               verts[i].r = -1.0f;
+               break;
+            default:
+               assert(0);
+            }
+         }
+      }
+   }
+
+   /* texture is already locked, unlock now */
+   _mesa_unlock_texture(ctx, texObj);
+
+   for (dstLevel = baseLevel + 1; dstLevel <= maxLevel; dstLevel++) {
+      const struct gl_texture_image *srcImage;
+      const GLuint srcLevel = dstLevel - 1;
+      GLsizei srcWidth, srcHeight, srcDepth;
+      GLsizei dstWidth, dstHeight, dstDepth;
+      GLenum status;
+
+      srcImage = _mesa_select_tex_image(ctx, texObj, faceTarget, srcLevel);
+      assert(srcImage->Border == 0); /* XXX we can fix this */
+
+      /* src size w/out border */
+      srcWidth = srcImage->Width - 2 * border;
+      srcHeight = srcImage->Height - 2 * border;
+      srcDepth = srcImage->Depth - 2 * border;
+
+      /* new dst size w/ border */
+      dstWidth = MAX2(1, srcWidth / 2) + 2 * border;
+      dstHeight = MAX2(1, srcHeight / 2) + 2 * border;
+      dstDepth = MAX2(1, srcDepth / 2) + 2 * border;
+
+      /* once halving no longer shrinks the image we have reached 1x1x1 */
+      if (dstWidth == srcImage->Width &&
+          dstHeight == srcImage->Height &&
+          dstDepth == srcImage->Depth) {
+         /* all done */
+         break;
+      }
+
+      /* Create empty dest image */
+      if (target == GL_TEXTURE_1D) {
+         _mesa_TexImage1D(target, dstLevel, srcImage->InternalFormat,
+                          dstWidth, border,
+                          GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+      }
+      else if (target == GL_TEXTURE_3D) {
+         _mesa_TexImage3D(target, dstLevel, srcImage->InternalFormat,
+                          dstWidth, dstHeight, dstDepth, border,
+                          GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+      }
+      else {
+         /* 2D or cube */
+         _mesa_TexImage2D(faceTarget, dstLevel, srcImage->InternalFormat,
+                          dstWidth, dstHeight, border,
+                          GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+
+         if (target == GL_TEXTURE_CUBE_MAP) {
+            /* If texturing from a cube, we need to make sure all src faces
+             * have been defined (even if we're not sampling from them.)
+             * Otherwise the texture object will be 'incomplete' and
+             * texturing from it will not be allowed.
+             */
+            GLuint face;
+            for (face = 0; face < 6; face++) {
+               if (!texObj->Image[face][srcLevel] ||
+                   texObj->Image[face][srcLevel]->Width != srcWidth) {
+                  _mesa_TexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face,
+                                   srcLevel, srcImage->InternalFormat,
+                                   srcWidth, srcHeight, border,
+                                   GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+               }
+            }
+         }
+      }
+
+      /* setup vertex positions */
+      {
+         verts[0].x = 0.0F;
+         verts[0].y = 0.0F;
+         verts[1].x = (GLfloat) dstWidth;
+         verts[1].y = 0.0F;
+         verts[2].x = (GLfloat) dstWidth;
+         verts[2].y = (GLfloat) dstHeight;
+         verts[3].x = 0.0F;
+         verts[3].y = (GLfloat) dstHeight;
+
+         /* upload new vertex data */
+         _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+      }
+
+      /* limit sampling to src level */
+      _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel);
+      _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);
+
+      /* Set to draw into the current dstLevel */
+      if (target == GL_TEXTURE_1D) {
+         _mesa_FramebufferTexture1DEXT(GL_FRAMEBUFFER_EXT,
+                                       GL_COLOR_ATTACHMENT0_EXT,
+                                       target,
+                                       texObj->Name,
+                                       dstLevel);
+      }
+      else if (target == GL_TEXTURE_3D) {
+         GLint zoffset = 0; /* XXX unfinished */
+         _mesa_FramebufferTexture3DEXT(GL_FRAMEBUFFER_EXT,
+                                       GL_COLOR_ATTACHMENT0_EXT,
+                                       target,
+                                       texObj->Name,
+                                       dstLevel, zoffset);
+      }
+      else {
+         /* 2D / cube */
+         _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
+                                       GL_COLOR_ATTACHMENT0_EXT,
+                                       faceTarget,
+                                       texObj->Name,
+                                       dstLevel);
+      }
+
+      _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+
+      /* sanity check */
+      status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT);
+      if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
+         /* Report and stop generating levels, but fall through to the
+          * cleanup below so the texture is re-locked and state restored.
+          */
+         _mesa_problem(ctx, "Unexpected incomplete framebuffer in "
+                       "_mesa_meta_GenerateMipmap()");
+         break;
+      }
+
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+   }
+
+   _mesa_lock_texture(ctx, texObj); /* relock */
+
+   _mesa_meta_end(ctx);
+
+   /* put back the texture parameters changed above */
+   _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, minFilterSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, magFilterSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, baseLevelSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, maxLevelSave);
+   _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, genMipmapSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, wrapSSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_T, wrapTSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_R, wrapRSave);
+
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fboSave);
+}
+
+
+/**
+ * Determine the GL data type to use for the temporary image read with
+ * ReadPixels() and passed to Tex[Sub]Image().
+ *
+ * For color formats the type is chosen from the draw buffer's red
+ * channel depth: up to 8 bits -> GL_UNSIGNED_BYTE, up to 16 bits ->
+ * GL_UNSIGNED_SHORT, anything deeper -> GL_FLOAT.
+ *
+ * \param ctx  the GL context
+ * \param baseFormat  base format of the texture image
+ * \return the GL data type, or 0 (after _mesa_problem()) for an
+ *         unexpected format
+ */
+static GLenum
+get_temp_image_type(GLcontext *ctx, GLenum baseFormat)
+{
+   switch (baseFormat) {
+   case GL_RGBA:
+   case GL_RGB:
+   case GL_ALPHA:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_ALPHA:
+   case GL_INTENSITY:
+      if (ctx->DrawBuffer->Visual.redBits <= 8)
+         return GL_UNSIGNED_BYTE;
+      else if (ctx->DrawBuffer->Visual.redBits <= 16)
+         return GL_UNSIGNED_SHORT;
+      else
+         return GL_FLOAT;
+   case GL_DEPTH_COMPONENT:
+      return GL_UNSIGNED_INT;
+   case GL_DEPTH_STENCIL:
+      return GL_UNSIGNED_INT_24_8;
+   default:
+      _mesa_problem(ctx, "Unexpected format in get_temp_image_type()");
+      return 0;
+   }
+}
+
+
+/**
+ * Shared implementation of the _mesa_meta_CopyTexImage1D/2D() entry
+ * points: read the source rectangle from the framebuffer into a
+ * temporary buffer, then hand that buffer to the driver's TexImage
+ * hook.
+ * Have to be careful with locking and meta state for pixel transfer.
+ *
+ * \param dims  1 or 2; only used in the out-of-memory error message
+ * \param x, y, width, height  framebuffer rectangle to read
+ */
+static void
+copy_tex_image(GLcontext *ctx, GLuint dims, GLenum target, GLint level,
+               GLenum internalFormat, GLint x, GLint y,
+               GLsizei width, GLsizei height, GLint border)
+{
+   struct gl_texture_unit *unit = _mesa_get_current_tex_unit(ctx);
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *texImage =
+      _mesa_get_tex_image(ctx, texObj, target, level);
+   const GLenum format = _mesa_base_tex_format(ctx, internalFormat);
+   const GLenum type = get_temp_image_type(ctx, format);
+   const GLint bytesPerPixel = _mesa_bytes_per_pixel(format, type);
+   GLsizei postConvWidth = width;
+   GLsizei postConvHeight = height;
+   void *tmpImage;
+
+   if (bytesPerPixel <= 0) {
+      _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()");
+      return;
+   }
+
+   /*
+    * Temporary storage for the pixels read back (XXX could use a PBO)
+    */
+   tmpImage = _mesa_malloc(width * height * bytesPerPixel);
+   if (!tmpImage) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
+      return;
+   }
+
+   /* pick a hardware format if the image does not have one yet */
+   if (texImage->TexFormat == &_mesa_null_texformat) {
+      texImage->TexFormat = ctx->Driver.ChooseTextureFormat(ctx,
+                                                            internalFormat,
+                                                            format,
+                                                            type);
+   }
+
+   /* the texture has to be unlocked before we read pixels */
+   _mesa_unlock_texture(ctx, texObj);
+
+   /*
+    * Step 1: framebuffer -> tmpImage, with pixel transfer ops disabled
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, height,
+                          format, type, &ctx->Pack, tmpImage);
+   _mesa_meta_end(ctx);
+
+   /*
+    * Step 2: get the texture image ready for its new size/data
+    */
+#if FEATURE_convolve
+   /* NOTE(review): the dimension argument is 2 even when dims == 1 --
+    * confirm this is intended for the 1D case.
+    */
+   if (_mesa_is_color_format(internalFormat)) {
+      _mesa_adjust_image_for_convolution(ctx, 2,
+                                         &postConvWidth, &postConvHeight);
+   }
+#endif
+
+   if (texImage->Data) {
+      ctx->Driver.FreeTexImageData(ctx, texImage);
+   }
+
+   _mesa_init_teximage_fields(ctx, target, texImage,
+                              postConvWidth, postConvHeight, 1,
+                              border, internalFormat);
+
+   /*
+    * Step 3: tmpImage -> texture, this time with pixel transfer ops
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE);
+
+   _mesa_update_state(ctx); /* to update pixel transfer state */
+
+   if (target != GL_TEXTURE_1D) {
+      ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                             width, height, border, format, type,
+                             tmpImage, &ctx->Unpack, texObj, texImage);
+   }
+   else {
+      ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                             width, border, format, type,
+                             tmpImage, &ctx->Unpack, texObj, texImage);
+   }
+   _mesa_meta_end(ctx);
+
+   /* take the texture lock again before returning */
+   _mesa_lock_texture(ctx, texObj);
+
+   _mesa_free(tmpImage);
+}
+
+
+/**
+ * 1D variant of the meta CopyTexImage path: forwards to
+ * copy_tex_image() with a height of one row.
+ */
+void
+_mesa_meta_CopyTexImage1D(GLcontext *ctx, GLenum target, GLint level,
+                          GLenum internalFormat, GLint x, GLint y,
+                          GLsizei width, GLint border)
+{
+   const GLuint dims = 1;
+   const GLsizei height = 1;
+
+   copy_tex_image(ctx, dims, target, level, internalFormat, x, y,
+                  width, height, border);
+}
+
+
+/**
+ * 2D variant of the meta CopyTexImage path: forwards all arguments
+ * to copy_tex_image().
+ */
+void
+_mesa_meta_CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+                          GLenum internalFormat, GLint x, GLint y,
+                          GLsizei width, GLsizei height, GLint border)
+{
+   const GLuint dims = 2;
+
+   copy_tex_image(ctx, dims, target, level, internalFormat, x, y,
+                  width, height, border);
+}
+
+
+
+/**
+ * Shared implementation of the _mesa_meta_CopyTexSubImage1/2/3D()
+ * entry points: read the source rectangle from the framebuffer into a
+ * temporary buffer, then hand that buffer to the driver's TexSubImage
+ * hook matching the target.
+ * Have to be careful with locking and meta state for pixel transfer.
+ *
+ * \param dims  1, 2 or 3; only used in the out-of-memory error message
+ * \param xoffset, yoffset, zoffset  destination offset in the texture
+ * \param x, y, width, height  framebuffer rectangle to read
+ */
+static void
+copy_tex_sub_image(GLcontext *ctx, GLuint dims, GLenum target, GLint level,
+                   GLint xoffset, GLint yoffset, GLint zoffset,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height)
+{
+   struct gl_texture_unit *unit = _mesa_get_current_tex_unit(ctx);
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+   const GLenum format = texImage->TexFormat->BaseFormat;
+   const GLenum type = get_temp_image_type(ctx, format);
+   const GLint bytesPerPixel = _mesa_bytes_per_pixel(format, type);
+   void *tmpImage;
+
+   if (bytesPerPixel <= 0) {
+      _mesa_problem(ctx, "Bad bpp in meta copy_tex_sub_image()");
+      return;
+   }
+
+   /*
+    * Temporary storage for the pixels read back (XXX could use a PBO)
+    */
+   tmpImage = _mesa_malloc(width * height * bytesPerPixel);
+   if (!tmpImage) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage%uD", dims);
+      return;
+   }
+
+   /* the texture has to be unlocked before we read pixels */
+   _mesa_unlock_texture(ctx, texObj);
+
+   /*
+    * Step 1: framebuffer -> tmpImage, with pixel transfer ops disabled
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, height,
+                          format, type, &ctx->Pack, tmpImage);
+   _mesa_meta_end(ctx);
+
+   _mesa_update_state(ctx); /* to update pixel transfer state */
+
+   /*
+    * Step 2: tmpImage -> texture, this time with pixel transfer ops
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE);
+   switch (target) {
+   case GL_TEXTURE_1D:
+      ctx->Driver.TexSubImage1D(ctx, target, level, xoffset,
+                                width, format, type, tmpImage,
+                                &ctx->Unpack, texObj, texImage);
+      break;
+   case GL_TEXTURE_3D:
+      /* a single slice (depth 1) at zoffset */
+      ctx->Driver.TexSubImage3D(ctx, target, level, xoffset, yoffset, zoffset,
+                                width, height, 1, format, type, tmpImage,
+                                &ctx->Unpack, texObj, texImage);
+      break;
+   default:
+      ctx->Driver.TexSubImage2D(ctx, target, level, xoffset, yoffset,
+                                width, height, format, type, tmpImage,
+                                &ctx->Unpack, texObj, texImage);
+      break;
+   }
+   _mesa_meta_end(ctx);
+
+   /* take the texture lock again before returning */
+   _mesa_lock_texture(ctx, texObj);
+
+   _mesa_free(tmpImage);
+}
+
+
+/**
+ * 1D variant of the meta CopyTexSubImage path: forwards to
+ * copy_tex_sub_image() with zero y/z offsets and a height of one row.
+ */
+void
+_mesa_meta_CopyTexSubImage1D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset,
+                             GLint x, GLint y, GLsizei width)
+{
+   const GLint yoffset = 0;
+   const GLint zoffset = 0;
+   const GLsizei height = 1;
+
+   copy_tex_sub_image(ctx, 1, target, level, xoffset, yoffset, zoffset,
+                      x, y, width, height);
+}
+
+
+/**
+ * 2D variant of the meta CopyTexSubImage path: forwards to
+ * copy_tex_sub_image() with a zero z offset.
+ */
+void
+_mesa_meta_CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height)
+{
+   const GLint zoffset = 0;
+
+   copy_tex_sub_image(ctx, 2, target, level, xoffset, yoffset, zoffset,
+                      x, y, width, height);
+}
+
+
+/**
+ * 3D variant of the meta CopyTexSubImage path: forwards all arguments
+ * to copy_tex_sub_image().
+ */
+void
+_mesa_meta_CopyTexSubImage3D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset, GLint zoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height)
+{
+   const GLuint dims = 3;
+
+   copy_tex_sub_image(ctx, dims, target, level, xoffset, yoffset, zoffset,
+                      x, y, width, height);
+}
+
+
+/**
+ * Read one row of 'width' RGBA float pixels from the framebuffer at
+ * (x, y) and load it as a color table via _mesa_ColorTable().
+ * Records GL_OUT_OF_MEMORY if the temporary row cannot be allocated.
+ */
+void
+_mesa_meta_CopyColorTable(GLcontext *ctx,
+                          GLenum target, GLenum internalformat,
+                          GLint x, GLint y, GLsizei width)
+{
+   GLfloat *rgba = (GLfloat *) _mesa_malloc(width * 4 * sizeof(GLfloat));
+
+   if (rgba == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyColorTable");
+      return;
+   }
+
+   /*
+    * Fetch the row from the framebuffer (pixel transfer ops disabled)
+    * and load it into the table.
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, 1,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, rgba);
+   _mesa_ColorTable(target, internalformat, width, GL_RGBA, GL_FLOAT, rgba);
+   _mesa_meta_end(ctx);
+
+   _mesa_free(rgba);
+}
+
+
+/**
+ * Read one row of 'width' RGBA float pixels from the framebuffer at
+ * (x, y) and store it into a color table starting at entry 'start',
+ * via _mesa_ColorSubTable().
+ * Records GL_OUT_OF_MEMORY if the temporary row cannot be allocated.
+ */
+void
+_mesa_meta_CopyColorSubTable(GLcontext *ctx,GLenum target, GLsizei start,
+                             GLint x, GLint y, GLsizei width)
+{
+   GLfloat *rgba = (GLfloat *) _mesa_malloc(width * 4 * sizeof(GLfloat));
+
+   if (rgba == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyColorSubTable");
+      return;
+   }
+
+   /*
+    * Fetch the row from the framebuffer (pixel transfer ops disabled)
+    * and load it into the table.
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, 1,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, rgba);
+   _mesa_ColorSubTable(target, start, width, GL_RGBA, GL_FLOAT, rgba);
+   _mesa_meta_end(ctx);
+
+   _mesa_free(rgba);
+}
+
+
+/**
+ * Read one row of 'width' RGBA float pixels from the framebuffer at
+ * (x, y) and install it as a 1D convolution filter via
+ * _mesa_ConvolutionFilter1D().
+ * Records GL_OUT_OF_MEMORY if the temporary row cannot be allocated.
+ */
+void
+_mesa_meta_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target,
+                                   GLenum internalFormat,
+                                   GLint x, GLint y, GLsizei width)
+{
+   GLfloat *buf;
+
+   buf = (GLfloat *) _mesa_malloc(width * 4 * sizeof(GLfloat));
+   if (!buf) {
+      /* report the 1D entry point, not its 2D sibling */
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyConvolutionFilter1D");
+      return;
+   }
+
+   /*
+    * Read image from framebuffer (disable pixel transfer ops)
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_update_state(ctx);
+   ctx->Driver.ReadPixels(ctx, x, y, width, 1,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
+
+   _mesa_ConvolutionFilter1D(target, internalFormat, width,
+                             GL_RGBA, GL_FLOAT, buf);
+
+   _mesa_meta_end(ctx);
+
+   _mesa_free(buf);
+}
+
+
+/**
+ * Read a width x height block of RGBA float pixels from the
+ * framebuffer at (x, y) and install it as a 2D convolution filter via
+ * _mesa_ConvolutionFilter2D().
+ * Records GL_OUT_OF_MEMORY if the temporary image cannot be allocated.
+ */
+void
+_mesa_meta_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target,
+                                   GLenum internalFormat, GLint x, GLint y,
+                                   GLsizei width, GLsizei height)
+{
+   GLfloat *rgba =
+      (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat));
+
+   if (rgba == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyConvolutionFilter2D");
+      return;
+   }
+
+   /*
+    * Fetch the block from the framebuffer (pixel transfer ops disabled)
+    * and load it as the filter.
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_update_state(ctx);
+   ctx->Driver.ReadPixels(ctx, x, y, width, height,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, rgba);
+   _mesa_ConvolutionFilter2D(target, internalFormat, width, height,
+                             GL_RGBA, GL_FLOAT, rgba);
+   _mesa_meta_end(ctx);
+
+   _mesa_free(rgba);
+}
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index b03b64c48a..6225b94189 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -27,29 +27,6 @@
#define META_H
-/**
- * Flags passed to _mesa_meta_begin().
- * XXX these flags may evolve...
- */
-/*@{*/
-#define META_ALPHA_TEST 0x1
-#define META_BLEND 0x2 /**< includes logicop */
-#define META_COLOR_MASK 0x4
-#define META_DEPTH_TEST 0x8
-#define META_FOG 0x10
-#define META_RASTERIZATION 0x20
-#define META_SCISSOR 0x40
-#define META_SHADER 0x80
-#define META_STENCIL_TEST 0x100
-#define META_TRANSFORM 0x200 /**< modelview, projection */
-#define META_TEXTURE 0x400
-#define META_VERTEX 0x800
-#define META_VIEWPORT 0x1000
-#define META_PIXEL_STORE 0x2000
-#define META_ALL ~0x0
-/*@}*/
-
-
extern void
_mesa_meta_init(GLcontext *ctx);
@@ -57,25 +34,85 @@ extern void
_mesa_meta_free(GLcontext *ctx);
extern void
-_mesa_meta_blit_framebuffer(GLcontext *ctx,
- GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
- GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
- GLbitfield mask, GLenum filter);
+_mesa_meta_BlitFramebuffer(GLcontext *ctx,
+ GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+ GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+ GLbitfield mask, GLenum filter);
+
+extern void
+_mesa_meta_Clear(GLcontext *ctx, GLbitfield buffers);
+
+extern void
+_mesa_meta_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
+ GLsizei width, GLsizei height,
+ GLint dstx, GLint dsty, GLenum type);
+
+extern void
+_mesa_meta_DrawPixels(GLcontext *ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels);
+
+extern void
+_mesa_meta_Bitmap(GLcontext *ctx,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLubyte *bitmap);
+
+extern GLboolean
+_mesa_meta_check_generate_mipmap_fallback(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj);
+
+extern void
+_mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj);
+
+extern void
+_mesa_meta_CopyTexImage1D(GLcontext *ctx, GLenum target, GLint level,
+ GLenum internalFormat, GLint x, GLint y,
+ GLsizei width, GLint border);
+
+extern void
+_mesa_meta_CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+ GLenum internalFormat, GLint x, GLint y,
+ GLsizei width, GLsizei height, GLint border);
+
+extern void
+_mesa_meta_CopyTexSubImage1D(GLcontext *ctx, GLenum target, GLint level,
+ GLint xoffset,
+ GLint x, GLint y, GLsizei width);
+
+extern void
+_mesa_meta_CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset,
+ GLint x, GLint y,
+ GLsizei width, GLsizei height);
+
+extern void
+_mesa_meta_CopyTexSubImage3D(GLcontext *ctx, GLenum target, GLint level,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLint x, GLint y,
+ GLsizei width, GLsizei height);
+
+extern void
+_mesa_meta_CopyColorTable(GLcontext *ctx,
+ GLenum target, GLenum internalformat,
+ GLint x, GLint y, GLsizei width);
extern void
-_mesa_meta_clear(GLcontext *ctx, GLbitfield buffers);
+_mesa_meta_CopyColorSubTable(GLcontext *ctx,GLenum target, GLsizei start,
+ GLint x, GLint y, GLsizei width);
extern void
-_mesa_meta_copy_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty, GLenum type);
+_mesa_meta_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target,
+ GLenum internalFormat,
+ GLint x, GLint y, GLsizei width);
extern void
-_mesa_meta_draw_pixels(GLcontext *ctx,
- GLint x, GLint y, GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels);
+_mesa_meta_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target,
+ GLenum internalFormat, GLint x, GLint y,
+ GLsizei width, GLsizei height);
#endif /* META_H */
diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h
index 40a030ce0d..5e86324eec 100644
--- a/src/mesa/drivers/dri/common/extension_helper.h
+++ b/src/mesa/drivers/dri/common/extension_helper.h
@@ -316,6 +316,13 @@ static const char CombinerOutputNV_names[] =
"";
#endif
+#if defined(need_GL_NV_vertex_program)
+static const char VertexAttribs3fvNV_names[] =
+ "iip\0" /* Parameter signature */
+ "glVertexAttribs3fvNV\0"
+ "";
+#endif
+
#if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_shader_objects)
static const char Uniform2fARB_names[] =
"iff\0" /* Parameter signature */
@@ -575,6 +582,13 @@ static const char MatrixIndexusvARB_names[] =
"";
#endif
+#if defined(need_GL_ARB_draw_elements_base_vertex)
+static const char DrawElementsBaseVertex_names[] =
+ "iiipi\0" /* Parameter signature */
+ "glDrawElementsBaseVertex\0"
+ "";
+#endif
+
#if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program)
static const char DisableVertexAttribArrayARB_names[] =
"i\0" /* Parameter signature */
@@ -970,10 +984,10 @@ static const char GlobalAlphaFactordSUN_names[] =
"";
#endif
-#if defined(need_GL_NV_vertex_program)
-static const char VertexAttribs3fvNV_names[] =
+#if defined(need_GL_NV_register_combiners)
+static const char GetFinalCombinerInputParameterfvNV_names[] =
"iip\0" /* Parameter signature */
- "glVertexAttribs3fvNV\0"
+ "glGetFinalCombinerInputParameterfvNV\0"
"";
#endif
@@ -3918,6 +3932,13 @@ static const char MapBufferARB_names[] =
"";
#endif
+#if defined(need_GL_ARB_draw_elements_base_vertex)
+static const char MultiDrawElementsBaseVertex_names[] =
+ "ipipip\0" /* Parameter signature */
+ "glMultiDrawElementsBaseVertex\0"
+ "";
+#endif
+
#if defined(need_GL_EXT_coordinate_frame)
static const char Binormal3svEXT_names[] =
"p\0" /* Parameter signature */
@@ -4396,10 +4417,11 @@ static const char SpriteParameterivSGIX_names[] =
"";
#endif
-#if defined(need_GL_EXT_provoking_vertex)
+#if defined(need_GL_EXT_provoking_vertex) || defined(need_GL_ARB_provoking_vertex)
static const char ProvokingVertexEXT_names[] =
"i\0" /* Parameter signature */
"glProvokingVertexEXT\0"
+ "glProvokingVertex\0"
"";
#endif
@@ -4554,10 +4576,10 @@ static const char UniformMatrix3x4fv_names[] =
"";
#endif
-#if defined(need_GL_EXT_coordinate_frame)
-static const char Binormal3fvEXT_names[] =
- "p\0" /* Parameter signature */
- "glBinormal3fvEXT\0"
+#if defined(need_GL_ARB_draw_elements_base_vertex)
+static const char DrawRangeElementsBaseVertex_names[] =
+ "iiiiipi\0" /* Parameter signature */
+ "glDrawRangeElementsBaseVertex\0"
"";
#endif
@@ -4710,10 +4732,10 @@ static const char GetFragmentLightfvSGIX_names[] =
"";
#endif
-#if defined(need_GL_NV_register_combiners)
-static const char GetFinalCombinerInputParameterfvNV_names[] =
- "iip\0" /* Parameter signature */
- "glGetFinalCombinerInputParameterfvNV\0"
+#if defined(need_GL_EXT_coordinate_frame)
+static const char Binormal3fvEXT_names[] =
+ "p\0" /* Parameter signature */
+ "glBinormal3fvEXT\0"
"";
#endif
@@ -5090,6 +5112,15 @@ static const struct dri_extension_function GL_ARB_draw_buffers_functions[] = {
};
#endif
+#if defined(need_GL_ARB_draw_elements_base_vertex)
+static const struct dri_extension_function GL_ARB_draw_elements_base_vertex_functions[] = {
+ { DrawElementsBaseVertex_names, DrawElementsBaseVertex_remap_index, -1 },
+ { MultiDrawElementsBaseVertex_names, MultiDrawElementsBaseVertex_remap_index, -1 },
+ { DrawRangeElementsBaseVertex_names, DrawRangeElementsBaseVertex_remap_index, -1 },
+ { NULL, 0, 0 }
+};
+#endif
+
#if defined(need_GL_ARB_framebuffer_object)
static const struct dri_extension_function GL_ARB_framebuffer_object_functions[] = {
{ BlitFramebufferEXT_names, BlitFramebufferEXT_remap_index, -1 },
@@ -5164,6 +5195,13 @@ static const struct dri_extension_function GL_ARB_point_parameters_functions[] =
};
#endif
+#if defined(need_GL_ARB_provoking_vertex)
+static const struct dri_extension_function GL_ARB_provoking_vertex_functions[] = {
+ { ProvokingVertexEXT_names, ProvokingVertexEXT_remap_index, -1 },
+ { NULL, 0, 0 }
+};
+#endif
+
#if defined(need_GL_ARB_shader_objects)
static const struct dri_extension_function GL_ARB_shader_objects_functions[] = {
{ UniformMatrix3fvARB_names, UniformMatrix3fvARB_remap_index, -1 },
@@ -5520,8 +5558,8 @@ static const struct dri_extension_function GL_EXT_coordinate_frame_functions[] =
{ Binormal3dvEXT_names, Binormal3dvEXT_remap_index, -1 },
{ Tangent3iEXT_names, Tangent3iEXT_remap_index, -1 },
{ Tangent3bvEXT_names, Tangent3bvEXT_remap_index, -1 },
- { Binormal3fvEXT_names, Binormal3fvEXT_remap_index, -1 },
{ Tangent3bEXT_names, Tangent3bEXT_remap_index, -1 },
+ { Binormal3fvEXT_names, Binormal3fvEXT_remap_index, -1 },
{ BinormalPointerEXT_names, BinormalPointerEXT_remap_index, -1 },
{ Tangent3svEXT_names, Tangent3svEXT_remap_index, -1 },
{ Binormal3bEXT_names, Binormal3bEXT_remap_index, -1 },
@@ -5963,6 +6001,7 @@ static const struct dri_extension_function GL_NV_point_sprite_functions[] = {
static const struct dri_extension_function GL_NV_register_combiners_functions[] = {
{ CombinerOutputNV_names, CombinerOutputNV_remap_index, -1 },
{ CombinerParameterfvNV_names, CombinerParameterfvNV_remap_index, -1 },
+ { GetFinalCombinerInputParameterfvNV_names, GetFinalCombinerInputParameterfvNV_remap_index, -1 },
{ GetCombinerOutputParameterfvNV_names, GetCombinerOutputParameterfvNV_remap_index, -1 },
{ FinalCombinerInputNV_names, FinalCombinerInputNV_remap_index, -1 },
{ GetCombinerInputParameterfvNV_names, GetCombinerInputParameterfvNV_remap_index, -1 },
@@ -5971,7 +6010,6 @@ static const struct dri_extension_function GL_NV_register_combiners_functions[]
{ GetFinalCombinerInputParameterivNV_names, GetFinalCombinerInputParameterivNV_remap_index, -1 },
{ CombinerInputNV_names, CombinerInputNV_remap_index, -1 },
{ CombinerParameterfNV_names, CombinerParameterfNV_remap_index, -1 },
- { GetFinalCombinerInputParameterfvNV_names, GetFinalCombinerInputParameterfvNV_remap_index, -1 },
{ GetCombinerInputParameterivNV_names, GetCombinerInputParameterivNV_remap_index, -1 },
{ CombinerParameterivNV_names, CombinerParameterivNV_remap_index, -1 },
{ NULL, 0, 0 }
@@ -5998,6 +6036,7 @@ static const struct dri_extension_function GL_NV_vertex_array_range_functions[]
static const struct dri_extension_function GL_NV_vertex_program_functions[] = {
{ VertexAttrib4ubvNV_names, VertexAttrib4ubvNV_remap_index, -1 },
{ VertexAttrib4svNV_names, VertexAttrib4svNV_remap_index, -1 },
+ { VertexAttribs3fvNV_names, VertexAttribs3fvNV_remap_index, -1 },
{ VertexAttribs1dvNV_names, VertexAttribs1dvNV_remap_index, -1 },
{ VertexAttrib1fvNV_names, VertexAttrib1fvNV_remap_index, -1 },
{ VertexAttrib4fNV_names, VertexAttrib4fNV_remap_index, -1 },
@@ -6006,7 +6045,6 @@ static const struct dri_extension_function GL_NV_vertex_program_functions[] = {
{ VertexAttribs3dvNV_names, VertexAttribs3dvNV_remap_index, -1 },
{ VertexAttribs4fvNV_names, VertexAttribs4fvNV_remap_index, -1 },
{ VertexAttrib2sNV_names, VertexAttrib2sNV_remap_index, -1 },
- { VertexAttribs3fvNV_names, VertexAttribs3fvNV_remap_index, -1 },
{ ProgramEnvParameter4fvARB_names, ProgramEnvParameter4fvARB_remap_index, -1 },
{ LoadProgramNV_names, LoadProgramNV_remap_index, -1 },
{ VertexAttrib4fvNV_names, VertexAttrib4fvNV_remap_index, -1 },
diff --git a/src/mesa/drivers/dri/ffb/ffb_tex.c b/src/mesa/drivers/dri/ffb/ffb_tex.c
index 69d30aedba..95058e9069 100644
--- a/src/mesa/drivers/dri/ffb/ffb_tex.c
+++ b/src/mesa/drivers/dri/ffb/ffb_tex.c
@@ -30,24 +30,6 @@
#include "ffb_tex.h"
/* No texture unit, all software. */
-/* XXX this function isn't needed since _mesa_init_driver_functions()
- * will make all these assignments.
- */
void ffbDDInitTexFuncs(GLcontext *ctx)
{
- /*
- ctx->Driver.ChooseTextureFormat = _mesa_choose_tex_format;
- ctx->Driver.TexImage1D = _mesa_store_teximage1d;
- ctx->Driver.TexImage2D = _mesa_store_teximage2d;
- ctx->Driver.TexImage3D = _mesa_store_teximage3d;
- ctx->Driver.TexSubImage1D = _mesa_store_texsubimage1d;
- ctx->Driver.TexSubImage2D = _mesa_store_texsubimage2d;
- ctx->Driver.TexSubImage3D = _mesa_store_texsubimage3d;
- ctx->Driver.CopyTexImage1D = _swrast_copy_teximage1d;
- ctx->Driver.CopyTexImage2D = _swrast_copy_teximage2d;
- ctx->Driver.CopyTexSubImage1D = _swrast_copy_texsubimage1d;
- ctx->Driver.CopyTexSubImage2D = _swrast_copy_texsubimage2d;
- ctx->Driver.CopyTexSubImage3D = _swrast_copy_texsubimage3d;
- ctx->Driver.TestProxyTexImage = _mesa_test_proxy_teximage;
- */
}
diff --git a/src/mesa/drivers/dri/glcore/Makefile b/src/mesa/drivers/dri/glcore/Makefile
deleted file mode 100644
index ac7e1de928..0000000000
--- a/src/mesa/drivers/dri/glcore/Makefile
+++ /dev/null
@@ -1,84 +0,0 @@
-# src/mesa/drivers/dri/glcore/Makefile
-
-TOP = ../../../../..
-include $(TOP)/configs/current
-
-LIBNAME = glcore_dri.so
-
-DRIVER_SOURCES = glcore_driver.c \
- $(TOP)/src/mesa/drivers/common/driverfuncs.c \
- ../common/dri_util.c
-
-C_SOURCES = \
- $(DRIVER_SOURCES) \
- $(DRI_SOURCES)
-
-
-# Include directories
-INCLUDE_DIRS = \
- -I. \
- -I../common \
- -I../dri_client \
- -I../dri_client/imports \
- -Iserver \
- -I$(TOP)/include \
- -I$(DRM_SOURCE_PATH)/shared-core \
- -I$(TOP)/src/mesa \
- -I$(TOP)/src/mesa/main \
- -I$(TOP)/src/mesa/glapi \
- -I$(TOP)/src/mesa/math \
- -I$(TOP)/src/mesa/transform \
- -I$(TOP)/src/mesa/shader \
- -I$(TOP)/src/mesa/swrast \
- -I$(TOP)/src/mesa/swrast_setup
-
-# Core Mesa objects
-MESA_MODULES = $(TOP)/src/mesa/libmesa.a
-
-# Libraries that the driver shared lib depends on
-LIB_DEPS = -lm -lpthread -lc
-# LIB_DEPS = -lGL -lm -lpthread -lc
-
-
-ASM_SOURCES =
-
-OBJECTS = $(C_SOURCES:.c=.o) \
- $(ASM_SOURCES:.S=.o)
-
-
-##### RULES #####
-
-.c.o:
- $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@
-
-.S.o:
- $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@
-
-
-##### TARGETS #####
-
-default: depend $(TOP)/$(LIB_DIR)/$(LIBNAME)
-
-
-$(TOP)/$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile
- CC="$(CC)" CXX="$(CXX)" $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(TOP)/$(LIB_DIR) \
- $(OBJECTS) $(WINLIB) $(LIB_DEPS) $(WINOBJ) $(MESA_MODULES)
-
-
-depend: $(C_SOURCES) $(ASM_SOURCES)
- rm -f depend
- touch depend
- $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(C_SOURCES) $(ASM_SOURCES) \
- > /dev/null
-
-
-# Emacs tags
-tags:
- etags `find . -name \*.[ch]` `find ../include`
-
-
-clean:
- -rm -f *.o server/*.o
-
-
-include depend
diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile
index 9d049dea8f..393312e732 100644
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -19,7 +19,6 @@ DRIVER_SOURCES = \
intel_batchbuffer.c \
intel_clear.c \
intel_extensions.c \
- intel_generatemipmap.c \
intel_mipmap_tree.c \
intel_tex_layout.c \
intel_tex_image.c \
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 3ab7d682ee..7d4c7cfbab 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -40,6 +40,7 @@
#include "utils.h"
#include "i915_reg.h"
+#include "i915_program.h"
#include "intel_regions.h"
#include "intel_batchbuffer.h"
@@ -80,6 +81,8 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
i915_update_stencil(ctx);
if (new_state & (_NEW_LIGHT))
i915_update_provoking_vertex(ctx);
+ if (new_state & (_NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
+ i915_update_program(ctx);
}
@@ -139,7 +142,7 @@ i915CreateContext(const __GLcontextModes * mesaVis,
ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
-
+ ctx->Const.MaxVarying = I915_TEX_UNITS;
/* Advertise the full hardware capabilities. The new memory
* manager should cope much better with overload situations:
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
index 8de4a9d0d3..082d614442 100644
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -121,10 +121,14 @@ enum {
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
+#define I915_MAX_INSN (I915_MAX_DECL_INSN + \
+ I915_MAX_TEX_INSN + \
+ I915_MAX_ALU_INSN)
-#define I915_PROGRAM_SIZE 192
-
-#define I915_MAX_INSN (I915_MAX_TEX_INSN+I915_MAX_ALU_INSN)
+/* Maximum size of the program packet, which matches the limits on
+ * decl, tex, and ALU instructions.
+ */
+#define I915_PROGRAM_SIZE (I915_MAX_INSN * 3 + 1)
/* Hardware version of a parsed fragment program. "Derived" from the
* mesa fragment_program struct.
@@ -154,8 +158,9 @@ struct i915_fragment_program
*/
GLcontext *ctx;
- GLuint declarations[I915_PROGRAM_SIZE];
- GLuint program[I915_PROGRAM_SIZE];
+ /* declarations contains the packet header. */
+ GLuint declarations[I915_MAX_DECL_INSN * 3 + 1];
+ GLuint program[(I915_MAX_TEX_INSN + I915_MAX_ALU_INSN) * 3];
GLfloat constant[I915_MAX_CONSTANT][4];
GLuint constant_flags[I915_MAX_CONSTANT];
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 2db10c60e9..d9c61446f5 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -89,7 +89,8 @@ src_vector(struct i915_fragment_program *p,
*/
case PROGRAM_TEMPORARY:
if (source->Index >= I915_MAX_TEMPORARY) {
- i915_program_error(p, "Exceeded max temporary reg");
+ i915_program_error(p, "Exceeded max temporary reg: %d/%d",
+ source->Index, I915_MAX_TEMPORARY);
return 0;
}
src = UREG(REG_TYPE_R, source->Index);
@@ -121,10 +122,23 @@ src_vector(struct i915_fragment_program *p,
src = i915_emit_decl(p, REG_TYPE_T,
T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
D0_CHANNEL_ALL);
+ break;
+
+ case FRAG_ATTRIB_VAR0:
+ case FRAG_ATTRIB_VAR0 + 1:
+ case FRAG_ATTRIB_VAR0 + 2:
+ case FRAG_ATTRIB_VAR0 + 3:
+ case FRAG_ATTRIB_VAR0 + 4:
+ case FRAG_ATTRIB_VAR0 + 5:
+ case FRAG_ATTRIB_VAR0 + 6:
+ case FRAG_ATTRIB_VAR0 + 7:
+ src = i915_emit_decl(p, REG_TYPE_T,
+ T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0),
+ D0_CHANNEL_ALL);
break;
default:
- i915_program_error(p, "Bad source->Index");
+ i915_program_error(p, "Bad source->Index: %d", source->Index);
return 0;
}
break;
@@ -146,6 +160,7 @@ src_vector(struct i915_fragment_program *p,
case PROGRAM_CONSTANT:
case PROGRAM_STATE_VAR:
case PROGRAM_NAMED_PARAM:
+ case PROGRAM_UNIFORM:
src =
i915_emit_param4fv(p,
program->Base.Parameters->ParameterValues[source->
@@ -153,7 +168,7 @@ src_vector(struct i915_fragment_program *p,
break;
default:
- i915_program_error(p, "Bad source->File");
+ i915_program_error(p, "Bad source->File: %d", source->File);
return 0;
}
@@ -186,13 +201,14 @@ get_result_vector(struct i915_fragment_program *p,
p->depth_written = 1;
return UREG(REG_TYPE_OD, 0);
default:
- i915_program_error(p, "Bad inst->DstReg.Index");
+ i915_program_error(p, "Bad inst->DstReg.Index: %d",
+ inst->DstReg.Index);
return 0;
}
case PROGRAM_TEMPORARY:
return UREG(REG_TYPE_R, inst->DstReg.Index);
default:
- i915_program_error(p, "Bad inst->DstReg.File");
+ i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
return 0;
}
}
@@ -231,7 +247,7 @@ translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
case TEXTURE_CUBE_INDEX:
return D0_SAMPLE_TYPE_CUBE;
default:
- i915_program_error(p, "TexSrcBit");
+ i915_program_error(p, "TexSrcBit: %d", bit);
return 0;
}
}
@@ -351,7 +367,7 @@ upload_program(struct i915_fragment_program *p)
while (1) {
GLuint src0, src1, src2, flags;
- GLuint tmp = 0, consts0 = 0, consts1 = 0;
+ GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
switch (inst->Opcode) {
case OPCODE_ABS:
@@ -503,6 +519,10 @@ upload_program(struct i915_fragment_program *p)
EMIT_1ARG_ARITH(A0_FLR);
break;
+ case OPCODE_TRUNC:
+ EMIT_1ARG_ARITH(A0_TRC);
+ break;
+
case OPCODE_FRC:
EMIT_1ARG_ARITH(A0_FRC);
break;
@@ -516,6 +536,22 @@ upload_program(struct i915_fragment_program *p)
0, src0, T0_TEXKILL);
break;
+      case OPCODE_KIL_NV:
+         /* Only the unconditional form (condition code TR) is translated;
+          * any other condition code flags the program as erroneous.
+          */
+         if (inst->DstReg.CondMask == COND_TR) {
+            tmp = i915_get_utemp(p);
+
+            /* TEXKILL discards the fragment when a component of its
+             * coordinate operand is negative, so an unconditional kill
+             * needs -1 in every channel.  The previous operand of +1 could
+             * never trigger the kill.
+             */
+            i915_emit_texld(p, get_live_regs(p, inst),
+                            tmp, A0_DEST_CHANNEL_ALL,
+                            0, /* use a dummy dest reg */
+                            negate(swizzle(tmp, ONE, ONE, ONE, ONE),
+                                   1, 1, 1, 1), /* always */
+                            T0_TEXKILL);
+         } else {
+            p->error = 1;
+            i915_program_error(p, "Unsupported KIL_NV condition code: %d",
+                               inst->DstReg.CondMask);
+         }
+         break;
+
+
case OPCODE_LG2:
src0 = src_vector(p, &inst->SrcReg[0], program);
@@ -615,6 +651,20 @@ upload_program(struct i915_fragment_program *p)
EMIT_2ARG_ARITH(A0_MUL);
break;
+      case OPCODE_NOISE1:
+      case OPCODE_NOISE2:
+      case OPCODE_NOISE3:
+      case OPCODE_NOISE4:
+         /* Don't implement noise because we just don't have the instructions
+          * to spare.  We aren't the first vendor to do so.
+          *
+          * The program is flagged as erroneous and the destination is
+          * written with a MOV whose source has every channel swizzled to
+          * ZERO.  tmp is named as the source register because it is
+          * initialized at the top of the loop body; src0 is never assigned
+          * on this path.
+          */
+         i915_program_error(p, "Stubbed-out noise functions");
+         i915_emit_arith(p,
+                         A0_MOV,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(tmp, ZERO, ZERO, ZERO, ZERO), 0, 0);
+         /* Stop here: without the break this case fell through into
+          * OPCODE_POW and emitted its instruction sequence as well.
+          */
+         break;
+
case OPCODE_POW:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
@@ -721,9 +771,38 @@ upload_program(struct i915_fragment_program *p)
}
break;
- case OPCODE_SGE:
- EMIT_2ARG_ARITH(A0_SGE);
- break;
+ case OPCODE_SEQ:
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+ dst = get_result_vector(p, inst);
+
+ /* dst = src1 >= src2 */
+ i915_emit_arith(p,
+ A0_SGE,
+ dst,
+ flags, 0,
+ src_vector(p, &inst->SrcReg[0], program),
+ src_vector(p, &inst->SrcReg[1], program),
+ 0);
+ /* tmp = src1 <= src2 */
+ i915_emit_arith(p,
+ A0_SGE,
+ tmp,
+ flags, 0,
+ negate(src_vector(p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector(p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ /* dst = tmp && dst */
+ i915_emit_arith(p,
+ A0_MUL,
+ dst,
+ flags, 0,
+ dst,
+ tmp,
+ 0);
+ break;
case OPCODE_SIN:
src0 = src_vector(p, &inst->SrcReg[0], program);
@@ -809,10 +888,71 @@ upload_program(struct i915_fragment_program *p)
break;
+ case OPCODE_SGE:
+ EMIT_2ARG_ARITH(A0_SGE);
+ break;
+
+ case OPCODE_SGT:
+ i915_emit_arith(p,
+ A0_SLT,
+ get_result_vector( p, inst ),
+ get_result_flags( inst ), 0,
+ negate(src_vector( p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector( p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ break;
+
+ case OPCODE_SLE:
+ i915_emit_arith(p,
+ A0_SGE,
+ get_result_vector( p, inst ),
+ get_result_flags( inst ), 0,
+ negate(src_vector( p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector( p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ break;
+
case OPCODE_SLT:
EMIT_2ARG_ARITH(A0_SLT);
break;
+ case OPCODE_SNE:
+ tmp = i915_get_utemp(p);
+ flags = get_result_flags(inst);
+ dst = get_result_vector(p, inst);
+
+ /* dst = src1 < src2 */
+ i915_emit_arith(p,
+ A0_SLT,
+ dst,
+ flags, 0,
+ src_vector(p, &inst->SrcReg[0], program),
+ src_vector(p, &inst->SrcReg[1], program),
+ 0);
+ /* tmp = src1 > src2 */
+ i915_emit_arith(p,
+ A0_SLT,
+ tmp,
+ flags, 0,
+ negate(src_vector(p, &inst->SrcReg[0], program),
+ 1, 1, 1, 1),
+ negate(src_vector(p, &inst->SrcReg[1], program),
+ 1, 1, 1, 1),
+ 0);
+ /* dst = tmp || dst */
+ i915_emit_arith(p,
+ A0_ADD,
+ dst,
+ flags | A0_DEST_SATURATE, 0,
+ dst,
+ tmp,
+ 0);
+ break;
+
case OPCODE_SUB:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
@@ -869,8 +1009,39 @@ upload_program(struct i915_fragment_program *p)
case OPCODE_END:
return;
+ case OPCODE_BGNLOOP:
+ case OPCODE_BGNSUB:
+ case OPCODE_BRA:
+ case OPCODE_BRK:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ case OPCODE_ELSE:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
+ case OPCODE_ENDSUB:
+ case OPCODE_IF:
+ case OPCODE_RET:
+ p->error = 1;
+ i915_program_error(p, "Unsupported opcode: %s",
+ _mesa_opcode_string(inst->Opcode));
+ return;
+
+ case OPCODE_EXP:
+ case OPCODE_LOG:
+ /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
+ * prog_instruction.h, but apparently GLSL doesn't ever emit them.
+ * Instead, it translates to EX2 or LG2.
+ */
+ case OPCODE_TXD:
+ case OPCODE_TXL:
+ /* These opcodes are claimed by GLSL in prog_instruction.h, but
+ * only NV_vp/fp appears to emit them.
+ */
default:
- i915_program_error(p, "bad opcode");
+ i915_program_error(p, "bad opcode: %s",
+ _mesa_opcode_string(inst->Opcode));
return;
}
@@ -906,7 +1077,7 @@ check_wpos(struct i915_fragment_program *p)
p->wpos_tex = -1;
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
- if (inputs & FRAG_BIT_TEX(i))
+ if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i)))
continue;
else if (inputs & FRAG_BIT_WPOS) {
p->wpos_tex = i;
@@ -1055,6 +1226,28 @@ i915ProgramStringNotify(GLcontext * ctx,
_tnl_program_string(ctx, target, prog);
}
+/**
+ * Synchronize the i915 context with the fragment program Mesa currently
+ * has bound.
+ *
+ * Invoked from i915InvalidateState() when _NEW_PROGRAM or
+ * _NEW_PROGRAM_CONSTANTS is flagged.  If the bound program differs from the
+ * one the context last tracked, the old program's on_hardware and
+ * params_uptodate flags are cleared.  The bound program is translated if
+ * that has not happened yet, and the I915_FALLBACK_PROGRAM fallback is set
+ * or cleared according to the program's error flag.
+ *
+ * NOTE(review): assumes ctx->FragmentProgram._Current is never NULL here;
+ * it is dereferenced without a check -- confirm against callers.
+ */
+void
+i915_update_program(GLcontext *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   struct i915_fragment_program *bound =
+      (struct i915_fragment_program *) ctx->FragmentProgram._Current;
+   struct i915_fragment_program *previous = i915->current_program;
+
+   if (previous != bound) {
+      /* The outgoing program is no longer resident on the hardware. */
+      if (previous != NULL) {
+         previous->on_hardware = 0;
+         previous->params_uptodate = 0;
+      }
+
+      i915->current_program = bound;
+   }
+
+   if (!bound->translated) {
+      translate_program(bound);
+   }
+
+   FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, bound->error);
+}
void
i915ValidateFragmentProgram(struct i915_context *i915)
@@ -1072,16 +1265,6 @@ i915ValidateFragmentProgram(struct i915_context *i915)
GLuint s2 = S2_TEXCOORD_NONE;
int i, offset = 0;
- if (i915->current_program != p) {
- if (i915->current_program) {
- i915->current_program->on_hardware = 0;
- i915->current_program->params_uptodate = 0;
- }
-
- i915->current_program = p;
- }
-
-
/* Important:
*/
VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
@@ -1125,6 +1308,14 @@ i915ValidateFragmentProgram(struct i915_context *i915)
EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
}
+ else if (inputsRead & FRAG_BIT_VAR(i)) {
+ int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
+
+ s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+ s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+
+ EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
+ }
else if (i == p->wpos_tex) {
/* If WPOS is required, duplicate the XYZ position data in an
diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c
index e87700f8e0..e7908bd48f 100644
--- a/src/mesa/drivers/dri/i915/i915_program.c
+++ b/src/mesa/drivers/dri/i915/i915_program.c
@@ -130,6 +130,7 @@ i915_emit_decl(struct i915_fragment_program *p,
*(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
*(p->decl++) = D1_MBZ;
*(p->decl++) = D2_MBZ;
+ assert(p->decl <= p->declarations + ARRAY_SIZE(p->declarations));
p->nr_decl_insn++;
return reg;
@@ -186,6 +187,11 @@ i915_emit_arith(struct i915_fragment_program * p,
p->utemp_flag = old_utemp_flag; /* restore */
}
+ if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
+ i915_program_error(p, "Program contains too many instructions");
+ return UREG_BAD;
+ }
+
*(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));
*(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));
*(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));
@@ -270,6 +276,11 @@ GLuint i915_emit_texld( struct i915_fragment_program *p,
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
p->nr_tex_indirect++;
+ if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
+ i915_program_error(p, "Program contains too many instructions");
+ return UREG_BAD;
+ }
+
*(p->csr++) = (op |
T0_DEST( dest ) |
T0_SAMPLER( sampler ));
@@ -424,12 +435,21 @@ i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
return 0;
}
-
-
+/* Warning the user about program errors seems to be quite valuable, from
+ * our bug reports. It unfortunately means piglit reporting errors
+ * when we fall back to software due to an unsupportable program, though.
+ */
void
-i915_program_error(struct i915_fragment_program *p, const char *msg)
+i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
{
- _mesa_problem(NULL, "i915_program_error: %s", msg);
+ va_list args;
+
+ fprintf(stderr, "i915_program_error: ");
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ fprintf(stderr, "\n");
p->error = 1;
}
@@ -511,7 +531,8 @@ i915_upload_program(struct i915_context *i915,
GLuint program_size = p->csr - p->program;
GLuint decl_size = p->decl - p->declarations;
- FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, p->error);
+ if (p->error)
+ return;
/* Could just go straight to the batchbuffer from here:
*/
diff --git a/src/mesa/drivers/dri/i915/i915_program.h b/src/mesa/drivers/dri/i915/i915_program.h
index 14a3f08801..0d17d04865 100644
--- a/src/mesa/drivers/dri/i915/i915_program.h
+++ b/src/mesa/drivers/dri/i915/i915_program.h
@@ -145,7 +145,7 @@ extern GLuint i915_emit_param4fv(struct i915_fragment_program *p,
const GLfloat * values);
extern void i915_program_error(struct i915_fragment_program *p,
- const char *msg);
+ const char *fmt, ...);
extern void i915_init_program(struct i915_context *i915,
struct i915_fragment_program *p);
@@ -155,7 +155,6 @@ extern void i915_upload_program(struct i915_context *i915,
extern void i915_fini_program(struct i915_fragment_program *p);
-
-
+extern void i915_update_program(GLcontext *ctx);
#endif
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
index 9a723d3cd7..9e2523932f 100644
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -54,8 +54,7 @@ i915_render_prevalidate(struct intel_context *intel)
{
struct i915_context *i915 = i915_context(&intel->ctx);
- if (!intel->Fallback)
- i915ValidateFragmentProgram(i915);
+ i915ValidateFragmentProgram(i915);
}
static void
diff --git a/src/mesa/drivers/dri/i915/intel_generatemipmap.c b/src/mesa/drivers/dri/i915/intel_generatemipmap.c
deleted file mode 120000
index 4c6b37ada0..0000000000
--- a/src/mesa/drivers/dri/i915/intel_generatemipmap.c
+++ /dev/null
@@ -1 +0,0 @@
-../intel/intel_generatemipmap.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
index a905455342..0641e6df9d 100644
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -1076,7 +1076,9 @@ intelRunPipeline(GLcontext * ctx)
intel->NewGLState = 0;
}
+ intel_map_vertex_shader_textures(ctx);
_tnl_run_pipeline(ctx);
+ intel_unmap_vertex_shader_textures(ctx);
_mesa_unlock_context_textures(ctx);
}
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 6e9a9a29a3..57dcc91586 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -14,7 +14,6 @@ DRIVER_SOURCES = \
intel_decode.c \
intel_extensions.c \
intel_fbo.c \
- intel_generatemipmap.c \
intel_mipmap_tree.c \
intel_regions.c \
intel_screen.c \
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index c724218cf5..1088a7a607 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -39,12 +39,14 @@
static void prepare_cc_vp( struct brw_context *brw )
{
+ GLcontext *ctx = &brw->intel.ctx;
struct brw_cc_viewport ccv;
memset(&ccv, 0, sizeof(ccv));
- ccv.min_depth = 0.0;
- ccv.max_depth = 1.0;
+ /* _NEW_VIEWPORT */
+ ccv.min_depth = ctx->Viewport.Near;
+ ccv.max_depth = ctx->Viewport.Far;
dri_bo_unreference(brw->cc.vp_bo);
brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
@@ -52,7 +54,7 @@ static void prepare_cc_vp( struct brw_context *brw )
const struct brw_tracked_state brw_cc_vp = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_VIEWPORT,
.brw = BRW_NEW_CONTEXT,
.cache = 0
},
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 5762c9577c..234b3744bf 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -43,11 +43,14 @@ struct brw_clip_unit_key {
unsigned int curbe_offset;
unsigned int nr_urb_entries, urb_size;
+
+ GLboolean depth_clamp;
};
static void
clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
{
+ GLcontext *ctx = &brw->intel.ctx;
memset(key, 0, sizeof(*key));
/* CACHE_NEW_CLIP_PROG */
@@ -62,6 +65,9 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
/* BRW_NEW_URB_FENCE */
key->nr_urb_entries = brw->urb.nr_clip_entries;
key->urb_size = brw->urb.vsize;
+
+ /* _NEW_TRANSFORM */
+ key->depth_clamp = ctx->Transform.DepthClamp;
}
static dri_bo *
@@ -117,7 +123,8 @@ clip_unit_create_from_key(struct brw_context *brw,
clip.clip5.userclip_enable_flags = 0x7f;
clip.clip5.userclip_must_clip = 1;
clip.clip5.guard_band_enable = 0;
- clip.clip5.viewport_z_clip_enable = 1;
+ if (!key->depth_clamp)
+ clip.clip5.viewport_z_clip_enable = 1;
clip.clip5.viewport_xy_clip_enable = 1;
clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
clip.clip5.api_mode = BRW_CLIP_API_OGL;
@@ -168,7 +175,7 @@ static void upload_clip_unit( struct brw_context *brw )
const struct brw_tracked_state brw_clip_unit = {
.dirty = {
- .mesa = 0,
+ .mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_URB_FENCE),
.cache = CACHE_NEW_CLIP_PROG
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index a5209ac41b..fa3e32c7ff 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -705,10 +705,6 @@ void brw_debug_batch(struct intel_context *intel);
/*======================================================================
* brw_tex.c
*/
-void brwUpdateTextureState( struct intel_context *intel );
-void brw_FrameBufferTexInit( struct brw_context *brw,
- struct intel_region *region );
-void brw_FrameBufferTexDestroy( struct brw_context *brw );
void brw_validate_textures( struct brw_context *brw );
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index c53bd47bb5..44bb7bd588 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -25,13 +25,15 @@
*
**************************************************************************/
-#include <stdlib.h>
#include "main/glheader.h"
#include "main/context.h"
#include "main/state.h"
-#include "main/api_validate.h"
#include "main/enums.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
#include "brw_draw.h"
#include "brw_defines.h"
@@ -42,11 +44,6 @@
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
-#include "tnl/tnl.h"
-#include "vbo/vbo_context.h"
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-
#define FILE_DEBUG_FLAG DEBUG_BATCH
static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
@@ -145,7 +142,7 @@ static void brw_emit_prim(struct brw_context *brw,
prim_packet.start_vert_location += brw->ib.start_vertex_offset;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
- prim_packet.base_vert_location = 0;
+ prim_packet.base_vert_location = prim->basevertex;
/* Can't wrap here, since we rely on the validated state. */
brw->no_batch_wrap = GL_TRUE;
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 4aa17fa02d..765ae5a2fe 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -25,9 +25,9 @@
*
**************************************************************************/
-#include <stdlib.h>
#include "main/glheader.h"
+#include "main/bufferobj.h"
#include "main/context.h"
#include "main/state.h"
#include "main/api_validate.h"
@@ -384,7 +384,7 @@ static void brw_prepare_vertices(struct brw_context *brw)
input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
- if (input->glarray->BufferObj->Name != 0) {
+ if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
struct intel_buffer_object *intel_buffer =
intel_buffer_object(input->glarray->BufferObj);
@@ -623,7 +623,7 @@ static void brw_prepare_indices(struct brw_context *brw)
/* Turn into a proper VBO:
*/
- if (!bufferobj->Name) {
+ if (!_mesa_is_bufferobj(bufferobj)) {
brw->ib.start_vertex_offset = 0;
/* Get new bufferobj, offset:
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c
index a761c03153..ed9d2ffe60 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -93,7 +93,10 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
gs.thread4.nr_urb_entries = key->nr_urb_entries;
gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
- gs.thread4.max_threads = 0; /* Hardware requirement */
+ if (key->nr_urb_entries >= 8)
+ gs.thread4.max_threads = 1;
+ else
+ gs.thread4.max_threads = 0;
if (BRW_IS_IGDNG(brw))
gs.thread4.rendering_enable = 1;
diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c
index 71bff166dd..e911b105b2 100644
--- a/src/mesa/drivers/dri/i965/brw_tex.c
+++ b/src/mesa/drivers/dri/i965/brw_tex.c
@@ -39,38 +39,6 @@
#include "intel_tex.h"
#include "brw_context.h"
-
-void brw_FrameBufferTexInit( struct brw_context *brw,
- struct intel_region *region )
-{
- struct intel_context *intel = &brw->intel;
- GLcontext *ctx = &intel->ctx;
- struct gl_texture_object *obj;
- struct gl_texture_image *img;
-
- intel->frame_buffer_texobj = obj =
- ctx->Driver.NewTextureObject( ctx, (GLuint) -1, GL_TEXTURE_2D );
-
- obj->MinFilter = GL_NEAREST;
- obj->MagFilter = GL_NEAREST;
-
- img = ctx->Driver.NewTextureImage( ctx );
-
- _mesa_init_teximage_fields( ctx, GL_TEXTURE_2D, img,
- region->pitch, region->height, 1, 0,
- region->cpp == 4 ? GL_RGBA : GL_RGB );
-
- _mesa_set_tex_image( obj, GL_TEXTURE_2D, 0, img );
-}
-
-void brw_FrameBufferTexDestroy( struct brw_context *brw )
-{
- if (brw->intel.frame_buffer_texobj != NULL)
- brw->intel.ctx.Driver.DeleteTexture( &brw->intel.ctx,
- brw->intel.frame_buffer_texobj );
- brw->intel.frame_buffer_texobj = NULL;
-}
-
/**
* Finalizes all textures, completing any rendering that needs to be done
* to prepare them.
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index 108e19cdbc..1638ef8111 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1208,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
0, /* response len */
eot, /* eot */
- 1, /* writes complete */
+ eot, /* writes complete */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
@@ -1270,9 +1270,27 @@ post_vs_emit( struct brw_vs_compile *c,
}
static uint32_t
-get_predicate(uint32_t swizzle)
+get_predicate(const struct prog_instruction *inst)
{
- switch (swizzle) {
+ if (inst->DstReg.CondMask == COND_TR)
+ return BRW_PREDICATE_NONE;
+
+ /* All of GLSL only produces predicates for COND_NE and one channel per
+ * vector. Fail badly if someone starts doing something else, as it might
+ * mean infinite looping or something.
+ *
+ * We'd like to support all the condition codes, but our hardware doesn't
+ * quite match the Mesa IR, which is modeled after the NV extensions. For
+ * those, the instruction may update the condition codes or not, then any
+ * later instruction may use one of those condition codes. For gen4, the
+ * instruction may update the flags register based on one of the condition
+ * codes output by the instruction, and then further instructions may
+ * predicate on that. We can probably support this, but it won't
+ * necessarily be easy.
+ */
+ assert(inst->DstReg.CondMask == COND_NE);
+
+ switch (inst->DstReg.CondSwizzle) {
case SWIZZLE_XXXX:
return BRW_PREDICATE_ALIGN16_REPLICATE_X;
case SWIZZLE_YYYY:
@@ -1282,7 +1300,8 @@ get_predicate(uint32_t swizzle)
case SWIZZLE_WWWW:
return BRW_PREDICATE_ALIGN16_REPLICATE_W;
default:
- _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", swizzle);
+ _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
+ inst->DstReg.CondMask);
return BRW_PREDICATE_NORMAL;
}
}
@@ -1294,6 +1313,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
const GLuint nr_insns = c->vp->program.Base.NumInstructions;
GLuint insn, if_depth = 0, loop_depth = 0;
GLuint end_offset = 0;
@@ -1492,8 +1512,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_IF:
assert(if_depth < MAX_IF_DEPTH);
if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
- if_inst[if_depth]->header.predicate_control =
- get_predicate(inst->DstReg.CondSwizzle);
+ /* Note that brw_IF smashes the predicate_control field. */
+ if_inst[if_depth]->header.predicate_control = get_predicate(inst);
if_depth++;
break;
case OPCODE_ELSE:
@@ -1503,45 +1523,48 @@ void brw_vs_emit(struct brw_vs_compile *c )
assert(if_depth > 0);
brw_ENDIF(p, if_inst[--if_depth]);
break;
-#if 0
case OPCODE_BGNLOOP:
loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
break;
case OPCODE_BRK:
+ brw_set_predicate_control(p, get_predicate(inst));
brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CONT:
+ brw_set_predicate_control(p, get_predicate(inst));
brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
{
struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
loop_depth--;
+
+ if (BRW_IS_IGDNG(brw))
+ br = 2;
+
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
/* patch all the BREAK/CONT instructions from last BEGINLOOP */
while (inst0 > loop_inst[loop_depth]) {
inst0--;
if (inst0->header.opcode == BRW_OPCODE_BREAK) {
- inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
inst0->bits3.if_else.pop_count = 0;
}
else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
- inst0->bits3.if_else.jump_count = inst1 - inst0;
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
inst0->bits3.if_else.pop_count = 0;
}
}
}
break;
-#else
- (void) loop_inst;
- (void) loop_depth;
-#endif
case OPCODE_BRA:
- brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+ brw_set_predicate_control(p, get_predicate(inst));
brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
- brw_set_predicate_control_flag_value(p, 0xff);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CAL:
brw_set_access_mode(p, BRW_ALIGN_1);
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index ac11790151..124fde25fe 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -69,8 +69,6 @@ static void brw_destroy_context( struct intel_context *intel )
_mesa_free(brw->wm.compile_data);
- brw_FrameBufferTexDestroy( brw );
-
for (i = 0; i < brw->state.nr_color_regions; i++)
intel_region_release(&brw->state.color_regions[i]);
brw->state.nr_color_regions = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index ae98b5492d..872b1f3ecf 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -38,6 +38,8 @@
#include "brw_context.h"
#include "brw_eu.h"
+#define SATURATE (1<<5)
+
/* A big lookup table is used to figure out which and how many
* additional regs will inserted before the main payload in the WM
* program execution. These mainly relate to depth and stencil
@@ -203,7 +205,6 @@ struct brw_wm_compile {
GLuint fp_temp;
GLuint fp_interp_emitted;
GLuint fp_fragcolor_emitted;
- GLuint fp_deriv_emitted;
struct prog_src_register pixel_xy;
struct prog_src_register delta_xy;
@@ -299,5 +300,10 @@ void brw_wm_lookup_iz( GLuint line_aa,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index 268f7965c0..bf80a2942a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,8 +34,6 @@
#include "brw_context.h"
#include "brw_wm.h"
-#define SATURATE (1<<5)
-
/* Not quite sure how correct this is - need to understand horiz
* vs. vertical strides a little better.
*/
@@ -281,6 +279,79 @@ static void emit_frontfacing( struct brw_compile *p,
brw_set_predicate_control_flag_value(p, 0xff);
}
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ * DDX DDY
+ * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
+ * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
+ * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
+ * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
+ * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
+ * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
+ * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
+ * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other. We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ GLboolean is_ddx,
+ const struct brw_reg *arg0)
+{
+ int i;
+ struct brw_reg src0, src1;
+
+ if (mask & SATURATE)
+ brw_set_saturate(p, 1);
+ for (i = 0; i < 4; i++ ) {
+ if (mask & (1<<i)) {
+ if (is_ddx) {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ } else {
+ src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ }
+ brw_ADD(p, dst[i], src0, negate(src1));
+ }
+ }
+ if (mask & SATURATE)
+ brw_set_saturate(p, 0);
+}
+
static void emit_alu1( struct brw_compile *p,
struct brw_instruction *(*func)(struct brw_compile *,
struct brw_reg,
@@ -908,6 +979,20 @@ static void emit_kil( struct brw_wm_compile *c,
}
}
+/* KIL_NV kills the pixels that are currently executing, not based on a test
+ * of the arguments.
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+ struct brw_compile *p = &c->func;
+ struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+ brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+ brw_pop_insn_state(p);
+}
static void fire_fb_write( struct brw_wm_compile *c,
GLuint base_reg,
@@ -1258,6 +1343,14 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
break;
+ case OPCODE_DDX:
+ emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+ break;
+
+ case OPCODE_DDY:
+ emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+ break;
+
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1387,6 +1480,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_kil(c, args[0]);
break;
+ case OPCODE_KIL_NV:
+ emit_kil_nv(c);
+ break;
+
default:
_mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
inst->opcode, inst->opcode < MAX_OPCODE ?
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 123fe841c3..4e3edfbbff 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -494,38 +494,6 @@ static void emit_interp( struct brw_wm_compile *c,
c->fp_interp_emitted |= 1<<idx;
}
-static void emit_ddx( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- GLuint idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDX,
- inst->DstReg,
- 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
-static void emit_ddy( struct brw_wm_compile *c,
- const struct prog_instruction *inst )
-{
- GLuint idx = inst->SrcReg[0].Index;
- struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-
- c->fp_deriv_emitted |= 1<<idx;
- emit_op(c,
- OPCODE_DDY,
- inst->DstReg,
- 0,
- interp,
- get_pixel_w(c),
- src_undef());
-}
-
/***********************************************************************
* Hacks to extend the program parameter and constant lists.
*/
@@ -1186,12 +1154,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
*/
out->DstReg.WriteMask = 0;
break;
- case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
- case OPCODE_DDY:
- emit_ddy(c, inst);
- break;
case OPCODE_END:
emit_fb_write(c);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 7c210abbce..c9fe1dd8ad 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -22,6 +22,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
int i;
+
for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
@@ -31,8 +32,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
- case OPCODE_DDX:
- case OPCODE_DDY:
case OPCODE_NOISE1:
case OPCODE_NOISE2:
case OPCODE_NOISE3:
@@ -293,7 +292,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
int i, j;
struct brw_reg reg;
int urb_read_length = 0;
- GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
+ GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
GLuint reg_index = 0;
memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
@@ -1474,61 +1473,6 @@ static void emit_sne(struct brw_wm_compile *c,
emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
}
-static void emit_ddx(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
- src0 = get_src_reg(c, inst, 0, 0);
- w = get_src_reg(c, inst, 1, 3);
- nr = src0.nr;
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, interp[i]);
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
-static void emit_ddy(struct brw_wm_compile *c,
- const struct prog_instruction *inst)
-{
- struct brw_compile *p = &c->func;
- GLuint mask = inst->DstReg.WriteMask;
- struct brw_reg interp[4];
- struct brw_reg dst;
- struct brw_reg src0, w;
- GLuint nr, i;
-
- src0 = get_src_reg(c, inst, 0, 0);
- nr = src0.nr;
- w = get_src_reg(c, inst, 1, 3);
- interp[0] = brw_vec1_grf(nr, 0);
- interp[1] = brw_vec1_grf(nr, 4);
- interp[2] = brw_vec1_grf(nr+1, 0);
- interp[3] = brw_vec1_grf(nr+1, 4);
- brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
- for(i = 0; i < 4; i++ ) {
- if (mask & (1<<i)) {
- dst = get_dst_reg(c, inst, i);
- brw_MOV(p, dst, suboffset(interp[i], 1));
- brw_MUL(p, dst, dst, w);
- }
- }
- brw_set_saturate(p, 0);
-}
-
static INLINE struct brw_reg high_words( struct brw_reg reg )
{
return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ),
@@ -2780,6 +2724,21 @@ static void post_wm_emit( struct brw_wm_compile *c )
brw_resolve_cals(&c->func);
}
+static void
+get_argument_regs(struct brw_wm_compile *c,
+ const struct prog_instruction *inst,
+ int index,
+ struct brw_reg *regs,
+ int mask)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1 << i))
+ regs[i] = get_src_reg(c, inst, index, i);
+ }
+}
+
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
#define MAX_IF_DEPTH 32
@@ -2797,6 +2756,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
+ int dst_flags;
+ struct brw_reg args[3][4], dst[4];
+ int j;
c->cur_inst = i;
@@ -2814,6 +2776,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
else
brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+ dst_flags = inst->DstReg.WriteMask;
+ if (inst->SaturateMode == SATURATE_ZERO_ONE)
+ dst_flags |= SATURATE;
+
switch (inst->Opcode) {
case WM_PIXELXY:
emit_pixel_xy(c, inst);
@@ -2899,10 +2865,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_min_max(c, inst);
break;
case OPCODE_DDX:
- emit_ddx(c, inst);
- break;
case OPCODE_DDY:
- emit_ddy(c, inst);
+ for (j = 0; j < 4; j++) {
+ if (inst->DstReg.WriteMask & (1 << j))
+ dst[j] = get_dst_reg(c, inst, j);
+ else
+ dst[j] = brw_null_reg();
+ }
+ get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+ emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+ args[0]);
break;
case OPCODE_SLT:
emit_slt(c, inst);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index 3436a24717..b449394029 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -178,6 +178,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
read1 = writemask;
break;
+ case OPCODE_DDX:
+ case OPCODE_DDY:
+ read0 = writemask;
+ break;
+
case OPCODE_MAD:
case OPCODE_CMP:
case OPCODE_LRP:
@@ -270,6 +275,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_DST:
case WM_FRONTFACING:
+ case OPCODE_KIL_NV:
default:
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 39f8c6d522..361f91292b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -107,6 +107,12 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
/* as far as we can tell */
key->computes_depth =
(fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
+ /* BRW_NEW_DEPTH_BUFFER
+ * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+ * Depth field.
+ */
+ if (brw->state.depth_region == NULL)
+ key->computes_depth = 0;
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
@@ -300,6 +306,7 @@ const struct brw_tracked_state brw_wm_unit = {
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |
+ BRW_NEW_DEPTH_BUFFER |
BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 3dcc592bde..9c28a22a29 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -545,15 +545,20 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
irb->texformat->MesaFormat);
}
key.tiling = region->tiling;
- key.width = region->width;
- key.height = region->height;
+ if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
+ key.width = rb->Width;
+ key.height = rb->Height;
+ } else {
+ key.width = region->width;
+ key.height = region->height;
+ }
key.pitch = region->pitch;
key.cpp = region->cpp;
key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
} else {
key.surface_type = BRW_SURFACE_NULL;
key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
- key.tiling = 0;
+ key.tiling = I915_TILING_X;
key.width = 1;
key.height = 1;
key.cpp = 4;
@@ -655,7 +660,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
if (bind_bo == NULL) {
GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
- uint32_t *data = malloc(data_size);
+ uint32_t data[BRW_WM_MAX_SURF];
int i;
for (i = 0; i < brw->wm.nr_surfaces; i++)
@@ -680,8 +685,6 @@ brw_wm_get_binding_table(struct brw_context *brw)
brw->wm.surf_bo[i]);
}
}
-
- free(data);
}
return bind_bo;
@@ -719,17 +722,8 @@ static void prepare_wm_surfaces(struct brw_context *brw )
/* _NEW_TEXTURE, BRW_NEW_TEXDATA */
if (texUnit->_ReallyEnabled) {
- if (texUnit->_Current == intel->frame_buffer_texobj) {
- /* render to texture */
- dri_bo_unreference(brw->wm.surf_bo[surf]);
- brw->wm.surf_bo[surf] = brw->wm.surf_bo[0];
- dri_bo_reference(brw->wm.surf_bo[surf]);
- brw->wm.nr_surfaces = surf + 1;
- } else {
- /* regular texture */
- brw_update_texture_surface(ctx, i);
- brw->wm.nr_surfaces = surf + 1;
- }
+ brw_update_texture_surface(ctx, i);
+ brw->wm.nr_surfaces = surf + 1;
} else {
dri_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = NULL;
diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c
deleted file mode 120000
index 4c6b37ada0..0000000000
--- a/src/mesa/drivers/dri/i965/intel_generatemipmap.c
+++ /dev/null
@@ -1 +0,0 @@
-../intel/intel_generatemipmap.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index 51579df09e..9a619fbd5c 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -157,7 +157,7 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \
- assert((delta) >= 0); \
+ assert((unsigned) (delta) <= buf->size); \
intel_batchbuffer_emit_reloc(intel->batch, buf, \
read_domains, write_domain, delta); \
} while (0)
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 0c5be4c798..9e114db6c7 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -26,9 +26,6 @@
**************************************************************************/
-#include <stdio.h>
-#include <errno.h>
-
#include "main/mtypes.h"
#include "main/context.h"
#include "main/enums.h"
@@ -374,8 +371,6 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
skipBuffers = BUFFER_BIT_STENCIL;
}
- /* XXX Move this flush/lock into the following conditional? */
- intelFlush(&intel->ctx);
LOCK_HARDWARE(intel);
intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
index 1b0e221789..fb62f0f430 100644
--- a/src/mesa/drivers/dri/intel/intel_clear.c
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -172,7 +172,7 @@ intelClear(GLcontext *ctx, GLbitfield mask)
DBG("\n");
}
- _mesa_meta_clear(&intel->ctx, tri_mask);
+ _mesa_meta_Clear(&intel->ctx, tri_mask);
}
if (swrast_mask) {
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index c49f06e44a..934f7aa187 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -588,11 +588,6 @@ intelInitDriverFunctions(struct dd_function_table *functions)
functions->GetString = intelGetString;
functions->UpdateState = intelInvalidateState;
- functions->CopyColorTable = _swrast_CopyColorTable;
- functions->CopyColorSubTable = _swrast_CopyColorSubTable;
- functions->CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
- functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
-
intelInitTextureFuncs(functions);
intelInitTextureImageFuncs(functions);
intelInitTextureSubImageFuncs(functions);
@@ -645,10 +640,6 @@ intelInitContext(struct intel_context *intel,
intel->maxBatchSize = BATCH_SZ;
intel->bufmgr = intelScreen->bufmgr;
-
- if (0) /* for debug */
- drm_intel_bufmgr_set_debug(intel->bufmgr, 1);
-
intel->ttm = intelScreen->ttm;
if (intel->ttm) {
int bo_reuse_mode;
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index 03e7cf39d6..b104096912 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -254,9 +254,6 @@ struct intel_context
intel_line_func draw_line;
intel_tri_func draw_tri;
- /* These refer to the current drawing buffer:
- */
- struct gl_texture_object *frame_buffer_texobj;
/**
* Set to true if a single constant cliprect should be used in the
* batchbuffer. Otherwise, cliprects must be calculated at batchbuffer
@@ -296,7 +293,6 @@ struct intel_context
GLboolean use_texture_tiling;
GLboolean use_early_z;
-
drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */
int perf_boxes;
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index 2e61c556d8..2eb08a8f05 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -31,6 +31,7 @@
#define need_GL_ARB_copy_buffer
+#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_framebuffer_object
#define need_GL_ARB_map_buffer_range
#define need_GL_ARB_occlusion_query
@@ -73,11 +74,15 @@
*/
static const struct dri_extension card_extensions[] = {
{ "GL_ARB_copy_buffer", GL_ARB_copy_buffer_functions },
+ { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{ "GL_ARB_half_float_pixel", NULL },
{ "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions },
{ "GL_ARB_multitexture", NULL },
{ "GL_ARB_point_parameters", GL_ARB_point_parameters_functions },
{ "GL_ARB_point_sprite", NULL },
+ { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
+ { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+ { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
{ "GL_ARB_sync", GL_ARB_sync_functions },
{ "GL_ARB_texture_border_clamp", NULL },
{ "GL_ARB_texture_cube_map", NULL },
@@ -89,6 +94,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_ARB_texture_rectangle", NULL },
{ "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions},
{ "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
+ { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions },
{ "GL_ARB_window_pos", GL_ARB_window_pos_functions },
{ "GL_EXT_blend_color", GL_EXT_blend_color_functions },
{ "GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions },
@@ -139,6 +145,7 @@ static const struct dri_extension i915_extensions[] = {
/** i965-only extensions */
static const struct dri_extension brw_extensions[] = {
+ { "GL_ARB_depth_clamp", NULL },
{ "GL_ARB_depth_texture", NULL },
{ "GL_ARB_fragment_program", NULL },
{ "GL_ARB_fragment_program_shadow", NULL },
@@ -147,13 +154,9 @@ static const struct dri_extension brw_extensions[] = {
{ "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
{ "GL_ARB_point_sprite", NULL },
{ "GL_ARB_seamless_cube_map", NULL },
- { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
- { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
- { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
{ "GL_ARB_shadow", NULL },
{ "GL_MESA_texture_signed_rgba", NULL },
{ "GL_ARB_texture_non_power_of_two", NULL },
- { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions },
{ "GL_EXT_shadow_funcs", NULL },
{ "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions },
{ "GL_EXT_texture_sRGB", NULL },
@@ -168,6 +171,7 @@ static const struct dri_extension brw_extensions[] = {
static const struct dri_extension arb_oq_extensions[] = {
+ { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions },
{ NULL, NULL }
};
@@ -179,6 +183,10 @@ static const struct dri_extension ttm_extensions[] = {
{ NULL, NULL }
};
+static const struct dri_extension fragment_shader_extensions[] = {
+ { "GL_ARB_fragment_shader", NULL },
+ { NULL, NULL }
+};
/**
* Initializes potential list of extensions if ctx == NULL, or actually enables
@@ -202,6 +210,14 @@ intelInitExtensions(GLcontext *ctx, GLboolean enable_imaging)
driInitExtensions(ctx, brw_extensions, GL_FALSE);
if (intel == NULL || IS_915(intel->intelScreen->deviceID)
- || IS_945(intel->intelScreen->deviceID))
+ || IS_945(intel->intelScreen->deviceID)) {
driInitExtensions(ctx, i915_extensions, GL_FALSE);
+
+ if (intel == NULL || driQueryOptionb(&intel->optionCache, "fragment_shader"))
+ driInitExtensions(ctx, fragment_shader_extensions, GL_FALSE);
+
+ if (intel == NULL || driQueryOptionb(&intel->optionCache,
+ "stub_occlusion_query"))
+ driInitExtensions(ctx, arb_oq_extensions, GL_FALSE);
+ }
}
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 804c034840..8dfb24290d 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -715,5 +715,5 @@ intel_fbo_init(struct intel_context *intel)
intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture;
intel->ctx.Driver.ResizeBuffers = intel_resize_buffers;
intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
- intel->ctx.Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
+ intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
}
diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c
deleted file mode 100644
index 12059e122c..0000000000
--- a/src/mesa/drivers/dri/intel/intel_generatemipmap.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
- *
- */
-
-#include "main/glheader.h"
-#include "main/enums.h"
-#include "main/image.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/bufferobj.h"
-#include "main/teximage.h"
-#include "main/texenv.h"
-#include "main/texobj.h"
-#include "main/texstate.h"
-#include "main/texparam.h"
-#include "main/varray.h"
-#include "main/attrib.h"
-#include "main/enable.h"
-#include "main/buffers.h"
-#include "main/fbobject.h"
-#include "main/framebuffer.h"
-#include "main/renderbuffer.h"
-#include "main/depth.h"
-#include "main/hash.h"
-#include "main/mipmap.h"
-#include "main/blend.h"
-#include "glapi/dispatch.h"
-#include "swrast/swrast.h"
-
-#include "intel_screen.h"
-#include "intel_context.h"
-#include "intel_batchbuffer.h"
-#include "intel_pixel.h"
-#include "intel_tex.h"
-#include "intel_mipmap_tree.h"
-
-static const char *intel_fp_tex2d =
- "!!ARBfp1.0\n"
- "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n"
- "END\n";
-
-static GLboolean
-intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name,
- int level, int width, int height)
-{
- struct intel_context *intel = intel_context(ctx);
- GLfloat vertices[4][2];
- GLint status;
-
- /* Set to source from the previous level */
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1);
-
- /* Set to draw into the current level */
- _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
- GL_COLOR_ATTACHMENT0_EXT,
- GL_TEXTURE_2D,
- tex_name,
- level);
- /* Choose to render to the color attachment. */
- _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
-
- status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT);
- if (status != GL_FRAMEBUFFER_COMPLETE_EXT)
- return GL_FALSE;
-
- meta_set_passthrough_transform(&intel->meta);
-
- /* XXX: Doing it right would involve setting up the transformation to do
- * 0-1 mapping or something, and not changing the vertex data.
- */
- vertices[0][0] = 0;
- vertices[0][1] = 0;
- vertices[1][0] = width;
- vertices[1][1] = 0;
- vertices[2][0] = width;
- vertices[2][1] = height;
- vertices[3][0] = 0;
- vertices[3][1] = height;
-
- _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
- _mesa_Enable(GL_VERTEX_ARRAY);
- meta_set_default_texrect(&intel->meta);
-
- _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
-
- meta_restore_texcoords(&intel->meta);
- meta_restore_transform(&intel->meta);
-
- return GL_TRUE;
-}
-
-static GLboolean
-intel_generate_mipmap_2d(GLcontext *ctx,
- GLenum target,
- struct gl_texture_object *texObj)
-{
- struct intel_context *intel = intel_context(ctx);
- GLint old_active_texture;
- int level, max_levels, start_level, end_level;
- GLuint fb_name;
- GLboolean success = GL_FALSE;
- struct gl_framebuffer *saved_fbo = NULL;
- struct gl_buffer_object *saved_array_buffer = NULL;
- struct gl_buffer_object *saved_element_buffer = NULL;
-
- _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
- GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT |
- GL_DEPTH_BUFFER_BIT);
- _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
- old_active_texture = ctx->Texture.CurrentUnit;
- _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer);
-
- /* use default array/index buffers */
- _mesa_reference_buffer_object(ctx, &saved_array_buffer,
- ctx->Array.ArrayBufferObj);
- _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
- ctx->Shared->NullBufferObj);
- _mesa_reference_buffer_object(ctx, &saved_element_buffer,
- ctx->Array.ElementArrayBufferObj);
- _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
- ctx->Shared->NullBufferObj);
-
- _mesa_Disable(GL_POLYGON_STIPPLE);
- _mesa_Disable(GL_DEPTH_TEST);
- _mesa_Disable(GL_STENCIL_TEST);
- _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
- _mesa_DepthMask(GL_FALSE);
-
- /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our
- * minification.
- */
- _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
- _mesa_Enable(GL_TEXTURE_2D);
- _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
- GL_LINEAR_MIPMAP_NEAREST);
- _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-
- /* Bind the new renderbuffer to the color attachment point. */
- _mesa_GenFramebuffersEXT(1, &fb_name);
- _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name);
-
- meta_set_fragment_program(&intel->meta, &intel->meta.tex2d_fp,
- intel_fp_tex2d);
- meta_set_passthrough_vertex_program(&intel->meta);
-
- max_levels = _mesa_max_texture_levels(ctx, texObj->Target);
- start_level = texObj->BaseLevel;
- end_level = texObj->MaxLevel;
-
- /* Loop generating level+1 from level. */
- for (level = start_level; level < end_level && level < max_levels - 1; level++) {
- const struct gl_texture_image *srcImage;
- int width, height;
-
- srcImage = _mesa_select_tex_image(ctx, texObj, target, level);
- if (srcImage->Border != 0)
- goto fail;
-
- width = srcImage->Width / 2;
- if (width < 1)
- width = 1;
- height = srcImage->Height / 2;
- if (height < 1)
- height = 1;
-
- if (width == srcImage->Width &&
- height == srcImage->Height) {
- /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the
- * maximum texture level for the object, so break out when we've gone
- * over the edge.
- */
- break;
- }
-
- /* Make sure that there's space allocated for the target level.
- * We could skip this if there's already space allocated and save some
- * time.
- */
- _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat,
- width, height, 0,
- GL_RGBA, GL_UNSIGNED_INT, NULL);
-
- if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1,
- width, height))
- goto fail;
- }
-
- success = GL_TRUE;
-
-fail:
- meta_restore_fragment_program(&intel->meta);
- meta_restore_vertex_program(&intel->meta);
-
- /* restore array/index buffers */
- _mesa_reference_buffer_object(ctx, &ctx->Array.ArrayBufferObj,
- saved_array_buffer);
- _mesa_reference_buffer_object(ctx, &saved_array_buffer, NULL);
- _mesa_reference_buffer_object(ctx, &ctx->Array.ElementArrayBufferObj,
- saved_element_buffer);
- _mesa_reference_buffer_object(ctx, &saved_element_buffer, NULL);
-
-
- _mesa_DeleteFramebuffersEXT(1, &fb_name);
- _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
- if (saved_fbo)
- _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name);
- _mesa_reference_framebuffer(&saved_fbo, NULL);
- _mesa_PopClientAttrib();
- _mesa_PopAttrib();
-
- return success;
-}
-
-
-/**
- * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap
- * level).
- *
- * The texture object's miptree must be mapped.
- *
- * It would be really nice if this was just called by Mesa whenever mipmaps
- * needed to be regenerated, rather than us having to remember to do so in
- * each texture image modification path.
- *
- * This function should also include an accelerated path.
- */
-void
-intel_generate_mipmap(GLcontext *ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- struct intel_context *intel = intel_context(ctx);
- struct intel_texture_object *intelObj = intel_texture_object(texObj);
- GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
- int face, i;
-
- /* HW path */
- if (target == GL_TEXTURE_2D &&
- ctx->Extensions.EXT_framebuffer_object &&
- ctx->Extensions.ARB_fragment_program &&
- ctx->Extensions.ARB_vertex_program) {
- GLboolean success;
-
- /* We'll be accessing this texture using GL entrypoints, which should
- * be resilient against other access to this texture.
- */
- _mesa_unlock_texture(ctx, texObj);
- success = intel_generate_mipmap_2d(ctx, target, texObj);
- _mesa_lock_texture(ctx, texObj);
-
- if (success)
- return;
- }
-
- /* SW path */
- intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
- _mesa_generate_mipmap(ctx, target, texObj);
- intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
-
- /* Update the level information in our private data in the new images, since
- * it didn't get set as part of a normal TexImage path.
- */
- for (face = 0; face < nr_faces; face++) {
- for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
- struct intel_texture_image *intelImage;
-
- intelImage = intel_texture_image(texObj->Image[face][i]);
- if (intelImage == NULL)
- break;
-
- intelImage->level = i;
- intelImage->face = face;
- /* Unreference the miptree to signal that the new Data is a bare
- * pointer from mesa.
- */
- intel_miptree_release(intel, &intelImage->mt);
- }
- }
-}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
index a300141655..993e427a99 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel.c
@@ -129,20 +129,6 @@ intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one)
return GL_TRUE;
}
-
-GLboolean
-intel_check_meta_tex_fragment_ops(GLcontext * ctx)
-{
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- /* Some of _ImageTransferState (scale, bias) could be done with
- * fragment programs on i915.
- */
- return !(ctx->_ImageTransferState || ctx->Fog.Enabled || /* not done yet */
- ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled);
-}
-
/* The intel_region struct doesn't really do enough to capture the
* format of the pixels in the region. For now this code assumes that
* the region is a display surface and hence is either ARGB8888 or
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h
index 96a6dd17b2..743b6497c5 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel.h
+++ b/src/mesa/drivers/dri/intel/intel_pixel.h
@@ -34,8 +34,6 @@ void intelInitPixelFuncs(struct dd_function_table *functions);
GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
GLboolean src_alpha_is_one);
-GLboolean intel_check_meta_tex_fragment_ops(GLcontext * ctx);
-
GLboolean intel_check_blit_format(struct intel_region *region,
GLenum format, GLenum type);
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index b543a0bbc3..9a0bcc07a5 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -209,7 +209,7 @@ do_blit_bitmap( GLcontext *ctx,
if (!dst)
return GL_FALSE;
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return GL_TRUE; /* even though this is an error, we're done */
@@ -329,7 +329,7 @@ out:
if (INTEL_DEBUG & DEBUG_SYNC)
intel_batchbuffer_flush(intel->batch);
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
unpack->BufferObj);
@@ -418,7 +418,7 @@ intel_texture_bitmap(GLcontext * ctx,
return GL_FALSE;
}
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
bitmap = map_pbo(ctx, width, height, unpack, bitmap);
if (bitmap == NULL)
return GL_TRUE; /* even though this is an error, we're done */
@@ -428,7 +428,7 @@ intel_texture_bitmap(GLcontext * ctx,
a8_bitmap = _mesa_calloc(width * height);
_mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff);
- if (unpack->BufferObj->Name) {
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
/* done with PBO so unmap it now */
ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
unpack->BufferObj);
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
index 07ca8f7ddb..f058b3c8e4 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -240,5 +240,5 @@ intelCopyPixels(GLcontext * ctx,
return;
/* this will use swrast if needed */
- _mesa_meta_copy_pixels(ctx, srcx, srcy, width, height, destx, desty, type);
+ _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
index 7fbb89fd6a..5ffa847fd4 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -54,7 +54,7 @@
#include "intel_fbo.h"
-/** XXX compare perf of this vs. _mesa_meta_draw_pixels(STENCIL) */
+/** XXX compare perf of this vs. _mesa_meta_DrawPixels(STENCIL) */
static GLboolean
intel_stencil_drawpixels(GLcontext * ctx,
GLint x, GLint y,
@@ -265,7 +265,7 @@ intelDrawPixels(GLcontext * ctx,
/* XXX this function doesn't seem to work reliably even when all
* the pre-requisite conditions are met.
* Note that this function is never hit with conform.
- * Fall back to swrast because even the _mesa_meta_draw_pixels() approach
+ * Fall back to swrast because even the _mesa_meta_DrawPixels() approach
* isn't working because of an apparent stencil bug.
*/
if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type,
@@ -280,6 +280,6 @@ intelDrawPixels(GLcontext * ctx,
}
#endif
- _mesa_meta_draw_pixels(ctx, x, y, width, height, format, type,
- unpack, pixels);
+ _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
+ unpack, pixels);
}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c
index e036736323..4707500180 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c
@@ -180,16 +180,7 @@ do_blit_readpixels(GLcontext * ctx,
if (!src)
return GL_FALSE;
- if (pack->BufferObj->Name) {
- /* XXX This validation should be done by core mesa:
- */
- if (!_mesa_validate_pbo_access(2, pack, width, height, 1,
- format, type, pixels)) {
- _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels");
- return GL_TRUE;
- }
- }
- else {
+ if (!_mesa_is_bufferobj(pack->BufferObj)) {
/* PBO only for now:
*/
if (INTEL_DEBUG & DEBUG_PIXEL)
@@ -225,9 +216,8 @@ do_blit_readpixels(GLcontext * ctx,
rowLength = -rowLength;
}
- /* XXX 64-bit cast? */
- dst_offset = (GLuint) _mesa_image_address(2, pack, pixels, width, height,
- format, type, 0, 0, 0);
+ dst_offset = (GLintptr) _mesa_image_address(2, pack, pixels, width, height,
+ format, type, 0, 0, 0);
/* Although the blits go on the command buffer, need to do this and
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 1b8c56e68d..24f7fbc992 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -79,6 +79,10 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
DRI_CONF_OPT_END
+ DRI_CONF_OPT_BEGIN(fragment_shader, bool, false)
+ DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.")
+ DRI_CONF_OPT_END
+
DRI_CONF_SECTION_END
DRI_CONF_SECTION_QUALITY
DRI_CONF_FORCE_S3TC_ENABLE(false)
@@ -88,10 +92,14 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_NO_RAST(false)
DRI_CONF_ALWAYS_FLUSH_BATCH(false)
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
+
+ DRI_CONF_OPT_BEGIN(stub_occlusion_query, bool, false)
+ DRI_CONF_DESC(en, "Enable stub ARB_occlusion_query support on 915/945.")
+ DRI_CONF_OPT_END
DRI_CONF_SECTION_END
DRI_CONF_END;
-const GLuint __driNConfigOptions = 10;
+const GLuint __driNConfigOptions = 12;
#ifdef USE_NEW_INTERFACE
static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
index 8df4990880..dcfcad1d95 100644
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -444,23 +444,30 @@ intel_renderbuffer_unmap(struct intel_context *intel,
* _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields.
*/
static void
-intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
+intel_map_unmap_framebuffer(struct intel_context *intel,
+ struct gl_framebuffer *fb,
+ GLboolean map)
{
- GLcontext *ctx = &intel->ctx;
- GLuint i, j;
+ GLuint i;
/* color draw buffers */
- for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) {
+ for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
if (map)
- intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
+ intel_renderbuffer_map(intel, fb->_ColorDrawBuffers[i]);
else
- intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
+ intel_renderbuffer_unmap(intel, fb->_ColorDrawBuffers[i]);
}
+ /* color read buffer */
+ if (map)
+ intel_renderbuffer_map(intel, fb->_ColorReadBuffer);
+ else
+ intel_renderbuffer_unmap(intel, fb->_ColorReadBuffer);
+
/* check for render to textures */
for (i = 0; i < BUFFER_COUNT; i++) {
struct gl_renderbuffer_attachment *att =
- ctx->DrawBuffer->Attachment + i;
+ fb->Attachment + i;
struct gl_texture_object *tex = att->Texture;
if (tex) {
/* render to texture */
@@ -472,34 +479,23 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
}
}
- /* color read buffers */
- if (map)
- intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer);
- else
- intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer);
-
/* depth buffer (Note wrapper!) */
- if (ctx->DrawBuffer->_DepthBuffer) {
+ if (fb->_DepthBuffer) {
if (map)
- intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped);
+ intel_renderbuffer_map(intel, fb->_DepthBuffer->Wrapped);
else
- intel_renderbuffer_unmap(intel,
- ctx->DrawBuffer->_DepthBuffer->Wrapped);
+ intel_renderbuffer_unmap(intel, fb->_DepthBuffer->Wrapped);
}
/* stencil buffer (Note wrapper!) */
- if (ctx->DrawBuffer->_StencilBuffer) {
+ if (fb->_StencilBuffer) {
if (map)
- intel_renderbuffer_map(intel,
- ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ intel_renderbuffer_map(intel, fb->_StencilBuffer->Wrapped);
else
- intel_renderbuffer_unmap(intel,
- ctx->DrawBuffer->_StencilBuffer->Wrapped);
+ intel_renderbuffer_unmap(intel, fb->_StencilBuffer->Wrapped);
}
}
-
-
/**
* Prepare for software rendering. Map current read/draw framebuffers'
* renderbuffes and all currently bound texture objects.
@@ -522,7 +518,9 @@ intelSpanRenderStart(GLcontext * ctx)
}
}
- intel_map_unmap_buffers(intel, GL_TRUE);
+ intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_TRUE);
+ if (ctx->ReadBuffer != ctx->DrawBuffer)
+ intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_TRUE);
}
/**
@@ -544,7 +542,9 @@ intelSpanRenderFinish(GLcontext * ctx)
}
}
- intel_map_unmap_buffers(intel, GL_FALSE);
+ intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE);
+ if (ctx->ReadBuffer != ctx->DrawBuffer)
+ intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE);
UNLOCK_HARDWARE(intel);
}
@@ -558,6 +558,43 @@ intelInitSpanFuncs(GLcontext * ctx)
swdd->SpanRenderFinish = intelSpanRenderFinish;
}
+void
+intel_map_vertex_shader_textures(GLcontext *ctx)
+{
+ struct intel_context *intel = intel_context(ctx);
+ int i;
+
+ if (ctx->VertexProgram._Current == NULL)
+ return;
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled &&
+ ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) {
+ struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+
+ intel_tex_map_images(intel, intel_texture_object(texObj));
+ }
+ }
+}
+
+void
+intel_unmap_vertex_shader_textures(GLcontext *ctx)
+{
+ struct intel_context *intel = intel_context(ctx);
+ int i;
+
+ if (ctx->VertexProgram._Current == NULL)
+ return;
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled &&
+ ctx->VertexProgram._Current->Base.TexturesUsed[i] != 0) {
+ struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
+
+ intel_tex_unmap_images(intel, intel_texture_object(texObj));
+ }
+ }
+}
/**
* Plug in appropriate span read/write functions for the given renderbuffer.
@@ -711,6 +748,9 @@ intel_set_span_functions(struct intel_context *intel,
intel_YTile_InitStencilPointers_z24_s8(rb);
break;
}
+ } else {
+ _mesa_problem(NULL,
+ "Unexpected ActualFormat in intelSetSpanFunctions");
}
break;
default:
diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h
index acbeb4abe1..bffe109aa5 100644
--- a/src/mesa/drivers/dri/intel/intel_span.h
+++ b/src/mesa/drivers/dri/intel/intel_span.h
@@ -36,5 +36,7 @@ void intel_renderbuffer_map(struct intel_context *intel,
struct gl_renderbuffer *rb);
void intel_renderbuffer_unmap(struct intel_context *intel,
struct gl_renderbuffer *rb);
+void intel_map_vertex_shader_textures(GLcontext *ctx);
+void intel_unmap_vertex_shader_textures(GLcontext *ctx);
#endif
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
index df63f29a42..3cbc379dbd 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.c
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -2,6 +2,7 @@
#include "main/texobj.h"
#include "main/teximage.h"
#include "main/mipmap.h"
+#include "drivers/common/meta.h"
#include "intel_context.h"
#include "intel_mipmap_tree.h"
#include "intel_tex.h"
@@ -158,11 +159,36 @@ timed_memcpy(void *dest, const void *src, size_t n)
}
#endif /* DO_DEBUG */
+
+/**
+ * Called via ctx->Driver.GenerateMipmap()
+ * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks
+ * if we'll be using software mipmap generation. In that case, we need to
+ * map/unmap the base level texture image.
+ */
+static void
+intelGenerateMipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) {
+ /* sw path: need to map texture images */
+ struct intel_context *intel = intel_context(ctx);
+ struct intel_texture_object *intelObj = intel_texture_object(texObj);
+ intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
+ _mesa_generate_mipmap(ctx, target, texObj);
+ intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
+ }
+ else {
+ _mesa_meta_GenerateMipmap(ctx, target, texObj);
+ }
+}
+
+
void
intelInitTextureFuncs(struct dd_function_table *functions)
{
functions->ChooseTextureFormat = intelChooseTextureFormat;
- functions->GenerateMipmap = intel_generate_mipmap;
+ functions->GenerateMipmap = intelGenerateMipmap;
functions->NewTextureObject = intelNewTextureObject;
functions->NewTextureImage = intelNewTextureImage;
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
index 471aa2a240..57ed0b1aab 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.h
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -71,7 +71,4 @@ void intel_tex_unmap_images(struct intel_context *intel,
int intel_compressed_num_bytes(GLuint mesaFormat);
-void intel_generate_mipmap(GLcontext *ctx, GLenum target,
- struct gl_texture_object *texObj);
-
#endif
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index b241c11625..f3e312420d 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -30,7 +30,8 @@
#include "main/image.h"
#include "main/teximage.h"
#include "main/mipmap.h"
-#include "swrast/swrast.h"
+
+#include "drivers/common/meta.h"
#include "intel_screen.h"
#include "intel_context.h"
@@ -91,7 +92,6 @@ do_copy_texsubimage(struct intel_context *intel,
GLint x, GLint y, GLsizei width, GLsizei height)
{
GLcontext *ctx = &intel->ctx;
- struct gl_texture_object *texObj = intelImage->base.TexObject;
const struct intel_region *src =
get_teximage_source(intel, internalFormat);
@@ -171,11 +171,6 @@ do_copy_texsubimage(struct intel_context *intel,
UNLOCK_HARDWARE(intel);
- /* GL_SGIS_generate_mipmap */
- if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
- }
-
return GL_TRUE;
}
@@ -222,8 +217,8 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
return;
fail:
- _swrast_copy_teximage1d(ctx, target, level, internalFormat, x, y,
- width, border);
+ _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
+ width, border);
}
@@ -270,8 +265,8 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
return;
fail:
- _swrast_copy_teximage2d(ctx, target, level, internalFormat, x, y,
- width, height, border);
+ _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
+ width, height, border);
}
@@ -295,7 +290,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
if (!do_copy_texsubimage(intel_context(ctx), target,
intel_texture_image(texImage),
internalFormat, xoffset, 0, x, y, width, 1)) {
- _swrast_copy_texsubimage1d(ctx, target, level, xoffset, x, y, width);
+ _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width);
}
}
@@ -321,10 +316,10 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
internalFormat,
xoffset, yoffset, x, y, width, height)) {
- DBG("%s - fallback to swrast\n", __FUNCTION__);
+ DBG("%s - fallback to _mesa_meta_CopyTexSubImage2D\n", __FUNCTION__);
- _swrast_copy_texsubimage2d(ctx, target, level,
- xoffset, yoffset, x, y, width, height);
+ _mesa_meta_CopyTexSubImage2D(ctx, target, level,
+ xoffset, yoffset, x, y, width, height);
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
index c5f5220837..66201b1f46 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -1,15 +1,11 @@
-#include <stdlib.h>
-#include <stdio.h>
-
#include "main/glheader.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/enums.h"
-#include "main/colortab.h"
+#include "main/bufferobj.h"
#include "main/convolve.h"
#include "main/context.h"
-#include "main/simple_list.h"
#include "main/texcompress.h"
#include "main/texformat.h"
#include "main/texgetimage.h"
@@ -206,7 +202,7 @@ try_pbo_upload(struct intel_context *intel,
GLuint src_offset, src_stride;
GLuint dst_offset, dst_stride;
- if (unpack->BufferObj->Name == 0 ||
+ if (!_mesa_is_bufferobj(unpack->BufferObj) ||
intel->ctx._ImageTransferState ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: failure 1\n", __FUNCTION__);
@@ -264,7 +260,7 @@ try_pbo_zcopy(struct intel_context *intel,
GLuint src_offset, src_stride;
GLuint dst_offset, dst_stride;
- if (unpack->BufferObj->Name == 0 ||
+ if (!_mesa_is_bufferobj(unpack->BufferObj) ||
intel->ctx._ImageTransferState ||
unpack->SkipPixels || unpack->SkipRows) {
DBG("%s: failure 1\n", __FUNCTION__);
@@ -427,7 +423,7 @@ intelTexImage(GLcontext * ctx,
*/
if (dims <= 2 &&
intelImage->mt &&
- unpack->BufferObj->Name != 0 &&
+ _mesa_is_bufferobj(unpack->BufferObj) &&
check_pbo_format(internalFormat, format,
type, intelImage->base.TexFormat)) {
@@ -550,11 +546,6 @@ intelTexImage(GLcontext * ctx,
}
UNLOCK_HARDWARE(intel);
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
- }
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 89037073f8..751ec2c98c 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -129,11 +129,6 @@ intelTexSubimage(GLcontext * ctx,
}
UNLOCK_HARDWARE(intel);
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target, texObj);
- }
}
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
index 42635bf9d9..fbce70c37b 100644
--- a/src/mesa/drivers/dri/r200/Makefile
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -55,8 +55,7 @@ C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
X86_SOURCES =
-DRIVER_DEFINES = -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R200 \
- -Wall
+DRIVER_DEFINES = -DRADEON_R200 -Wall
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
index 5d8d6f6658..c64f940623 100644
--- a/src/mesa/drivers/dri/r300/Makefile
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -54,6 +54,7 @@ DRIVER_SOURCES = \
r300_vertprog.c \
r300_fragprog_common.c \
r300_shader.c \
+ radeon_mesa_to_rc.c \
r300_emit.c \
r300_swtcl.c \
$(RADEON_COMMON_SOURCES) \
@@ -62,8 +63,7 @@ DRIVER_SOURCES = \
C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
-DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \
- -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \
+DRIVER_DEFINES = -DRADEON_R300
# -DRADEON_BO_TRACK \
-Wall
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index d973844192..d83888d90a 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -8,10 +8,17 @@ LIBNAME = r300compiler
C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
- radeon_nqssadce.c \
radeon_program.c \
+ radeon_program_print.c \
+ radeon_opcodes.c \
radeon_program_alu.c \
radeon_program_pair.c \
+ radeon_pair_translate.c \
+ radeon_pair_schedule.c \
+ radeon_pair_regalloc.c \
+ radeon_dataflow.c \
+ radeon_dataflow_deadcode.c \
+ radeon_dataflow_swizzles.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
new file mode 100755
index 0000000000..46075a8aee
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -0,0 +1,37 @@
+Import('*')
+
+env = env.Clone()
+env.Append(CPPPATH = '#/include')
+env.Append(CPPPATH = '#/src/mesa')
+
+# temporary fix
+env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '')
+
+r300compiler = env.ConvenienceLibrary(
+ target = 'r300compiler',
+ source = [
+ 'radeon_code.c',
+ 'radeon_compiler.c',
+ 'radeon_program.c',
+ 'radeon_program_print.c',
+ 'radeon_opcodes.c',
+ 'radeon_program_alu.c',
+ 'radeon_program_pair.c',
+ 'radeon_pair_translate.c',
+ 'radeon_pair_schedule.c',
+ 'radeon_pair_regalloc.c',
+ 'radeon_dataflow.c',
+ 'radeon_dataflow_deadcode.c',
+ 'radeon_dataflow_swizzles.c',
+ 'r3xx_fragprog.c',
+ 'r300_fragprog.c',
+ 'r300_fragprog_swizzle.c',
+ 'r300_fragprog_emit.c',
+ 'r500_fragprog.c',
+ 'r500_fragprog_emit.c',
+ 'r3xx_vertprog.c',
+ 'r3xx_vertprog_dump.c',
+ 'memory_pool.c',
+ ])
+
+Return('r300compiler')
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
index 6c9fba4914..aa69b0fc72 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -27,17 +27,17 @@
#include "r300_fragprog.h"
-#include "shader/prog_parameter.h"
+#include <stdio.h>
#include "../r300_reg.h"
-static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
- struct prog_src_register reg = { 0, };
+ struct rc_src_register reg = { 0, };
- reg.File = PROGRAM_STATE_VAR;
+ reg.File = RC_FILE_CONSTANT;
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
- reg.Swizzle = SWIZZLE_WWWW;
+ reg.Swizzle = RC_SWIZZLE_WWWW;
return reg;
}
@@ -47,7 +47,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t
* - extract operand swizzles
* - introduce a temporary register when write masks are needed
*/
-GLboolean r300_transform_TEX(
+int r300_transform_TEX(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* data)
@@ -55,77 +55,77 @@ GLboolean r300_transform_TEX(
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
- if (inst->I.Opcode != OPCODE_TEX &&
- inst->I.Opcode != OPCODE_TXB &&
- inst->I.Opcode != OPCODE_TXP &&
- inst->I.Opcode != OPCODE_KIL)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+ inst->U.I.Opcode != RC_OPCODE_TXB &&
+ inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_KIL)
+ return 0;
/* ARB_shadow & EXT_shadow_funcs */
- if (inst->I.Opcode != OPCODE_KIL &&
- c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- inst->I.Opcode = OPCODE_MOV;
+ if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- if (comparefunc == GL_ALWAYS) {
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
+ if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
} else {
- inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
- return GL_TRUE;
+ return 1;
} else {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+ unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
int pass, fail;
- inst_rcp->I.Opcode = OPCODE_RCP;
- inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
- inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
- inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- inst_cmp->I.DstReg = inst->I.DstReg;
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = rc_find_free_temporary(c);
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
-
- inst_mad->I.Opcode = OPCODE_MAD;
- inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
- inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
- inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+ inst_cmp->U.I.DstReg = inst->U.I.DstReg;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
- inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
- inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+ inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
else
- inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+ inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
- inst_cmp->I.Opcode = OPCODE_CMP;
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
/* DstReg has been filled out above */
- inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
pass = 1;
fail = 2;
} else {
@@ -133,9 +133,9 @@ GLboolean r300_transform_TEX(
fail = 1;
}
- inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
- inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
- inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+ inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+ inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
}
@@ -143,52 +143,52 @@ GLboolean r300_transform_TEX(
* instead of [0..Width]x[0..Height].
* Add a scaling instruction.
*/
- if (inst->I.Opcode != OPCODE_KIL && inst->I.TexSrcTarget == TEXTURE_RECT_INDEX) {
+ if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.TexSrcTarget == RC_TEXTURE_RECT) {
struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst->Prev);
- inst_mul->I.Opcode = OPCODE_MUL;
- inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mul->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mul->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_mul->I.SrcReg[1].File = PROGRAM_STATE_VAR;
- inst_mul->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->I.TexSrcUnit);
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mul->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
- reset_srcreg(&inst->I.SrcReg[0]);
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = inst_mul->I.DstReg.Index;
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mul->U.I.DstReg.Index;
}
/* Cannot write texture to output registers or with masks */
- if (inst->I.Opcode != OPCODE_KIL &&
- (inst->I.DstReg.File != PROGRAM_TEMPORARY || inst->I.DstReg.WriteMask != WRITEMASK_XYZW)) {
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg = inst->I.DstReg;
- inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg = inst->U.I.DstReg;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
/* Cannot read texture coordinate from constants file */
- if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- reset_srcreg(&inst->I.SrcReg[0]);
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
}
- return GL_TRUE;
+ return 1;
}
/* just some random things... */
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
index 0ac46dbd9c..418df36c93 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -33,9 +33,6 @@
#ifndef __R300_FRAGPROG_H_
#define __R300_FRAGPROG_H_
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
-
#include "radeon_compiler.h"
#include "radeon_program.h"
@@ -44,6 +41,6 @@ extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler
extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern GLboolean r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
+extern int r300_transform_TEX(struct radeon_compiler * c, struct rc_instruction* inst, void* data);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
index c7227bbd15..375838d98e 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -56,7 +56,6 @@ struct r300_emit_state {
};
#define PROG_CODE \
- struct r300_emit_state * emit = (struct r300_emit_state*)data; \
struct r300_fragment_program_compiler *c = emit->compiler; \
struct r300_fragment_program_code *code = &c->code->code.r300
@@ -69,64 +68,76 @@ struct r300_emit_state {
/**
* Mark a temporary register as used.
*/
-static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
{
if (index > code->pixsize)
code->pixsize = index;
}
+static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | (1 << 5);
+ } else if (src.File == RC_FILE_TEMPORARY) {
+ use_temporary(code, src.Index);
+ return src.Index;
+ }
+
+ return 0;
+}
+
-static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R300_ALU_OUTC_CMP;
- case OPCODE_DP3: return R300_ALU_OUTC_DP3;
- case OPCODE_DP4: return R300_ALU_OUTC_DP4;
- case OPCODE_FRC: return R300_ALU_OUTC_FRC;
+ case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+ case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+ case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
default:
error("translate_rgb_opcode(%i): Unknown opcode", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R300_ALU_OUTC_MAD;
- case OPCODE_MAX: return R300_ALU_OUTC_MAX;
- case OPCODE_MIN: return R300_ALU_OUTC_MIN;
- case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+ case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
}
}
-static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R300_ALU_OUTA_CMP;
- case OPCODE_DP3: return R300_ALU_OUTA_DP4;
- case OPCODE_DP4: return R300_ALU_OUTA_DP4;
- case OPCODE_EX2: return R300_ALU_OUTA_EX2;
- case OPCODE_FRC: return R300_ALU_OUTA_FRC;
- case OPCODE_LG2: return R300_ALU_OUTA_LG2;
+ case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+ case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+ case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
default:
error("translate_rgb_opcode(%i): Unknown opcode", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R300_ALU_OUTA_MAD;
- case OPCODE_MAX: return R300_ALU_OUTA_MAX;
- case OPCODE_MIN: return R300_ALU_OUTA_MIN;
- case OPCODE_RCP: return R300_ALU_OUTA_RCP;
- case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+ case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+ case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+ case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
}
}
/**
* Emit one paired ALU instruction.
*/
-static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
{
PROG_CODE;
if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
error("Too many ALU instructions");
- return GL_FALSE;
+ return 0;
}
int ip = code->alu.length++;
@@ -136,17 +147,13 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
- GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
- if (!inst->RGB.Src[j].Constant)
- use_temporary(code, inst->RGB.Src[j].Index);
+ unsigned int src = use_source(code, inst->RGB.Src[j]);
code->alu.inst[ip].rgb_addr |= src << (6*j);
- src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
- if (!inst->Alpha.Src[j].Constant)
- use_temporary(code, inst->Alpha.Src[j].Index);
+ src = use_source(code, inst->Alpha.Src[j]);
code->alu.inst[ip].alpha_addr |= src << (6*j);
- GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
code->alu.inst[ip].rgb_inst |= arg << (7*j);
@@ -186,27 +193,27 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
if (inst->Alpha.DepthWriteMask) {
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
emit->node_flags |= R300_W_OUT;
- c->code->writes_depth = GL_TRUE;
+ c->code->writes_depth = 1;
}
- return GL_TRUE;
+ return 1;
}
/**
* Finish the current node without advancing to the next one.
*/
-static GLboolean finish_node(struct r300_emit_state * emit)
+static int finish_node(struct r300_emit_state * emit)
{
struct r300_fragment_program_compiler * c = emit->compiler;
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
if (code->alu.length == emit->node_first_alu) {
/* Generate a single NOP for this node */
- struct radeon_pair_instruction inst;
- _mesa_bzero(&inst, sizeof(inst));
+ struct rc_pair_instruction inst;
+ memset(&inst, 0, sizeof(inst));
if (!emit_alu(emit, &inst))
- return GL_FALSE;
+ return 0;
}
unsigned alu_offset = emit->node_first_alu;
@@ -217,7 +224,7 @@ static GLboolean finish_node(struct r300_emit_state * emit)
if (code->tex.length == emit->node_first_tex) {
if (emit->current_node > 0) {
error("Node %i has no TEX instructions", emit->current_node);
- return GL_FALSE;
+ return 0;
}
tex_end = 0;
@@ -240,7 +247,7 @@ static GLboolean finish_node(struct r300_emit_state * emit)
(tex_end << R300_TEX_SIZE_SHIFT) |
emit->node_flags;
- return GL_TRUE;
+ return 1;
}
@@ -248,79 +255,72 @@ static GLboolean finish_node(struct r300_emit_state * emit)
* Begin a block of texture instructions.
* Create the necessary indirection.
*/
-static GLboolean begin_tex(void* data)
+static int begin_tex(struct r300_emit_state * emit)
{
PROG_CODE;
if (code->alu.length == emit->node_first_alu &&
code->tex.length == emit->node_first_tex) {
- return GL_TRUE;
+ return 1;
}
if (emit->current_node == 3) {
error("Too many texture indirections");
- return GL_FALSE;
+ return 0;
}
if (!finish_node(emit))
- return GL_FALSE;
+ return 0;
emit->current_node++;
emit->node_first_tex = code->tex.length;
emit->node_first_alu = code->alu.length;
emit->node_flags = 0;
- return GL_TRUE;
+ return 1;
}
-static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
{
PROG_CODE;
if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
error("Too many TEX instructions");
- return GL_FALSE;
+ return 0;
}
- GLuint unit = inst->TexSrcUnit;
- GLuint dest = inst->DestIndex;
- GLuint opcode;
+ unsigned int unit = inst->U.I.TexSrcUnit;
+ unsigned int dest = inst->U.I.DstReg.Index;
+ unsigned int opcode;
- switch(inst->Opcode) {
- case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
- case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
- case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
- case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
- error("Unknown texture opcode %i", inst->Opcode);
- return GL_FALSE;
+ error("Unknown texture opcode %i", inst->U.I.Opcode);
+ return 0;
}
- if (inst->Opcode == RADEON_OPCODE_KIL) {
+ if (inst->U.I.Opcode == RC_OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
- use_temporary(code, inst->SrcIndex);
+ use_temporary(code, inst->U.I.SrcReg[0].Index);
code->tex.inst[code->tex.length++] =
- (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
+ (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
(unit << R300_TEX_ID_SHIFT) |
(opcode << R300_TEX_INST_SHIFT);
- return GL_TRUE;
+ return 1;
}
-static const struct radeon_pair_handler pair_handler = {
- .EmitPaired = &emit_alu,
- .EmitTex = &emit_tex,
- .BeginTexBlock = &begin_tex,
- .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
-};
-
/**
* Final compilation step: Turn the intermediate radeon_program into
* machine-readable instructions.
@@ -333,9 +333,26 @@ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
memset(&emit, 0, sizeof(emit));
emit.compiler = compiler;
- _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
+ memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+ for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ begin_tex(&emit);
+ continue;
+ }
+
+ emit_tex(&emit, inst);
+ } else {
+ emit_alu(&emit, &inst->U.P);
+ }
+ }
+
+ if (code->pixsize >= R300_PFS_NUM_TEMP_REGS)
+ rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
- radeonPairProgram(compiler, &pair_handler, &emit);
if (compiler->Base.Error)
return;
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
index 1b14cc3888..cfa48a59e3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -33,16 +33,17 @@
#include "r300_fragprog_swizzle.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
#include "radeon_compiler.h"
-#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
struct swizzle_data {
- GLuint hash; /**< swizzle value this matches */
- GLuint base; /**< base value for hw swizzle */
- GLuint stride; /**< difference in base between arg0/1/2 */
+ unsigned int hash; /**< swizzle value this matches */
+ unsigned int base; /**< base value for hw swizzle */
+ unsigned int stride; /**< difference in base between arg0/1/2 */
};
static const struct swizzle_data native_swizzles[] = {
@@ -65,15 +66,15 @@ static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swi
* Find a native RGB swizzle that matches the given swizzle.
* Returns 0 if none found.
*/
-static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
{
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data* sd = &native_swizzles[i];
for(comp = 0; comp < 3; ++comp) {
- GLuint swz = GET_SWZ(swizzle, comp);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz != GET_SWZ(sd->hash, comp))
break;
@@ -90,71 +91,72 @@ static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
* Check whether the given instruction supports the swizzle and negate
* combinations in the given source register.
*/
-GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
if (reg.Abs)
- reg.Negate = NEGATE_NONE;
+ reg.Negate = RC_MASK_NONE;
- if (opcode == OPCODE_KIL ||
- opcode == OPCODE_TEX ||
- opcode == OPCODE_TXB ||
- opcode == OPCODE_TXP) {
+ if (opcode == RC_OPCODE_KIL ||
+ opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP) {
int j;
if (reg.Abs || reg.Negate)
- return GL_FALSE;
+ return 0;
for(j = 0; j < 4; ++j) {
- GLuint swz = GET_SWZ(reg.Swizzle, j);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(reg.Swizzle, j);
+ if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz != j)
- return GL_FALSE;
+ return 0;
}
- return GL_TRUE;
+ return 1;
}
- GLuint relevant = 0;
+ unsigned int relevant = 0;
int j;
for(j = 0; j < 3; ++j)
- if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL)
+ if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
relevant |= 1 << j;
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
- return GL_FALSE;
+ return 0;
if (!lookup_native_swizzle(reg.Swizzle))
- return GL_FALSE;
+ return 0;
- return GL_TRUE;
+ return 1;
}
-/**
- * Generate MOV dst, src using only native swizzles.
- */
-void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+static void r300_swizzle_split(
+ struct rc_src_register src, unsigned int mask,
+ struct rc_swizzle_split * split)
{
if (src.Abs)
- src.Negate = NEGATE_NONE;
+ src.Negate = RC_MASK_NONE;
+
+ split->NumPhases = 0;
- while(dst.WriteMask) {
+ while(mask) {
const struct swizzle_data *best_swizzle = 0;
- GLuint best_matchcount = 0;
- GLuint best_matchmask = 0;
+ unsigned int best_matchcount = 0;
+ unsigned int best_matchmask = 0;
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data *sd = &native_swizzles[i];
- GLuint matchcount = 0;
- GLuint matchmask = 0;
+ unsigned int matchcount = 0;
+ unsigned int matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
- if (!GET_BIT(dst.WriteMask, comp))
+ if (!GET_BIT(mask, comp))
continue;
- GLuint swz = GET_SWZ(src.Swizzle, comp);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(src.Swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
continue;
if (swz == GET_SWZ(sd->hash, comp)) {
/* check if the negate bit of current component
@@ -170,34 +172,35 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst,
best_swizzle = sd;
best_matchcount = matchcount;
best_matchmask = matchmask;
- if (matchmask == (dst.WriteMask & WRITEMASK_XYZ))
+ if (matchmask == (mask & RC_MASK_XYZ))
break;
}
}
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? NEGATE_XYZW : NEGATE_NONE;
- /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
+ if (mask & RC_MASK_W)
+ best_matchmask |= RC_MASK_W;
- dst.WriteMask &= ~inst->I.DstReg.WriteMask;
+ split->Phase[split->NumPhases++] = best_matchmask;
+ mask &= ~best_matchmask;
}
}
+struct rc_swizzle_caps r300_swizzle_caps = {
+ .IsNative = r300_swizzle_is_native,
+ .Split = r300_swizzle_split
+};
+
/**
* Translate an RGB (XYZ) swizzle into the hardware code for the given
* instruction source.
*/
-GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
{
const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
if (!sd) {
- _mesa_printf("Not a native swizzle: %08x\n", swizzle);
+ fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
return 0;
}
@@ -209,15 +212,15 @@ GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
* Translate an Alpha (W) swizzle into the hardware code for the given
* instruction source.
*/
-GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle)
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
{
if (swizzle < 3)
return swizzle + 3*src;
switch(swizzle) {
- case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
- case SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
- case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+ case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+ case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
default: return R300_ALU_ARGA_ONE;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
index 231bf4eef5..118476af13 100644
--- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -28,15 +28,11 @@
#ifndef __R300_FRAGPROG_SWIZZLE_H_
#define __R300_FRAGPROG_SWIZZLE_H_
-#include "main/glheader.h"
-#include "shader/prog_instruction.h"
+#include "radeon_swizzle.h"
-struct nqssadce_state;
+extern struct rc_swizzle_caps r300_swizzle_caps;
-GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
-void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-
-GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle);
-GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle);
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index 76c3a7ecfd..5581f25352 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -22,22 +22,21 @@
#include "radeon_compiler.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
+#include <stdio.h>
-#include "radeon_nqssadce.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
-static void nqssadce_init(struct nqssadce_state* s)
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_fragment_program_compiler * c = s->UserData;
- s->Outputs[c->OutputColor].Sourced = WRITEMASK_XYZW;
- s->Outputs[c->OutputDepth].Sourced = WRITEMASK_W;
+ struct r300_fragment_program_compiler * c = userdata;
+ callback(data, c->OutputColor, RC_MASK_XYZW);
+ callback(data, c->OutputDepth, RC_MASK_W);
}
static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
@@ -45,35 +44,35 @@ static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
struct rc_instruction *rci;
for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
- struct prog_instruction * inst = &rci->I;
+ struct rc_sub_instruction * inst = &rci->U.I;
- if (inst->DstReg.File != PROGRAM_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+ if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
continue;
- if (inst->DstReg.WriteMask & WRITEMASK_Z) {
- inst->DstReg.WriteMask = WRITEMASK_W;
+ if (inst->DstReg.WriteMask & RC_MASK_Z) {
+ inst->DstReg.WriteMask = RC_MASK_W;
} else {
inst->DstReg.WriteMask = 0;
continue;
}
switch (inst->Opcode) {
- case OPCODE_FRC:
- case OPCODE_MOV:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ case RC_OPCODE_FRC:
+ case RC_OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ case RC_OPCODE_ADD:
+ case RC_OPCODE_MAX:
+ case RC_OPCODE_MIN:
+ case RC_OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
- inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
- inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ case RC_OPCODE_CMP:
+ case RC_OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]);
break;
default:
// Scalar instructions needn't be reswizzled
@@ -89,11 +88,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
if (c->is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r500_transform_TEX, c },
+ { &r500_transform_IF, 0 },
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
- radeonLocalTransform(&c->Base, 4, transformations);
+ radeonLocalTransform(&c->Base, 5, transformations);
+
+ c->Base.SwizzleCaps = &r500_swizzle_caps;
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_TEX, c },
@@ -101,32 +103,66 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(&c->Base, 3, transformations);
+
+ c->Base.SwizzleCaps = &r300_swizzle_caps;
}
if (c->Base.Debug) {
- _mesa_printf("Fragment Program: After native rewrite:\n");
+ fprintf(stderr, "Fragment Program: After native rewrite:\n");
rc_print_program(&c->Base.Program);
fflush(stderr);
}
- if (c->is_r500) {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r500FPIsNativeSwizzle,
- .BuildSwizzle = &r500FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
- } else {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadce_init,
- .IsNativeSwizzle = &r300FPIsNativeSwizzle,
- .BuildSwizzle = &r300FPBuildSwizzle
- };
- radeonNqssaDce(&c->Base, &nqssadce, c);
+ rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Fragment Program: After deadcode:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ rc_dataflow_swizzles(&c->Base);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Compiler: after dataflow passes:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ rc_pair_translate(c);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Compiler: after pair translate:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
}
+ rc_pair_schedule(c);
+ if (c->Base.Error)
+ return;
+
+ if (c->Base.Debug) {
+ fprintf(stderr, "Compiler: after pair scheduling:\n");
+ rc_print_program(&c->Base.Program);
+ fflush(stderr);
+ }
+
+ if (c->is_r500)
+ rc_pair_regalloc(c, 128);
+ else
+ rc_pair_regalloc(c, R300_PFS_NUM_TEMP_REGS);
+
+ if (c->Base.Error)
+ return;
+
if (c->Base.Debug) {
- _mesa_printf("Compiler: after NqSSA-DCE:\n");
+ fprintf(stderr, "Compiler: after pair register allocation:\n");
rc_print_program(&c->Base.Program);
fflush(stderr);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index dad27fc98e..1b2cb8dde7 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -22,13 +22,13 @@
#include "radeon_compiler.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-#include "radeon_nqssadce.h"
-#include "radeon_program.h"
+#include "radeon_dataflow.h"
#include "radeon_program_alu.h"
-
-#include "shader/prog_print.h"
+#include "radeon_swizzle.h"
/*
@@ -42,104 +42,83 @@
t_swizzle(y), \
t_swizzle(y), \
t_src_class(vpi->SrcReg[x].File), \
- NEGATE_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+ RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
-static unsigned long t_dst_mask(GLuint mask)
+static unsigned long t_dst_mask(unsigned int mask)
{
- /* WRITEMASK_* is equivalent to VSF_FLAG_* */
- return mask & WRITEMASK_XYZW;
+ /* RC_MASK_* is equivalent to VSF_FLAG_* */
+ return mask & RC_MASK_XYZW;
}
-static unsigned long t_dst_class(gl_register_file file)
+static unsigned long t_dst_class(rc_register_file file)
{
-
switch (file) {
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_TEMPORARY:
return PVS_DST_REG_TEMPORARY;
- case PROGRAM_OUTPUT:
+ case RC_FILE_OUTPUT:
return PVS_DST_REG_OUT;
- case PROGRAM_ADDRESS:
+ case RC_FILE_ADDRESS:
return PVS_DST_REG_A0;
- /*
- case PROGRAM_INPUT:
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_STATE_VAR:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT)
+ if (dst->File == RC_FILE_OUTPUT)
return vp->outputs[dst->Index];
return dst->Index;
}
-static unsigned long t_src_class(gl_register_file file)
+static unsigned long t_src_class(rc_register_file file)
{
switch (file) {
- case PROGRAM_BUILTIN:
- case PROGRAM_TEMPORARY:
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_NONE:
+ case RC_FILE_TEMPORARY:
return PVS_SRC_REG_TEMPORARY;
- case PROGRAM_INPUT:
+ case RC_FILE_INPUT:
return PVS_SRC_REG_INPUT;
- case PROGRAM_LOCAL_PARAM:
- case PROGRAM_ENV_PARAM:
- case PROGRAM_NAMED_PARAM:
- case PROGRAM_CONSTANT:
- case PROGRAM_STATE_VAR:
+ case RC_FILE_CONSTANT:
return PVS_SRC_REG_CONSTANT;
- /*
- case PROGRAM_OUTPUT:
- case PROGRAM_WRITE_ONLY:
- case PROGRAM_ADDRESS:
- */
- default:
- fprintf(stderr, "problem in %s", __FUNCTION__);
- _mesa_exit(-1);
- return -1;
}
}
-static GLboolean t_src_conflict(struct prog_src_register a, struct prog_src_register b)
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
{
unsigned long aclass = t_src_class(a.File);
unsigned long bclass = t_src_class(b.File);
if (aclass != bclass)
- return GL_FALSE;
+ return 0;
if (aclass == PVS_SRC_REG_TEMPORARY)
- return GL_FALSE;
+ return 0;
if (a.RelAddr || b.RelAddr)
- return GL_TRUE;
+ return 1;
if (a.Index != b.Index)
- return GL_TRUE;
+ return 1;
- return GL_FALSE;
+ return 0;
}
-static INLINE unsigned long t_swizzle(GLubyte swizzle)
+static inline unsigned long t_swizzle(unsigned int swizzle)
{
- /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
return swizzle;
}
static unsigned long t_src_index(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- if (src->File == PROGRAM_INPUT) {
+ if (src->File == RC_FILE_INPUT) {
assert(vp->inputs[src->Index] != -1);
return vp->inputs[src->Index];
} else {
@@ -155,9 +134,9 @@ static unsigned long t_src_index(struct r300_vertex_program_code *vp,
/* these two functions should probably be merged... */
static unsigned long t_src(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
@@ -170,9 +149,9 @@ static unsigned long t_src(struct r300_vertex_program_code *vp,
}
static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
- struct prog_src_register *src)
+ struct rc_src_register *src)
{
- /* src->Negate uses the NEGATE_ flags from program_instruction.h,
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
* which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
*/
return PVS_SRC_OPERAND(t_src_index(vp, src),
@@ -181,79 +160,79 @@ static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_swizzle(GET_SWZ(src->Swizzle, 0)),
t_src_class(src->File),
- src->Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(src->RelAddr << 4);
}
-static GLboolean valid_dst(struct r300_vertex_program_code *vp,
- struct prog_dst_register *dst)
+static int valid_dst(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
{
- if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
- return GL_FALSE;
- } else if (dst->File == PROGRAM_ADDRESS) {
+ if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return 0;
+ } else if (dst->File == RC_FILE_ADDRESS) {
assert(dst->Index == 0);
}
- return GL_TRUE;
+ return 1;
}
static void ei_vector1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_vector2(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src(vp, &vpi->SrcReg[0]);
inst[2] = t_src(vp, &vpi->SrcReg[1]);
- inst[3] = __CONST(1, SWIZZLE_ZERO);
+ inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
}
static void ei_math1(struct r300_vertex_program_code *vp,
- GLuint hw_opcode,
- struct prog_instruction *vpi,
- GLuint * inst)
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
- inst[3] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
}
static void ei_lit(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
@@ -263,27 +242,27 @@ static void ei_lit(struct r300_vertex_program_code *vp,
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
PVS_SRC_SELECT_FORCE_0, // Z
t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
t_src_class(vpi->SrcReg[0].File),
- vpi->SrcReg[0].Negate ? NEGATE_XYZW : NEGATE_NONE) |
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
(vpi->SrcReg[0].RelAddr << 4);
}
static void ei_mad(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
/* Remarks about hardware limitations of MAD
* (please preserve this comment, as this information is _NOT_
@@ -311,22 +290,22 @@ static void ei_mad(struct r300_vertex_program_code *vp,
* according to AMD docs, this should improve performance by one clock
* as a nice side bonus.
*/
- if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY &&
- vpi->SrcReg[1].File == PROGRAM_TEMPORARY &&
- vpi->SrcReg[2].File == PROGRAM_TEMPORARY &&
+ if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
- GL_FALSE,
- GL_TRUE,
+ 0,
+ 1,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
} else {
inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
- GL_FALSE,
- GL_FALSE,
+ 0,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
@@ -337,17 +316,17 @@ static void ei_mad(struct r300_vertex_program_code *vp,
}
static void ei_pow(struct r300_vertex_program_code *vp,
- struct prog_instruction *vpi,
- GLuint * inst)
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
{
inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
- GL_TRUE,
- GL_FALSE,
+ 1,
+ 0,
t_dst_index(vp, &vpi->DstReg),
t_dst_mask(vpi->DstReg.WriteMask),
t_dst_class(vpi->DstReg.File));
inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
- inst[2] = __CONST(0, SWIZZLE_ZERO);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
@@ -362,8 +341,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
compiler->SetHwInputOutput(compiler);
for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
- struct prog_instruction *vpi = &rci->I;
- GLuint *inst = compiler->code->body.d + compiler->code->length;
+ struct rc_sub_instruction *vpi = &rci->U.I;
+ unsigned int *inst = compiler->code->body.d + compiler->code->length;
/* Skip instructions writing to non-existing destination */
if (!valid_dst(compiler->code, &vpi->DstReg))
@@ -375,26 +354,26 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
switch (vpi->Opcode) {
- case OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
- case OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
- case OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
- case OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
- case OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
- case OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
- case OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
- case OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
- case OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
- case OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
- case OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
- case OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
- case OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
- case OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
- case OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
- case OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
- case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
- case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
default:
rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
return;
@@ -408,38 +387,37 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
struct temporary_allocation {
- GLuint Allocated:1;
- GLuint HwTemp:15;
+ unsigned int Allocated:1;
+ unsigned int HwTemp:15;
struct rc_instruction * LastRead;
};
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
- GLuint num_orig_temps = 0;
- GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ unsigned int num_orig_temps = 0;
+ char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
struct temporary_allocation * ta;
- GLuint i, j;
+ unsigned int i, j;
compiler->code->num_temporaries = 0;
memset(hwtemps, 0, sizeof(hwtemps));
/* Pass 1: Count original temporaries and allocate structures */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- if (inst->I.SrcReg[i].Index >= num_orig_temps)
- num_orig_temps = inst->I.SrcReg[i].Index + 1;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- if (inst->I.DstReg.Index >= num_orig_temps)
- num_orig_temps = inst->I.DstReg.Index + 1;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.DstReg.Index + 1;
}
}
}
@@ -450,32 +428,31 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
- ta[inst->I.SrcReg[i].Index].LastRead = inst;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
+ ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
}
}
/* Pass 3: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
- GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
- GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for (i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.SrcReg[i].Index;
- inst->I.SrcReg[i].Index = ta[orig].HwTemp;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
if (ta[orig].Allocated && inst == ta[orig].LastRead)
- hwtemps[ta[orig].HwTemp] = GL_FALSE;
+ hwtemps[ta[orig].HwTemp] = 0;
}
}
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
- GLuint orig = inst->I.DstReg.Index;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.DstReg.Index;
if (!ta[orig].Allocated) {
for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
@@ -485,16 +462,16 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
if (j >= VSF_MAX_FRAGMENT_TEMPS) {
fprintf(stderr, "Out of hw temporaries\n");
} else {
- ta[orig].Allocated = GL_TRUE;
+ ta[orig].Allocated = 1;
ta[orig].HwTemp = j;
- hwtemps[j] = GL_TRUE;
+ hwtemps[j] = 1;
if (j >= compiler->code->num_temporaries)
compiler->code->num_temporaries = j + 1;
}
}
- inst->I.DstReg.Index = ta[orig].HwTemp;
+ inst->U.I.DstReg.Index = ta[orig].HwTemp;
}
}
}
@@ -505,45 +482,45 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
* Vertex engine cannot read two inputs or two constants at the same time.
* Introduce intermediate MOVs to temporary registers to account for this.
*/
-static GLboolean transform_source_conflicts(
+static int transform_source_conflicts(
struct radeon_compiler *c,
struct rc_instruction* inst,
void* unused)
{
- GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (num_operands == 3) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
- || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+ if (opcode->NumSrcRegs == 3) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+ || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
-
- reset_srcreg(&inst->I.SrcReg[2]);
- inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[2].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+ reset_srcreg(&inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[2].Index = tmpreg;
}
}
- if (num_operands >= 2) {
- if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+ if (opcode->NumSrcRegs >= 2) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
int tmpreg = rc_find_free_temporary(c);
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = tmpreg;
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
-
- reset_srcreg(&inst->I.SrcReg[1]);
- inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[1].Index = tmpreg;
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+ reset_srcreg(&inst->U.I.SrcReg[1]);
+ inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[1].Index = tmpreg;
}
}
- return GL_TRUE;
+ return 1;
}
static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
@@ -554,44 +531,52 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
if ((compiler->RequiredOutputs & (1 << i)) &&
!(compiler->Base.Program.OutputsWritten & (1 << i))) {
struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = i;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = i;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
- inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
- inst->I.SrcReg[0].Index = 0;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+ inst->U.I.SrcReg[0].Index = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
compiler->Base.Program.OutputsWritten |= 1 << i;
}
}
}
-static void nqssadceInit(struct nqssadce_state* s)
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
{
- struct r300_vertex_program_compiler * compiler = s->UserData;
+ struct r300_vertex_program_compiler * c = userdata;
int i;
- for(i = 0; i < VERT_RESULT_MAX; ++i) {
- if (compiler->RequiredOutputs & (1 << i))
- s->Outputs[i].Sourced = WRITEMASK_XYZW;
+ for(i = 0; i < 32; ++i) {
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
}
}
-static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
(void) opcode;
(void) reg;
- return GL_TRUE;
+ return 1;
}
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
+
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
addArtificialOutputs(compiler);
{
@@ -624,22 +609,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
fflush(stderr);
}
- {
- struct radeon_nqssadce_descr nqssadce = {
- .Init = &nqssadceInit,
- .IsNativeSwizzle = &swizzleIsNative,
- .BuildSwizzle = NULL
- };
- radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
+ rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after deadcode:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
+ }
- /* We need this step for reusing temporary registers */
- allocate_temporary_registers(compiler);
+ rc_dataflow_swizzles(&compiler->Base);
- if (compiler->Base.Debug) {
- fprintf(stderr, "Vertex program after NQSSADCE:\n");
- rc_print_program(&compiler->Base.Program);
- fflush(stderr);
- }
+ allocate_temporary_registers(compiler);
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Vertex program after dataflow:\n");
+ rc_print_program(&compiler->Base.Program);
+ fflush(stderr);
}
translate_vertex_program(compiler);
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
index 980ef3eaea..66f9b0529f 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -146,7 +146,7 @@ static void r300_vs_op_dump(uint32_t op)
static void r300_vs_src_dump(uint32_t src)
{
fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
- (src >> 5) & 0x7f, r300_vs_src_debug[src & 0x3],
+ (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
src & (1 << 25) ? "-" : " ",
r300_vs_swiz_debug[(src >> 13) & 0x7],
src & (1 << 26) ? "-" : " ",
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 7e2faed690..d87acecdab 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -27,15 +27,17 @@
#include "r500_fragprog.h"
+#include <stdio.h>
+
#include "../r300_reg.h"
-static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
{
- struct prog_src_register reg = { 0, };
+ struct rc_src_register reg = { 0, };
- reg.File = PROGRAM_STATE_VAR;
+ reg.File = RC_FILE_CONSTANT;
reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
- reg.Swizzle = SWIZZLE_WWWW;
+ reg.Swizzle = RC_SWIZZLE_WWWW;
return reg;
}
@@ -44,7 +46,7 @@ static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int t
* - implement texture compare (shadow extensions)
* - extract non-native source / destination operands
*/
-GLboolean r500_transform_TEX(
+int r500_transform_TEX(
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data)
@@ -52,77 +54,77 @@ GLboolean r500_transform_TEX(
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
- if (inst->I.Opcode != OPCODE_TEX &&
- inst->I.Opcode != OPCODE_TXB &&
- inst->I.Opcode != OPCODE_TXP &&
- inst->I.Opcode != OPCODE_KIL)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+ inst->U.I.Opcode != RC_OPCODE_TXB &&
+ inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_KIL)
+ return 0;
/* ARB_shadow & EXT_shadow_funcs */
- if (inst->I.Opcode != OPCODE_KIL &&
- c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
- if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
- inst->I.Opcode = OPCODE_MOV;
+ if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
- if (comparefunc == GL_ALWAYS) {
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
+ if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
} else {
- inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
- return GL_TRUE;
+ return 1;
} else {
- GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
- GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+ unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
int pass, fail;
- inst_rcp->I.Opcode = OPCODE_RCP;
- inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
- inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
- inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
- inst_cmp->I.DstReg = inst->I.DstReg;
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = rc_find_free_temporary(c);
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
-
- inst_mad->I.Opcode = OPCODE_MAD;
- inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
- inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
- inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
- inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
- inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+ inst_cmp->U.I.DstReg = inst->U.I.DstReg;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
- inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
- inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0) */
- if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
- inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+ inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
else
- inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
+ inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
- inst_cmp->I.Opcode = OPCODE_CMP;
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
/* DstReg has been filled out above */
- inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
- if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
pass = 1;
fail = 2;
} else {
@@ -130,131 +132,161 @@ GLboolean r500_transform_TEX(
fail = 1;
}
- inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
- inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
- inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
+ inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+ inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+ inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit);
}
}
/* Cannot write texture to output registers */
- if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ if (inst->U.I.Opcode != RC_OPCODE_KIL && inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg = inst->I.DstReg;
- inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg = inst->U.I.DstReg;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
/* Cannot read texture coordinate from constants file */
- if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
+ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
- inst_mov->I.Opcode = OPCODE_MOV;
- inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
- inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
- reset_srcreg(&inst->I.SrcReg[0]);
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
}
- return GL_TRUE;
+ return 1;
+}
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ *
+ * A helper MOV is created via rc_insert_new_instruction(c, inst->Prev)
+ * (presumably this places it directly in front of the IF; confirm against
+ * that helper). The MOV reads the IF's original condition operand, writes
+ * no destination components and sets the ALU result from its W channel
+ * using a "not equal" compare. The IF itself is then changed to read
+ * RC_FILE_SPECIAL / RC_SPECIAL_ALU_RESULT.
+ *
+ * \p data is not used by this function.
+ *
+ * \return 0 if \p inst is not an IF (nothing is changed), 1 otherwise.
+ */
+int r500_transform_IF(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_IF)
+ return 0;
+
+ /* Helper MOV: no register write (WriteMask 0), only the ALU result is produced. */
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.WriteMask = 0;
+ inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
+ inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ /* NOTE(review): presumably this routes the condition's X selection into the
+  * W slot and leaves x/y/z unused; confirm against combine_swizzles4(). */
+ inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
+
+ /* The IF now tests the special ALU result register, unswizzled and unnegated. */
+ inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+ inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].Negate = 0;
+
+ return 1;
}
-GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
- GLuint relevant;
+ unsigned int relevant;
int i;
- if (opcode == OPCODE_TEX ||
- opcode == OPCODE_TXB ||
- opcode == OPCODE_TXP ||
- opcode == OPCODE_KIL) {
+ if (opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP ||
+ opcode == RC_OPCODE_KIL) {
if (reg.Abs)
- return GL_FALSE;
+ return 0;
- if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE))
- return GL_FALSE;
+ if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+ return 0;
if (reg.Negate)
- reg.Negate ^= NEGATE_XYZW;
+ reg.Negate ^= RC_MASK_XYZW;
for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz == SWIZZLE_NIL) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED) {
reg.Negate &= ~(1 << i);
continue;
}
if (swz >= 4)
- return GL_FALSE;
+ return 0;
}
if (reg.Negate)
- return GL_FALSE;
+ return 0;
- return GL_TRUE;
- } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+ return 1;
+ } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
* if it doesn't fit perfectly into a .xyzw case... */
- if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
- return GL_TRUE;
+ if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
+ return 1;
- return GL_FALSE;
+ return 0;
} else {
/* ALU instructions support almost everything */
if (reg.Abs)
- return GL_TRUE;
+ return 1;
relevant = 0;
for(i = 0; i < 3; ++i) {
- GLuint swz = GET_SWZ(reg.Swizzle, i);
- if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
relevant |= 1 << i;
}
if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
- return GL_FALSE;
+ return 0;
- return GL_TRUE;
+ return 1;
}
}
/**
- * Implement a MOV with a potentially non-native swizzle.
+ * Split source register access.
*
* The only thing we *cannot* do in an ALU instruction is per-component
- * negation. Therefore, we split the MOV into two instructions when necessary.
+ * negation.
*/
-void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+ struct rc_swizzle_split * split)
{
- GLuint negatebase[2] = { 0, 0 };
+ unsigned int negatebase[2] = { 0, 0 };
int i;
for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(src.Swizzle, i);
- if (swz == SWIZZLE_NIL)
+ unsigned int swz = GET_SWZ(src.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
continue;
negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
}
+ split->NumPhases = 0;
+
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
- struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg = dst;
- inst->I.DstReg.WriteMask = negatebase[i];
- inst->I.SrcReg[0] = src;
- inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
+ split->Phase[split->NumPhases++] = negatebase[i];
}
}
+/**
+ * Swizzle callbacks for R500 fragment programs: IsNative points at
+ * r500_swizzle_is_native() and Split at r500_swizzle_split(), both defined
+ * above in this file.
+ */
+struct rc_swizzle_caps r500_swizzle_caps = {
+ .IsNative = r500_swizzle_is_native,
+ .Split = r500_swizzle_split
+};
static char *toswiz(int swiz_val) {
switch(swiz_val) {
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 9091f65cd2..0918cdf518 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -33,21 +33,21 @@
#ifndef __R500_FRAGPROG_H_
#define __R500_FRAGPROG_H_
-#include "shader/prog_parameter.h"
-#include "shader/prog_instruction.h"
-
#include "radeon_compiler.h"
-#include "radeon_nqssadce.h"
+#include "radeon_swizzle.h"
extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
-extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
+extern struct rc_swizzle_caps r500_swizzle_caps;
-extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src);
+extern int r500_transform_TEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
-extern GLboolean r500_transform_TEX(
+extern int r500_transform_IF(
struct radeon_compiler * c,
struct rc_instruction * inst,
void* data);
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index d694725c9b..b1b14394b6 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -37,10 +37,6 @@
*
* \author Corbin Simpson <MostAwesomeDude@gmail.com>
*
- * \todo Depth write, WPOS/FOGC inputs
- *
- * \todo FogOption
- *
*/
#include "r500_fragprog.h"
@@ -51,7 +47,6 @@
#define PROG_CODE \
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
struct r500_fragment_program_code *code = &c->code->code.r500
#define error(fmt, args...) do { \
@@ -60,63 +55,80 @@
} while(0)
-static GLuint translate_rgb_op(struct r300_fragment_program_compiler *c, GLuint opcode)
+/**
+ * Hardware instruction indices recorded for one IF / ELSE / ENDIF construct.
+ * emit_flowcontrol() fills these in as the corresponding opcodes are seen.
+ */
+struct branch_info {
+ int If; /* index of the instruction slot reserved for the IF */
+ int Else; /* index of the ELSE slot, or -1 while no ELSE has been seen */
+ int Endif; /* index of the ENDIF slot, or -1 until the ENDIF is reached */
+};
+
+/**
+ * Bookkeeping used while emitting flow-control instructions.
+ */
+struct emit_state {
+ struct radeon_compiler * C; /* compiler used for error reporting and pool allocation */
+ struct r500_fragment_program_code * Code; /* hardware code being written */
+
+ struct branch_info * Branches; /* stack of currently open branches */
+ unsigned int CurrentBranchDepth; /* number of entries of Branches in use */
+ unsigned int BranchesReserved; /* number of entries allocated for Branches */
+
+ unsigned int MaxBranchDepth; /* largest CurrentBranchDepth reached so far */
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
- case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
- case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
- case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
- case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
- case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+ case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+ case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+ case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
default:
error("translate_rgb_op(%d): unknown opcode\n", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
- case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
- case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
- case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+ case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
}
}
-static GLuint translate_alpha_op(struct r300_fragment_program_compiler *c, GLuint opcode)
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
switch(opcode) {
- case OPCODE_CMP: return R500_ALPHA_OP_CMP;
- case OPCODE_COS: return R500_ALPHA_OP_COS;
- case OPCODE_DDX: return R500_ALPHA_OP_MDH;
- case OPCODE_DDY: return R500_ALPHA_OP_MDV;
- case OPCODE_DP3: return R500_ALPHA_OP_DP;
- case OPCODE_DP4: return R500_ALPHA_OP_DP;
- case OPCODE_EX2: return R500_ALPHA_OP_EX2;
- case OPCODE_FRC: return R500_ALPHA_OP_FRC;
- case OPCODE_LG2: return R500_ALPHA_OP_LN2;
+ case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+ case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+ case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+ case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
default:
error("translate_alpha_op(%d): unknown opcode\n", opcode);
/* fall through */
- case OPCODE_NOP:
+ case RC_OPCODE_NOP:
/* fall through */
- case OPCODE_MAD: return R500_ALPHA_OP_MAD;
- case OPCODE_MAX: return R500_ALPHA_OP_MAX;
- case OPCODE_MIN: return R500_ALPHA_OP_MIN;
- case OPCODE_RCP: return R500_ALPHA_OP_RCP;
- case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
- case OPCODE_SIN: return R500_ALPHA_OP_SIN;
+ case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+ case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+ case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+ case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
}
}
-static GLuint fix_hw_swizzle(GLuint swz)
+static unsigned int fix_hw_swizzle(unsigned int swz)
{
if (swz == 5) swz = 6;
- if (swz == SWIZZLE_NIL) swz = 4;
+ if (swz == RC_SWIZZLE_UNUSED) swz = 4;
return swz;
}
-static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
{
- GLuint t = inst->RGB.Arg[arg].Source;
+ unsigned int t = inst->RGB.Arg[arg].Source;
int comp;
t |= inst->RGB.Arg[arg].Negate << 11;
t |= inst->RGB.Arg[arg].Abs << 12;
@@ -127,39 +139,57 @@ static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
return t;
}
-static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
{
- GLuint t = inst->Alpha.Arg[i].Source;
+ unsigned int t = inst->Alpha.Arg[i].Source;
t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
t |= inst->Alpha.Arg[i].Negate << 5;
t |= inst->Alpha.Arg[i].Abs << 6;
return t;
}
-static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
+/**
+ * Translate a compare function into the R500_INST_ALU_RESULT_OP_* bits.
+ *
+ * Only EQUAL, LESS, GEQUAL and NOTEQUAL are handled; any other value is
+ * reported through rc_error() and 0 is returned.
+ */
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+ switch(func) {
+ case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+ case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+ case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+ case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+ default:
+ rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+ return 0;
+ }
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
{
if (index > code->max_temp_idx)
code->max_temp_idx = index;
}
-static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
{
- if (!src.Constant)
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | 0x100;
+ } else if (src.File == RC_FILE_TEMPORARY) {
use_temporary(code, src.Index);
- return src.Index | src.Constant << 8;
+ return src.Index;
+ }
+
+ return 0;
}
/**
* Emit a paired ALU instruction.
*/
-static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_alu: Too many instructions");
- return GL_FALSE;
+ return;
}
int ip = ++code->inst_end;
@@ -167,17 +197,22 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
- if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
+ if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
- else
+ if (inst->WriteALUResult) {
+ error("%s: cannot write output and ALU result at the same time");
+ return;
+ }
+ } else {
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+ }
code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
- c->code->writes_depth = GL_TRUE;
+ c->code->writes_depth = 1;
}
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
@@ -206,12 +241,21 @@ static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
- return GL_TRUE;
+ if (inst->WriteALUResult) {
+ code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+ else
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+ code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+ }
}
-static GLuint translate_strq_swizzle(GLuint swizzle)
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
- GLuint swiz = 0;
+ unsigned int swiz = 0;
int i;
for (i = 0; i < 4; i++)
swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
@@ -221,67 +265,193 @@ static GLuint translate_strq_swizzle(GLuint swizzle)
/**
* Emit a single TEX instruction
*/
-static GLboolean emit_tex(void *data, struct radeon_pair_texture_instruction *inst)
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_tex: Too many instructions");
- return GL_FALSE;
+ return 0;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
- | (inst->WriteMask << 11)
+ | (inst->DstReg.WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
- if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
- code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+ if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+ code->inst[ip].inst1 |= R500_TEX_UNSCALED;
switch (inst->Opcode) {
- case RADEON_OPCODE_KIL:
+ case RC_OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
- case RADEON_OPCODE_TEX:
+ case RC_OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
- case RADEON_OPCODE_TXB:
+ case RC_OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
- case RADEON_OPCODE_TXP:
+ case RC_OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
default:
error("emit_tex can't handle opcode %x\n", inst->Opcode);
}
- code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcIndex)
- | (translate_strq_swizzle(inst->SrcSwizzle) << 8)
- | R500_TEX_DST_ADDR(inst->DestIndex)
+ use_temporary(code, inst->SrcReg[0].Index);
+ if (inst->Opcode != RC_OPCODE_KIL)
+ use_temporary(code, inst->DstReg.Index);
+
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+ | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DstReg.Index)
| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
- return GL_TRUE;
+ return 1;
}
-static const struct radeon_pair_handler pair_handler = {
- .EmitPaired = emit_paired,
- .EmitTex = emit_tex,
- .MaxHwTemps = 128
-};
+/**
+ * Enlarge the branch stack of \p s.
+ *
+ * The capacity doubles on every call, starting at 4 entries. The new array
+ * is taken from the compiler's memory pool and the entries currently in use
+ * are copied over; the previous array is not released here.
+ */
+static void grow_branches(struct emit_state * s)
+{
+	unsigned int newreserved = s->BranchesReserved * 2;
+	struct branch_info * newbranches;
+
+	if (!newreserved)
+		newreserved = 4;
+
+	newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));
+
+	/* On the first call s->Branches is still NULL (the state is zero-filled
+	 * by the caller). memcpy() must not be handed a null pointer, not even
+	 * for a zero-length copy, so skip it when there is nothing to copy. */
+	if (s->CurrentBranchDepth)
+		memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));
+
+	s->Branches = newbranches;
+	s->BranchesReserved = newreserved;
+}
+
+/**
+ * Emit one flow-control instruction (IF, ELSE or ENDIF).
+ *
+ * Every call reserves one hardware instruction slot up front. IF pushes a
+ * new entry on the branch stack, ELSE records its slot in the innermost
+ * entry, and ENDIF fills in the jump words of the IF, the optional ELSE and
+ * the ENDIF itself before popping the entry.
+ *
+ * Errors (instruction limit, nesting limit, unbalanced ELSE/ENDIF, unknown
+ * opcode) are reported through rc_error() on s->C.
+ */
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+	if (s->Code->inst_end >= 511) {
+		rc_error(s->C, "%s: Too many instructions", __FUNCTION__);
+		return;
+	}
+
+	unsigned int newip = ++s->Code->inst_end;
+
+	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+	if (inst->U.I.Opcode == RC_OPCODE_IF) {
+		if (s->CurrentBranchDepth >= 32) {
+			rc_error(s->C, "Branch depth exceeds hardware limit");
+			return;
+		}
+
+		if (s->CurrentBranchDepth >= s->BranchesReserved)
+			grow_branches(s);
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+		branch->If = newip;
+		branch->Else = -1;
+		branch->Endif = -1;
+
+		if (s->CurrentBranchDepth > s->MaxBranchDepth)
+			s->MaxBranchDepth = s->CurrentBranchDepth;
+
+		/* actual instruction is filled in at ENDIF time */
+	} else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+			return;
+		}
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Else = newip;
+
+		/* actual instruction is filled in at ENDIF time */
+	} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ENDIF outside a branch", __FUNCTION__);
+			return;
+		}
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Endif = newip;
+
+		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
+			;
+
+		if (branch->Else >= 0) {
+			/* increment branch counter also if jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+				| R500_FC_A_OP_NONE /* no address stack */
+				| R500_FC_B_ELSE /* all active pixels want to jump */
+				| R500_FC_B_OP0_NONE /* no counter op if stay */
+				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+				| R500_FC_B_POP_CNT(1)
+				;
+			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		} else {
+			/* don't touch branch counter on jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		}
+
+		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_ANY /* the docs say to set this, but I don't understand why */
+			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+			| R500_FC_B_OP1_NONE /* no counter op if jump */
+			| R500_FC_B_POP_CNT(1)
+			;
+		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+
+		s->CurrentBranchDepth--;
+	} else {
+		rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode);
+	}
+}
void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
{
+ struct emit_state s;
struct r500_fragment_program_code *code = &compiler->code->code.r500;
- _mesa_bzero(code, sizeof(*code));
+ memset(&s, 0, sizeof(s));
+ s.C = &compiler->Base;
+ s.Code = code;
+
+ memset(code, 0, sizeof(*code));
code->max_temp_idx = 1;
code->inst_end = -1;
- radeonPairProgram(compiler, &pair_handler, compiler);
+ for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ emit_flowcontrol(&s, inst);
+ } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ continue;
+ } else {
+ emit_tex(compiler, &inst->U.I);
+ }
+ } else {
+ emit_paired(compiler, &inst->U.P);
+ }
+ }
+
+ if (code->max_temp_idx >= 128)
+ rc_error(&compiler->Base, "Too many hardware temporaries used");
+
if (compiler->Base.Error)
return;
@@ -296,4 +466,11 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
+
+ if (s.MaxBranchDepth >= 4) {
+ if (code->max_temp_idx < 1)
+ code->max_temp_idx = 1;
+
+ code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
index c7923004df..1a3d8bb641 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -25,11 +25,13 @@
*
*/
-#include "main/mtypes.h"
-#include "shader/prog_instruction.h"
-
#include "radeon_code.h"
+#include <stdlib.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
void rc_constants_init(struct rc_constant_list * c)
{
memset(c, 0, sizeof(*c));
@@ -143,7 +145,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
for(unsigned comp = 0; comp < c->Constants[index].Size; ++comp) {
if (c->Constants[index].u.Immediate[comp] == data) {
- *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp);
return index;
}
}
@@ -156,7 +158,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
if (free_index >= 0) {
unsigned comp = c->Constants[free_index].Size++;
c->Constants[free_index].u.Immediate[comp] = data;
- *swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
+ *swizzle = RC_MAKE_SWIZZLE(comp, comp, comp, comp);
return free_index;
}
@@ -164,7 +166,7 @@ unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float da
constant.Type = RC_CONSTANT_IMMEDIATE;
constant.Size = 1;
constant.u.Immediate[0] = data;
- *swizzle = SWIZZLE_XXXX;
+ *swizzle = RC_SWIZZLE_XXXX;
return rc_constants_add(c, &constant);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 3e88554ba1..902b7cfa53 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -89,6 +89,23 @@ unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const floa
unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ *
+ * The order of the enumerators therefore must not be changed. The values
+ * are also stored in the 3-bit texture_compare_func field of
+ * r300_fragment_program_external_state, so they have to stay within 0..7.
+ */
+typedef enum {
+ RC_COMPARE_FUNC_NEVER = 0,
+ RC_COMPARE_FUNC_LESS,
+ RC_COMPARE_FUNC_EQUAL,
+ RC_COMPARE_FUNC_LEQUAL,
+ RC_COMPARE_FUNC_GREATER,
+ RC_COMPARE_FUNC_NOTEQUAL,
+ RC_COMPARE_FUNC_GEQUAL,
+ RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
* Stores state that influences the compilation of a fragment program.
*/
struct r300_fragment_program_external_state {
@@ -105,10 +122,12 @@ struct r300_fragment_program_external_state {
/**
* If the sampler is used as a shadow sampler,
- * this field is (texture_compare_func - GL_NEVER).
- * [e.g. if compare function is GL_LEQUAL, this field is 3]
+ * this field specifies the compare function.
+ *
+ * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
*
* Otherwise, this field is 0.
+ * \sa rc_compare_func
*/
unsigned texture_compare_func : 3;
} unit[16];
@@ -163,6 +182,8 @@ struct r500_fragment_program_code {
int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
int max_temp_idx;
+
+ uint32_t us_fc_ctrl;
};
struct rX00_fragment_program_code {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index da950d5289..c0e7a7f7a0 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -23,6 +23,8 @@
#include "radeon_compiler.h"
#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
#include "radeon_program.h"
@@ -34,7 +36,7 @@ void rc_init(struct radeon_compiler * c)
memory_pool_init(&c->Pool);
c->Program.Instructions.Prev = &c->Program.Instructions;
c->Program.Instructions.Next = &c->Program.Instructions;
- c->Program.Instructions.I.Opcode = OPCODE_END;
+ c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
}
void rc_destroy(struct radeon_compiler * c)
@@ -60,7 +62,7 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
{
va_list ap;
- c->Error = GL_TRUE;
+ c->Error = 1;
if (!c->ErrorMsg) {
/* Only remember the first error */
@@ -91,28 +93,63 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...)
}
}
+/**
+ * Report a failed internal assertion as a compiler error.
+ *
+ * The message names \p file, \p line and the text of \p assertion and is
+ * passed to rc_error(), which flags the compiler as failed.
+ *
+ * \return always 1 (presumably so that a wrapping macro can use the call
+ * inside a condition; confirm against the macro that uses it).
+ */
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+ rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+ return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ *
+ * Both masks are cleared first; afterwards bit N is set when some
+ * instruction reads input N or writes output N respectively.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ /* The instruction list is circular: c->Program.Instructions is the sentinel. */
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+ {
+ /* NOTE(review): U.I is read for every instruction without looking at
+  * inst->Type; presumably only normal instructions exist when this
+  * runs. Confirm against the callers. */
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ int i;
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ /* NOTE(review): `1 << Index` is an int shift, so this assumes
+  * register indices below 31. Confirm the index range. */
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
+ c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
+ }
+ }
+}
+
/**
* Rewrite the program such that everything that source the given input
* register will source new_input instead.
*/
-void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input)
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
{
struct rc_instruction * inst;
c->Program.InputsRead &= ~(1 << input);
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
unsigned i;
- for(i = 0; i < numsrcs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_INPUT && inst->I.SrcReg[i].Index == input) {
- inst->I.SrcReg[i].File = new_input.File;
- inst->I.SrcReg[i].Index = new_input.Index;
- inst->I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->I.SrcReg[i].Swizzle);
- if (!inst->I.SrcReg[i].Abs) {
- inst->I.SrcReg[i].Negate ^= new_input.Negate;
- inst->I.SrcReg[i].Abs = new_input.Abs;
+ for(i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+ inst->U.I.SrcReg[i].File = new_input.File;
+ inst->U.I.SrcReg[i].Index = new_input.Index;
+ inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+ if (!inst->U.I.SrcReg[i].Abs) {
+ inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+ inst->U.I.SrcReg[i].Abs = new_input.Abs;
}
c->Program.InputsRead |= 1 << new_input.Index;
@@ -134,12 +171,12 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou
c->Program.OutputsWritten &= ~(1 << output);
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
- inst->I.DstReg.Index = new_output;
- inst->I.DstReg.WriteMask &= writemask;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.Index = new_output;
+ inst->U.I.DstReg.WriteMask &= writemask;
c->Program.OutputsWritten |= 1 << new_output;
}
@@ -157,33 +194,33 @@ void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_ou
struct rc_instruction * inst;
for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- if (numdsts) {
- if (inst->I.DstReg.File == PROGRAM_OUTPUT && inst->I.DstReg.Index == output) {
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = tempreg;
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = tempreg;
}
}
}
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = output;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = output;
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = tempreg;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_OUTPUT;
- inst->I.DstReg.Index = dup_output;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = dup_output;
- inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[0].Index = tempreg;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
c->Program.OutputsWritten |= 1 << dup_output;
}
@@ -201,60 +238,60 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
/* perspective divide */
struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
- inst_rcp->I.Opcode = OPCODE_RCP;
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
- inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_rcp->I.DstReg.Index = tempregi;
- inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = tempregi;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
- inst_rcp->I.SrcReg[0].File = PROGRAM_INPUT;
- inst_rcp->I.SrcReg[0].Index = new_input;
- inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
+ inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_rcp->U.I.SrcReg[0].Index = new_input;
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
struct rc_instruction * inst_mul = rc_insert_new_instruction(c, inst_rcp);
- inst_mul->I.Opcode = OPCODE_MUL;
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
- inst_mul->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mul->I.DstReg.Index = tempregi;
- inst_mul->I.DstReg.WriteMask = WRITEMASK_XYZ;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = tempregi;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_mul->I.SrcReg[0].File = PROGRAM_INPUT;
- inst_mul->I.SrcReg[0].Index = new_input;
+ inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_mul->U.I.SrcReg[0].Index = new_input;
- inst_mul->I.SrcReg[1].File = PROGRAM_TEMPORARY;
- inst_mul->I.SrcReg[1].Index = tempregi;
- inst_mul->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.SrcReg[1].Index = tempregi;
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
/* viewport transformation */
struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_mul);
- inst_mad->I.Opcode = OPCODE_MAD;
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
- inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
- inst_mad->I.DstReg.Index = tempregi;
- inst_mad->I.DstReg.WriteMask = WRITEMASK_XYZ;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = tempregi;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
- inst_mad->I.SrcReg[0].File = PROGRAM_TEMPORARY;
- inst_mad->I.SrcReg[0].Index = tempregi;
- inst_mad->I.SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = tempregi;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
- inst_mad->I.SrcReg[1].File = PROGRAM_STATE_VAR;
- inst_mad->I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
- inst_mad->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
- inst_mad->I.SrcReg[2].File = PROGRAM_STATE_VAR;
- inst_mad->I.SrcReg[2].Index = inst_mad->I.SrcReg[1].Index;
- inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[2].Index = inst_mad->U.I.SrcReg[1].Index;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO);
struct rc_instruction * inst;
for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
- const unsigned numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
unsigned i;
- for(i = 0; i < numsrcs; i++) {
- if (inst->I.SrcReg[i].File == PROGRAM_INPUT &&
- inst->I.SrcReg[i].Index == wpos) {
- inst->I.SrcReg[i].File = PROGRAM_TEMPORARY;
- inst->I.SrcReg[i].Index = tempregi;
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == wpos) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
}
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index e63ab8840a..87a732cd90 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -23,35 +23,11 @@
#ifndef RADEON_COMPILER_H
#define RADEON_COMPILER_H
-#include "main/mtypes.h"
-#include "shader/prog_instruction.h"
-
#include "memory_pool.h"
#include "radeon_code.h"
+#include "radeon_program.h"
-
-struct rc_instruction {
- struct rc_instruction * Prev;
- struct rc_instruction * Next;
- struct prog_instruction I;
-};
-
-struct rc_program {
- /**
- * Instructions.Next points to the first instruction,
- * Instructions.Prev points to the last instruction.
- */
- struct rc_instruction Instructions;
-
- /* Long term, we should probably remove InputsRead & OutputsWritten,
- * since updating dependent state can be fragile, and they aren't
- * actually used very often. */
- uint32_t InputsRead;
- uint32_t OutputsWritten;
- uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
-
- struct rc_constant_list Constants;
-};
+struct rc_swizzle_caps;
struct radeon_compiler {
struct memory_pool Pool;
@@ -59,6 +35,14 @@ struct radeon_compiler {
unsigned Debug:1;
unsigned Error:1;
char * ErrorMsg;
+
+ /**
+ * Variables used internally, not to be touched by callers
+ * of the compiler
+ */
+ /*@{*/
+ struct rc_swizzle_caps * SwizzleCaps;
+ /*@}*/
};
void rc_init(struct radeon_compiler * c);
@@ -67,11 +51,26 @@ void rc_destroy(struct radeon_compiler * c);
void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
void rc_error(struct radeon_compiler * c, const char * fmt, ...);
-void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * The expression is non-zero exactly when \p cond is false: in that case
+ * rc_if_fail_helper() is called with the current file, line and the text of
+ * \p cond. \p cond itself is evaluated once.
+ *
+ * A typical use-case would be:
+ *
+ * if (rc_assert(c, condition-that-must-be-true))
+ * return;
+ */
+#define rc_assert(c, cond) \
+ (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
void rc_calculate_inputs_outputs(struct radeon_compiler * c);
-void rc_move_input(struct radeon_compiler * c, unsigned input, struct prog_src_register new_input);
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input);
@@ -97,7 +96,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
struct r300_vertex_program_compiler {
struct radeon_compiler Base;
struct r300_vertex_program_code *code;
- GLbitfield RequiredOutputs;
+ uint32_t RequiredOutputs;
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
new file mode 100644
index 0000000000..cce9166e64
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_program.h"
+
+
+/**
+ * Report every register channel read by a normal (non-paired) instruction.
+ *
+ * For each source operand the swizzle is folded into a mask of referenced
+ * register channels, and \p cb is called once per referenced channel with
+ * the operand's file, index and the channel number (0..3). When the operand
+ * uses relative addressing, channel 0 of the address register is reported
+ * as well.
+ *
+ * \param fullinst instruction to inspect (its U.I member is used)
+ * \param cb       callback invoked for each channel that is read
+ * \param userdata opaque pointer handed to \p cb unchanged
+ */
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * inst = &fullinst->U.I;
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		unsigned int refmask = 0;
+
+		/* NOTE(review): a RC_FILE_NONE operand ends the whole walk, so any
+		 * later operands are not reported; kept as in the original. */
+		if (inst->SrcReg[src].File == RC_FILE_NONE)
+			return;
+
+		for(unsigned int chan = 0; chan < 4; ++chan)
+			refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
+
+		/* Drop bits contributed by swizzle values that do not name a channel. */
+		refmask &= RC_MASK_XYZW;
+
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (GET_BIT(refmask, chan)) {
+				cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
+			}
+		}
+
+		/* The last callback argument is a channel index, not a write mask:
+		 * the X channel of the address register is channel 0. */
+		if (refmask && inst->SrcReg[src].RelAddr)
+			cb(userdata, fullinst, RC_FILE_ADDRESS, 0, 0);
+	}
+}
+
+/*
+ * Report the register channels read by a paired (RGB + alpha) instruction.
+ *
+ * Works in two steps: first collect, per source slot, which channels the
+ * RGB and alpha arguments select; then call cb for each selected channel
+ * of every source slot that is marked as used.
+ */
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ /* refmasks[n]: bit k set when some argument selects channel k of source slot n */
+ unsigned int refmasks[3] = { 0, 0, 0 };
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ for(unsigned int chan = 0; chan < 3; ++chan) {
+ unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+ /* only swizzle values 0..3 are recorded; presumably larger values
+ * (constants / unused) do not read a register channel -- TODO confirm */
+ if (swz < 4)
+ refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
+ }
+ }
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ /* alpha arguments carry a single swizzle value rather than one per channel */
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ if (inst->Alpha.Arg[arg].Swizzle < 4)
+ refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
+ }
+ }
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ /* channels 0..2 are reported against the RGB source of this slot */
+ if (inst->RGB.Src[src].Used) {
+ for(unsigned int chan = 0; chan < 3; ++chan) {
+ if (GET_BIT(refmasks[src], chan))
+ cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan);
+ }
+ }
+
+ /* channel 3 is reported against the alpha source of this slot */
+ if (inst->Alpha.Src[src].Used) {
+ if (GET_BIT(refmasks[src], 3))
+ cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3);
+ }
+ }
+}
+
+/**
+ * Invoke \p cb for each register channel that \p inst sources.
+ *
+ * The enumeration is conservative: a channel may be reported more than
+ * once, and the instruction's writemask is not used to narrow down which
+ * source channels are reported.
+ */
+void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+{
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		reads_pair(inst, cb, userdata);
+		return;
+	}
+
+	reads_normal(inst, cb, userdata);
+}
+
+
+
+/*
+ * Report the channels written by a normal instruction: one callback per
+ * channel enabled in the destination writemask (only for opcodes that have
+ * a destination), plus one for the ALU result special register when the
+ * instruction writes it.
+ */
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * sub = &fullinst->U.I;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	/* opcodes without a destination register contribute no channels */
+	const unsigned int num_channels = info->HasDstReg ? 4 : 0;
+	unsigned int c;
+
+	for (c = 0; c < num_channels; ++c) {
+		if (!GET_BIT(sub->DstReg.WriteMask, c))
+			continue;
+
+		cb(userdata, fullinst, sub->DstReg.File, sub->DstReg.Index, c);
+	}
+
+	if (sub->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+}
+
+/*
+ * Report the channels written by a paired instruction. RGB writes are
+ * reported as channels 0..2 and the alpha write as channel 3, both against
+ * the temporary register file; the ALU result special register is reported
+ * when the instruction writes it.
+ */
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * pair = &fullinst->U.P;
+	unsigned int c = 0;
+
+	while (c < 3) {
+		if (GET_BIT(pair->RGB.WriteMask, c))
+			cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->RGB.DestIndex, c);
+		++c;
+	}
+
+	if (pair->Alpha.WriteMask)
+		cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->Alpha.DestIndex, 3);
+
+	if (pair->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+}
+
+/**
+ * Invoke \p cb for each register channel that \p inst writes.
+ *
+ * \warning Output registers are not reported for paired instructions:
+ * their writes are always reported against the temporary register file.
+ */
+void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+{
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		writes_pair(inst, cb, userdata);
+		return;
+	}
+
+	writes_normal(inst, cb, userdata);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
new file mode 100644
index 0000000000..5aa4cb64f3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+
+
+/**
+ * Help analyze the register accesses of instructions.
+ */
+/*@{*/
+/*
+ * Callback type used by rc_for_all_reads() and rc_for_all_writes().
+ * It receives the caller's userdata pointer, the instruction being
+ * inspected, and the register file, register index and channel of one
+ * access.
+ */
+typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+/*@}*/
+
+
+/**
+ * Compiler passes based on dataflow analysis.
+ */
+/*@{*/
+/*
+ * Callback supplied to rc_dataflow_deadcode() to declare which program
+ * outputs are needed. It is called with the caller's userdata, an opaque
+ * data pointer, and a mark_fn; the callback is expected to call
+ * mark_fn(data, index, mask) for every output register index and channel
+ * mask that must be kept.
+ */
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+ void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata);
+void rc_dataflow_swizzles(struct radeon_compiler * c);
+/*@}*/
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644
index 0000000000..e0c66c4aeb
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+/*
+ * Set of register channels currently marked as used.
+ * Each byte is a channel mask for one register.
+ */
+struct updatemask_state {
+ unsigned char Output[RC_REGISTER_MAX_INDEX]; /* indexed by output register index */
+ unsigned char Temporary[RC_REGISTER_MAX_INDEX]; /* indexed by temporary register index */
+ unsigned char Address; /* the address register */
+ unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; /* indexed by special register index */
+};
+
+/* Per-instruction result of the analysis, stored in an array indexed by instruction IP. */
+struct instruction_state {
+ unsigned char WriteMask:4; /* destination channels found to be used */
+ unsigned char WriteALUResult:1; /* set when the instruction's ALU result write is used */
+ unsigned char SrcReg[3]; /* per source operand: source channel masks already accounted for */
+};
+
+/* Bookkeeping for one IF/ELSE/ENDIF construct while instructions are walked backwards. */
+struct branchinfo {
+ unsigned int HaveElse:1; /* set once the ELSE of this construct has been seen */
+
+ struct updatemask_state StoreEndif; /* copy of the used-state taken at the ENDIF */
+ struct updatemask_state StoreElse; /* copy of the used-state taken at the ELSE */
+};
+
+/* Working state of one rc_dataflow_deadcode() run. */
+struct deadcode_state {
+ struct radeon_compiler * C; /* compiler being processed; also provides the memory pool */
+ struct instruction_state * Instructions; /* one entry per instruction, indexed by IP */
+
+ struct updatemask_state R; /* used-channel state at the current point of the walk */
+
+ /* stack of open branch constructs; grown on demand by push_branch() */
+ struct branchinfo * BranchStack;
+ unsigned int BranchStackSize; /* number of entries in use */
+ unsigned int BranchStackReserved; /* number of entries allocated */
+};
+
+
+/*
+ * Store the element-wise OR of the used-channel states a and b in dst.
+ * Every element is computed independently, so dst may be the same object
+ * as a or b.
+ */
+static void or_updatemasks(
+	struct updatemask_state * dst,
+	struct updatemask_state * a,
+	struct updatemask_state * b)
+{
+	unsigned int reg;
+
+	for (reg = 0; reg < RC_REGISTER_MAX_INDEX; ++reg)
+		dst->Output[reg] = a->Output[reg] | b->Output[reg];
+
+	for (reg = 0; reg < RC_REGISTER_MAX_INDEX; ++reg)
+		dst->Temporary[reg] = a->Temporary[reg] | b->Temporary[reg];
+
+	for (reg = 0; reg < RC_NUM_SPECIAL_REGISTERS; ++reg)
+		dst->Special[reg] = a->Special[reg] | b->Special[reg];
+
+	dst->Address = a->Address | b->Address;
+}
+
+/*
+ * Open a new branch record on the branch stack and snapshot the current
+ * used-channel state into its StoreEndif member.
+ *
+ * The stack lives in the compiler's memory pool. When it is full, a block
+ * of twice the size (or 4 entries initially) is allocated and the existing
+ * entries are copied over; the old block is left to the pool.
+ */
+static void push_branch(struct deadcode_state * s)
+{
+	if (s->BranchStackSize >= s->BranchStackReserved) {
+		unsigned int new_reserve = 2 * s->BranchStackReserved;
+		struct branchinfo * new_stack;
+
+		if (!new_reserve)
+			new_reserve = 4;
+
+		new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct branchinfo));
+
+		/* On the first growth BranchStack is still NULL; passing a null
+		 * pointer to memcpy is undefined even for a zero-byte copy. */
+		if (s->BranchStackSize)
+			memcpy(new_stack, s->BranchStack, s->BranchStackSize * sizeof(struct branchinfo));
+
+		s->BranchStack = new_stack;
+		s->BranchStackReserved = new_reserve;
+	}
+
+	struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
+	branch->HaveElse = 0;
+	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
+}
+
+/*
+ * Look up the used-channel byte that tracks register (file, index).
+ *
+ * Returns a pointer into s->R for output, temporary, address and special
+ * registers. Returns 0 for any other file, and also (after reporting an
+ * error through rc_error) when the index is out of range for the file.
+ */
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+	switch (file) {
+	case RC_FILE_OUTPUT:
+	case RC_FILE_TEMPORARY:
+		if (index >= RC_REGISTER_MAX_INDEX) {
+			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+			return 0;
+		}
+		return (file == RC_FILE_OUTPUT) ? &s->R.Output[index] : &s->R.Temporary[index];
+
+	case RC_FILE_ADDRESS:
+		/* there is a single tracking byte; index is not consulted */
+		return &s->R.Address;
+
+	case RC_FILE_SPECIAL:
+		if (index >= RC_NUM_SPECIAL_REGISTERS) {
+			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+			return 0;
+		}
+		return &s->R.Special[index];
+
+	default:
+		/* registers of other files are not tracked */
+		return 0;
+	}
+}
+
+/*
+ * Add the channels in mask to the used set of register (file, index).
+ * Registers for which get_used_ptr() returns no tracking byte are ignored.
+ */
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+	unsigned char * const slot = get_used_ptr(s, file, index);
+
+	if (!slot)
+		return;
+
+	*slot |= mask;
+}
+
+/*
+ * Process one instruction during the backward walk.
+ *
+ * Determines which of the channels written by inst are currently marked
+ * as used, records them in the per-instruction state, removes them from
+ * the used set, and then marks the source channels needed to compute them
+ * as used.
+ */
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ struct instruction_state * insts = &s->Instructions[inst->IP];
+ unsigned int usedmask = 0;
+
+ if (opcode->HasDstReg) {
+ unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+ if (pused) {
+ /* channels this instruction writes that are marked as used ... */
+ usedmask = *pused & inst->U.I.DstReg.WriteMask;
+ /* ... are provided by this write, so clear them from the used set */
+ *pused &= ~usedmask;
+ }
+ }
+
+ insts->WriteMask |= usedmask;
+
+ if (inst->U.I.WriteALUResult) {
+ unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+ if (pused && *pused) {
+ /* the ALU result is taken from the X or W channel, so that channel
+ * has to be computed even if the destination register is unused */
+ if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+ usedmask |= RC_MASK_X;
+ else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+ usedmask |= RC_MASK_W;
+
+ *pused = 0;
+ insts->WriteALUResult = 1;
+ }
+ }
+
+ /* per source operand: which operand channels feed the used result channels */
+ unsigned int srcmasks[3];
+ rc_compute_sources_for_writemask(opcode, usedmask, srcmasks);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ unsigned int refmask = 0;
+ /* only operand channels not already recorded for this instruction */
+ unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
+ insts->SrcReg[src] |= newsrcmask;
+
+ /* translate operand channels into register channels through the swizzle */
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(newsrcmask, chan))
+ refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ }
+
+ /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+ refmask &= RC_MASK_XYZW;
+
+ if (!refmask)
+ continue;
+
+ mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+ /* a relatively addressed operand also reads the address register */
+ if (inst->U.I.SrcReg[src].RelAddr)
+ mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ }
+}
+
+/*
+ * mark_fn handed to the rc_dataflow_mark_outputs_fn callback: marks the
+ * channels in mask of output register index as used. data is the
+ * deadcode_state of the running pass.
+ */
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+	mark_used((struct deadcode_state *)data, RC_FILE_OUTPUT, index, mask);
+}
+
+/**
+ * Dead-code elimination based on a backward walk over the program.
+ *
+ * \param c        compiler whose program is processed in place
+ * \param dce      callback that declares which output channels are needed;
+ *                 it is called once with \p userdata, an opaque state
+ *                 pointer and a marking function
+ * \param userdata passed through to \p dce unchanged
+ *
+ * After the pass, destination writemasks are reduced to the channels that
+ * are actually used, unused ALU result writes are disabled, instructions
+ * that write nothing useful are removed, source swizzle components that do
+ * not contribute are set to RC_SWIZZLE_UNUSED, and InputsRead /
+ * OutputsWritten are recomputed.
+ */
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+	struct deadcode_state s;
+	unsigned int nr_instructions;
+
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+
+	nr_instructions = rc_recompute_ips(c);
+	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+
+	/* Seed the used set with the outputs the caller needs. */
+	dce(userdata, &s, &mark_output_use);
+
+	/* Pass 1: walk backwards and collect, per instruction, which of its
+	 * results are used. */
+	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Prev) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->IsFlowControl) {
+			if (opcode->Opcode == RC_OPCODE_ENDIF) {
+				/* ENDIF is met first when walking backwards: remember the
+				 * used set that holds after the construct. */
+				push_branch(&s);
+			} else {
+				if (s.BranchStackSize) {
+					struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+
+					if (opcode->Opcode == RC_OPCODE_IF) {
+						/* Merge the state of the path just walked with the
+						 * other path: the ELSE snapshot if there was an
+						 * ELSE, otherwise the ENDIF snapshot. */
+						or_updatemasks(&s.R,
+								&s.R,
+								branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+						s.BranchStackSize--;
+					} else if (opcode->Opcode == RC_OPCODE_ELSE) {
+						if (branch->HaveElse) {
+							rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+						} else {
+							/* Save the state reached at the ELSE and restart
+							 * from the ENDIF snapshot for the other path. */
+							memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+							memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+							branch->HaveElse = 1;
+						}
+					} else {
+						rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+					}
+				} else {
+					rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__);
+				}
+			}
+		}
+
+		update_instruction(&s, inst);
+	}
+
+	/* Pass 2: walk forwards and apply the collected information. ip counts
+	 * original instructions, so it advances for removed ones as well. */
+	unsigned int ip = 0;
+	for(struct rc_instruction * inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next, ++ip) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		int dead = 1;
+
+		if (!opcode->HasDstReg) {
+			/* instructions without a destination are never removed */
+			dead = 0;
+		} else {
+			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+			if (s.Instructions[ip].WriteMask)
+				dead = 0;
+
+			if (s.Instructions[ip].WriteALUResult)
+				dead = 0;
+			else
+				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+		}
+
+		if (dead) {
+			/* Step back first so that the loop increment continues with the
+			 * instruction that followed the removed one. */
+			struct rc_instruction * todelete = inst;
+			inst = inst->Prev;
+			rc_remove_instruction(todelete);
+			continue;
+		}
+
+		unsigned int srcmasks[3];
+		unsigned int usemask = s.Instructions[ip].WriteMask;
+
+		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+			usemask |= RC_MASK_X;
+		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+			usemask |= RC_MASK_W;
+
+		rc_compute_sources_for_writemask(opcode, usemask, srcmasks);
+
+		/* Mark source components that do not contribute to a used result. */
+		for(unsigned int src = 0; src < 3; ++src) {
+			for(unsigned int chan = 0; chan < 4; ++chan) {
+				if (!GET_BIT(srcmasks[src], chan))
+					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+			}
+		}
+	}
+
+	rc_calculate_inputs_outputs(c);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 0000000000..33acbd30f4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+/*
+ * Replace source operand src of inst by a temporary register that is
+ * filled by one or more MOV instructions inserted directly before inst.
+ *
+ * The swizzle capabilities of the target (c->SwizzleCaps->Split) decide
+ * how the used channels are distributed over the MOVs ("phases"); each
+ * MOV writes the channels of its phase and reads the original operand.
+ * Afterwards inst reads the temporary with an identity swizzle on the
+ * used channels, without negate or absolute value.
+ */
+static void rewrite_source(struct radeon_compiler * c,
+		struct rc_instruction * inst, unsigned src)
+{
+	struct rc_swizzle_split split;
+	unsigned int tempreg = rc_find_free_temporary(c);
+	unsigned int usemask;
+
+	/* channels of the operand that inst actually consumes */
+	usemask = 0;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+			usemask |= 1 << chan;
+	}
+
+	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+		unsigned int masked_negate;
+
+		mov->U.I.Opcode = RC_OPCODE_MOV;
+		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		mov->U.I.DstReg.Index = tempreg;
+		mov->U.I.DstReg.WriteMask = split.Phase[phase];
+		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+
+		/* channels outside this phase are not read by this MOV */
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (!GET_BIT(split.Phase[phase], chan)) {
+				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+			}
+		}
+
+		/* Normalize the negate mask when all channels of the phase agree;
+		 * a mixed mask is left as copied from the original operand. */
+		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+		if (masked_negate == 0)
+			mov->U.I.SrcReg[0].Negate = 0;
+		else if (masked_negate == split.Phase[phase])
+			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+	}
+
+	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[src].Index = tempreg;
+	inst->U.I.SrcReg[src].Swizzle = 0;
+	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+	inst->U.I.SrcReg[src].Abs = 0;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+	}
+}
+
+/**
+ * Make every source operand of the program use a swizzle the target
+ * supports: operands rejected by c->SwizzleCaps->IsNative are rewritten
+ * by rewrite_source().
+ */
+void rc_dataflow_swizzles(struct radeon_compiler * c)
+{
+	struct rc_instruction * const sentinel = &c->Program.Instructions;
+	struct rc_instruction * cur = sentinel->Next;
+
+	while (cur != sentinel) {
+		const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
+		unsigned int slot;
+
+		for (slot = 0; slot < info->NumSrcRegs; ++slot) {
+			if (c->SwizzleCaps->IsNative(cur->U.I.Opcode, cur->U.I.SrcReg[slot]))
+				continue;
+
+			rewrite_source(c, cur, slot);
+		}
+
+		cur = cur->Next;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
deleted file mode 100644
index aaaa50ad1f..0000000000
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file
- *
- * "Not-quite SSA" and Dead-Code Elimination.
- *
- * @note This code uses SWIZZLE_NIL in a source register to indicate that
- * the corresponding component is ignored by the corresponding instruction.
- */
-
-#include "radeon_nqssadce.h"
-
-#include "radeon_compiler.h"
-
-
-/**
- * Return the @ref register_state for the given register (or 0 for untracked
- * registers, i.e. constants).
- */
-static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
-{
- switch(file) {
- case PROGRAM_TEMPORARY: return &s->Temps[index];
- case PROGRAM_OUTPUT: return &s->Outputs[index];
- case PROGRAM_ADDRESS: return &s->Address;
- default: return 0;
- }
-}
-
-
-/**
- * Left multiplication of a register with a swizzle
- *
- * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
- */
-struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
-{
- struct prog_src_register tmp = srcreg;
- int i;
- tmp.Swizzle = 0;
- tmp.Negate = NEGATE_NONE;
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(swizzle, i);
- if (swz < 4) {
- tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
- tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
- } else {
- tmp.Swizzle |= swz << (i*3);
- }
- }
- return tmp;
-}
-
-
-static void track_used_srcreg(struct nqssadce_state* s,
- GLint src, GLuint sourced)
-{
- struct prog_instruction * inst = &s->IP->I;
- int i;
- GLuint deswz_source = 0;
-
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i)) {
- GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
- deswz_source |= 1 << swz;
- } else {
- inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
- inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
- }
- }
-
- if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
- struct prog_dst_register dstreg = inst->DstReg;
- dstreg.File = PROGRAM_TEMPORARY;
- dstreg.Index = rc_find_free_temporary(s->Compiler);
- dstreg.WriteMask = sourced;
-
- s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
-
- inst->SrcReg[src].File = PROGRAM_TEMPORARY;
- inst->SrcReg[src].Index = dstreg.Index;
- inst->SrcReg[src].Swizzle = 0;
- inst->SrcReg[src].Negate = NEGATE_NONE;
- inst->SrcReg[src].Abs = 0;
- for(i = 0; i < 4; ++i) {
- if (GET_BIT(sourced, i))
- inst->SrcReg[src].Swizzle |= i << (3*i);
- else
- inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
- }
- deswz_source = sourced;
- }
-
- struct register_state *regstate;
-
- if (inst->SrcReg[src].RelAddr) {
- regstate = get_reg_state(s, PROGRAM_ADDRESS, 0);
- if (regstate)
- regstate->Sourced |= WRITEMASK_X;
- } else {
- regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
- if (regstate)
- regstate->Sourced |= deswz_source & 0xf;
- }
-}
-
-static void unalias_srcregs(struct rc_instruction *inst, GLuint oldindex, GLuint newindex)
-{
- int nsrc = _mesa_num_inst_src_regs(inst->I.Opcode);
- int i;
- for(i = 0; i < nsrc; ++i)
- if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY && inst->I.SrcReg[i].Index == oldindex)
- inst->I.SrcReg[i].Index = newindex;
-}
-
-static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
-{
- GLuint newindex = rc_find_free_temporary(s->Compiler);
- struct rc_instruction * inst;
- for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) {
- if (inst->I.DstReg.File == PROGRAM_TEMPORARY && inst->I.DstReg.Index == oldindex)
- inst->I.DstReg.Index = newindex;
- unalias_srcregs(inst, oldindex, newindex);
- }
- unalias_srcregs(s->IP, oldindex, newindex);
-}
-
-
-/**
- * Handle one instruction.
- */
-static void process_instruction(struct nqssadce_state* s)
-{
- struct prog_instruction *inst = &s->IP->I;
- GLuint WriteMask;
-
- if (inst->Opcode == OPCODE_END)
- return;
-
- if (inst->Opcode != OPCODE_KIL) {
- struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
- if (!regstate) {
- rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n",
- inst->DstReg.File, inst->DstReg.Index);
- return;
- }
-
- inst->DstReg.WriteMask &= regstate->Sourced;
- regstate->Sourced &= ~inst->DstReg.WriteMask;
-
- if (inst->DstReg.WriteMask == 0) {
- struct rc_instruction * inst_remove = s->IP;
- s->IP = s->IP->Prev;
- rc_remove_instruction(inst_remove);
- return;
- }
-
- if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
- unalias_temporary(s, inst->DstReg.Index);
- }
-
- WriteMask = inst->DstReg.WriteMask;
-
- switch (inst->Opcode) {
- case OPCODE_ARL:
- case OPCODE_DDX:
- case OPCODE_DDY:
- case OPCODE_FRC:
- case OPCODE_MOV:
- track_used_srcreg(s, 0, WriteMask);
- break;
- case OPCODE_ADD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MUL:
- case OPCODE_SGE:
- case OPCODE_SLT:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- break;
- case OPCODE_CMP:
- case OPCODE_MAD:
- track_used_srcreg(s, 0, WriteMask);
- track_used_srcreg(s, 1, WriteMask);
- track_used_srcreg(s, 2, WriteMask);
- break;
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- track_used_srcreg(s, 0, 0x1);
- break;
- case OPCODE_DP3:
- track_used_srcreg(s, 0, 0x7);
- track_used_srcreg(s, 1, 0x7);
- break;
- case OPCODE_DP4:
- track_used_srcreg(s, 0, 0xf);
- track_used_srcreg(s, 1, 0xf);
- break;
- case OPCODE_KIL:
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP:
- track_used_srcreg(s, 0, 0xf);
- break;
- case OPCODE_DST:
- track_used_srcreg(s, 0, 0x6);
- track_used_srcreg(s, 1, 0xa);
- break;
- case OPCODE_EXP:
- case OPCODE_LOG:
- case OPCODE_POW:
- track_used_srcreg(s, 0, 0x3);
- break;
- case OPCODE_LIT:
- track_used_srcreg(s, 0, 0xb);
- break;
- default:
- rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
- return;
- }
-
- s->IP = s->IP->Prev;
-}
-
-void rc_calculate_inputs_outputs(struct radeon_compiler * c)
-{
- struct rc_instruction *inst;
-
- c->Program.InputsRead = 0;
- c->Program.OutputsWritten = 0;
-
- for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
- {
- int i;
- int num_src_regs = _mesa_num_inst_src_regs(inst->I.Opcode);
-
- for (i = 0; i < num_src_regs; ++i) {
- if (inst->I.SrcReg[i].File == PROGRAM_INPUT)
- c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index;
- }
-
- if (_mesa_num_inst_dst_regs(inst->I.Opcode)) {
- if (inst->I.DstReg.File == PROGRAM_OUTPUT)
- c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index;
- }
- }
-}
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data)
-{
- struct nqssadce_state s;
-
- _mesa_bzero(&s, sizeof(s));
- s.Compiler = c;
- s.Descr = descr;
- s.UserData = data;
- s.Descr->Init(&s);
- s.IP = c->Program.Instructions.Prev;
-
- while(s.IP != &c->Program.Instructions && !c->Error)
- process_instruction(&s);
-
- rc_calculate_inputs_outputs(c);
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
deleted file mode 100644
index b3fc77a35a..0000000000
--- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2008 Nicolai Haehnle.
- *
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_PROGRAM_NQSSADCE_H_
-#define __RADEON_PROGRAM_NQSSADCE_H_
-
-#include "radeon_program.h"
-
-struct register_state {
- /**
- * Bitmask indicating which components of the register are sourced
- * by later instructions.
- */
- GLuint Sourced : 4;
-};
-
-/**
- * Maintain state such as which registers are used, which registers are
- * read from, etc.
- */
-struct nqssadce_state {
- struct radeon_compiler *Compiler;
- struct radeon_nqssadce_descr *Descr;
-
- /**
- * All instructions after this instruction pointer have been dealt with.
- */
- struct rc_instruction * IP;
-
- /**
- * Which registers are read by subsequent instructions?
- */
- struct register_state Temps[MAX_PROGRAM_TEMPS];
- struct register_state Outputs[VERT_RESULT_MAX];
- struct register_state Address;
-
- void * UserData;
-};
-
-
-/**
- * This structure contains a description of the hardware in-so-far as
- * it is required for the NqSSA-DCE pass.
- */
-struct radeon_nqssadce_descr {
- /**
- * Fill in which outputs
- */
- void (*Init)(struct nqssadce_state *);
-
- /**
- * Check whether the given swizzle, absolute and negate combination
- * can be implemented natively by the hardware for this opcode.
- */
- GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
-
- /**
- * Emit (at the current IP) the instruction MOV dst, src;
- * The transformation will work recursively on the emitted instruction(s).
- */
- void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
-};
-
-void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data);
-struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg);
-
-#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
new file mode 100644
index 0000000000..c1c0181fac
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -0,0 +1,429 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+
+#include "radeon_program_constants.h"
+
+/**
+ * Per-opcode description table, one row per opcode.
+ *
+ * The rows are positional: row N describes the opcode whose numeric value
+ * is N, so the row order has to follow the order of the rc_opcode enum.
+ * Fields that are not listed in a row are zero.
+ */
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
+	{ .Opcode = RC_OPCODE_NOP, .Name = "NOP" },
+	{ .Opcode = RC_OPCODE_ILLEGAL_OPCODE, .Name = "ILLEGAL OPCODE" },
+	{ .Opcode = RC_OPCODE_ABS, .Name = "ABS", .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_ADD, .Name = "ADD", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_ARL, .Name = "ARL", .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_COS, .Name = "COS", .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+	{ .Opcode = RC_OPCODE_DDX, .Name = "DDX", .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_DDY, .Name = "DDY", .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_DP3, .Name = "DP3", .NumSrcRegs = 2, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_DP4, .Name = "DP4", .NumSrcRegs = 2, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_DPH, .Name = "DPH", .NumSrcRegs = 2, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_DST, .Name = "DST", .NumSrcRegs = 2, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_EX2, .Name = "EX2", .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+	{ .Opcode = RC_OPCODE_EXP, .Name = "EXP", .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_FLR, .Name = "FLR", .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_FRC, .Name = "FRC", .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_KIL, .Name = "KIL", .NumSrcRegs = 1 },
+	{ .Opcode = RC_OPCODE_LG2, .Name = "LG2", .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+	{ .Opcode = RC_OPCODE_LIT, .Name = "LIT", .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_LOG, .Name = "LOG", .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_LRP, .Name = "LRP", .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_MAD, .Name = "MAD", .NumSrcRegs = 3, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_MAX, .Name = "MAX", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_MIN, .Name = "MIN", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_MOV, .Name = "MOV", .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_MUL, .Name = "MUL", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_POW, .Name = "POW", .NumSrcRegs = 2, .HasDstReg = 1, .IsStandardScalar = 1 },
+	{ .Opcode = RC_OPCODE_RCP, .Name = "RCP", .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+	{ .Opcode = RC_OPCODE_RSQ, .Name = "RSQ", .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+	{ .Opcode = RC_OPCODE_SCS, .Name = "SCS", .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_SEQ, .Name = "SEQ", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SFL, .Name = "SFL", .NumSrcRegs = 0, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SGE, .Name = "SGE", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SGT, .Name = "SGT", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SIN, .Name = "SIN", .NumSrcRegs = 1, .HasDstReg = 1, .IsStandardScalar = 1 },
+	{ .Opcode = RC_OPCODE_SLE, .Name = "SLE", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SLT, .Name = "SLT", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SNE, .Name = "SNE", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SUB, .Name = "SUB", .NumSrcRegs = 2, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_SWZ, .Name = "SWZ", .NumSrcRegs = 1, .HasDstReg = 1, .IsComponentwise = 1 },
+	{ .Opcode = RC_OPCODE_XPD, .Name = "XPD", .NumSrcRegs = 2, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_TEX, .Name = "TEX", .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_TXB, .Name = "TXB", .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_TXD, .Name = "TXD", .HasTexture = 1, .NumSrcRegs = 3, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_TXL, .Name = "TXL", .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_TXP, .Name = "TXP", .HasTexture = 1, .NumSrcRegs = 1, .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_IF, .Name = "IF", .IsFlowControl = 1, .NumSrcRegs = 1 },
+	{ .Opcode = RC_OPCODE_ELSE, .Name = "ELSE", .IsFlowControl = 1, .NumSrcRegs = 0 },
+	{ .Opcode = RC_OPCODE_ENDIF, .Name = "ENDIF", .IsFlowControl = 1, .NumSrcRegs = 0 },
+	{ .Opcode = RC_OPCODE_REPL_ALPHA, .Name = "REPL_ALPHA", .HasDstReg = 1 },
+	{ .Opcode = RC_OPCODE_BEGIN_TEX, .Name = "BEGIN_TEX" }
+};
+
+/**
+ * Compute which components of each source operand an instruction reads in
+ * order to produce the destination components selected by \p writemask.
+ *
+ * \param opcode    description of the instruction's opcode
+ * \param writemask destination components that are written
+ * \param srcmasks  output array; entries 0, 1 and 2 are always overwritten,
+ *                  so it must have room for three masks
+ *
+ * KIL and IF have no destination register but still read their source, so
+ * their masks are filled in before the empty-writemask early return.
+ *
+ * For opcodes whose exact per-component dependencies are not modelled here
+ * (XPD, TXD, TXL) a conservative superset of the components read is reported.
+ */
+void rc_compute_sources_for_writemask(
+		const struct rc_opcode_info * opcode,
+		unsigned int writemask,
+		unsigned int *srcmasks)
+{
+	srcmasks[0] = 0;
+	srcmasks[1] = 0;
+	srcmasks[2] = 0;
+
+	if (opcode->Opcode == RC_OPCODE_KIL)
+		srcmasks[0] |= RC_MASK_XYZW;
+	else if (opcode->Opcode == RC_OPCODE_IF)
+		srcmasks[0] |= RC_MASK_X;
+
+	if (!writemask)
+		return;
+
+	if (opcode->IsComponentwise) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= writemask;
+	} else if (opcode->IsStandardScalar) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= RC_MASK_X;
+	} else {
+		switch(opcode->Opcode) {
+		case RC_OPCODE_ARL:
+		case RC_OPCODE_SCS:
+			/* Both take a single scalar operand in src0.x */
+			srcmasks[0] |= RC_MASK_X;
+			break;
+		case RC_OPCODE_DP3:
+		case RC_OPCODE_XPD:
+			srcmasks[0] |= RC_MASK_XYZ;
+			srcmasks[1] |= RC_MASK_XYZ;
+			break;
+		case RC_OPCODE_DP4:
+			srcmasks[0] |= RC_MASK_XYZW;
+			srcmasks[1] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_DPH:
+			/* src0.xyz dot src1.xyz, plus src1.w */
+			srcmasks[0] |= RC_MASK_XYZ;
+			srcmasks[1] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_TEX:
+		case RC_OPCODE_TXB:
+		case RC_OPCODE_TXL:
+		case RC_OPCODE_TXP:
+			srcmasks[0] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_TXD:
+			/* Coordinate plus two derivative operands; keep all of them */
+			srcmasks[0] |= RC_MASK_XYZW;
+			srcmasks[1] |= RC_MASK_XYZW;
+			srcmasks[2] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_DST:
+			/* src0.yz and src1.yw (assuming bit 0 of a mask is X) */
+			srcmasks[0] |= 0x6;
+			srcmasks[1] |= 0xa;
+			break;
+		case RC_OPCODE_EXP:
+		case RC_OPCODE_LOG:
+			srcmasks[0] |= RC_MASK_XY;
+			break;
+		case RC_OPCODE_LIT:
+			/* src0.xyw (assuming bit 0 of a mask is X) */
+			srcmasks[0] |= 0xb;
+			break;
+		default:
+			break;
+		}
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
new file mode 100644
index 0000000000..a3c5b86954
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+/**
+ * Opcode numbering used by the compiler.
+ *
+ * The enumerator order must stay in sync with the rows of rc_opcodes:
+ * rc_get_opcode_info() indexes that table by opcode value and asserts that
+ * the row's Opcode field matches.
+ */
+typedef enum {
+ RC_OPCODE_NOP = 0,
+ RC_OPCODE_ILLEGAL_OPCODE,
+
+ /** vec4 instruction: dst.c = abs(src0.c); */
+ RC_OPCODE_ABS,
+
+ /** vec4 instruction: dst.c = src0.c + src1.c; */
+ RC_OPCODE_ADD,
+
+ /** special instruction: load address register
+ * dst.x = floor(src.x), where dst must be an address register */
+ RC_OPCODE_ARL,
+
+ /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+ RC_OPCODE_CMP,
+
+ /** scalar instruction: dst = cos(src0.x) */
+ RC_OPCODE_COS,
+
+ /** special instruction: take vec4 partial derivative in X direction
+ * dst.c = d src0.c / dx */
+ RC_OPCODE_DDX,
+
+ /** special instruction: take vec4 partial derivative in Y direction
+ * dst.c = d src0.c / dy */
+ RC_OPCODE_DDY,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+ RC_OPCODE_DP3,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+ RC_OPCODE_DP4,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+ RC_OPCODE_DPH,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_DST,
+
+ /** scalar instruction: dst = 2**src0.x */
+ RC_OPCODE_EX2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_EXP,
+
+ /** vec4 instruction: dst.c = floor(src0.c) */
+ RC_OPCODE_FLR,
+
+ /** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+ RC_OPCODE_FRC,
+
+ /** special instruction: stop execution if any component of src0 is negative */
+ RC_OPCODE_KIL,
+
+ /** scalar instruction: dst = log_2(src0.x) */
+ RC_OPCODE_LG2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LIT,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LOG,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+ RC_OPCODE_LRP,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+ RC_OPCODE_MAD,
+
+ /** vec4 instruction: dst.c = max(src0.c, src1.c) */
+ RC_OPCODE_MAX,
+
+ /** vec4 instruction: dst.c = min(src0.c, src1.c) */
+ RC_OPCODE_MIN,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_MOV,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c */
+ RC_OPCODE_MUL,
+
+ /** scalar instruction: dst = src0.x ** src1.x */
+ RC_OPCODE_POW,
+
+ /** scalar instruction: dst = 1 / src0.x */
+ RC_OPCODE_RCP,
+
+ /** scalar instruction: dst = 1 / sqrt(src0.x) */
+ RC_OPCODE_RSQ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_SCS,
+
+ /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SEQ,
+
+ /** vec4 instruction: dst.c = 0.0 */
+ RC_OPCODE_SFL,
+
+ /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGE,
+
+ /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGT,
+
+ /** scalar instruction: dst = sin(src0.x) */
+ RC_OPCODE_SIN,
+
+ /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLE,
+
+ /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLT,
+
+ /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SNE,
+
+ /** vec4 instruction: dst.c = src0.c - src1.c */
+ RC_OPCODE_SUB,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_SWZ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_XPD,
+
+ /** texture instructions: the opcodes flagged HasTexture in rc_opcodes */
+ RC_OPCODE_TEX,
+ RC_OPCODE_TXB,
+ RC_OPCODE_TXD,
+ RC_OPCODE_TXL,
+ RC_OPCODE_TXP,
+
+ /** branch instruction:
+ * If src0.x != 0.0, continue with the next instruction;
+ * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+ */
+ RC_OPCODE_IF,
+
+ /** branch instruction: jump to matching RC_OPCODE_ENDIF */
+ RC_OPCODE_ELSE,
+
+ /** branch instruction: has no effect */
+ RC_OPCODE_ENDIF,
+
+ /** special instruction, used in R300-R500 fragment program pair instructions
+ * indicates that the result of the alpha operation shall be replicated
+ * across all other channels */
+ RC_OPCODE_REPL_ALPHA,
+
+ /** special instruction, used in R300-R500 fragment programs
+ * to indicate the start of a block of texture instructions that
+ * can run simultaneously. */
+ RC_OPCODE_BEGIN_TEX,
+
+ /** number of opcodes; rc_opcodes is declared with this many entries */
+ MAX_RC_OPCODE
+} rc_opcode;
+
+
+/**
+ * Static description of one opcode; the rows of rc_opcodes have this type.
+ */
+struct rc_opcode_info {
+ /** the opcode this row describes; checked by rc_get_opcode_info() */
+ rc_opcode Opcode;
+ /** opcode mnemonic as a string */
+ const char * Name;
+
+ /** true if the instruction reads from a texture.
+ *
+ * \note This is false for the KIL instruction, even though KIL is
+ * a texture instruction from a hardware point of view. */
+ unsigned int HasTexture:1;
+
+ /** number of source operands (0 to 3; the field is two bits wide) */
+ unsigned int NumSrcRegs:2;
+ /** true if the instruction has a destination register */
+ unsigned int HasDstReg:1;
+
+ /** true if this instruction affects control flow */
+ unsigned int IsFlowControl:1;
+
+ /** true if this is a vector instruction that operates on components in parallel
+ * without any cross-component interaction */
+ unsigned int IsComponentwise:1;
+
+ /** true if this instruction sources only its operands' X components
+ * to compute one result which is smeared across all output channels */
+ unsigned int IsStandardScalar:1;
+};
+
+/** Opcode description table, indexed by opcode value (see rc_get_opcode_info). */
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+/**
+ * Look up the rc_opcodes row for \p opcode.
+ *
+ * Asserts that the opcode is in range and that the row found at that
+ * position really describes the requested opcode.
+ */
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+	const struct rc_opcode_info * row;
+
+	assert((unsigned int)opcode < MAX_RC_OPCODE);
+	row = &rc_opcodes[opcode];
+	assert(row->Opcode == opcode);
+
+	return row;
+}
+
+/**
+ * Compute, for an instruction with the given opcode, which components of
+ * each source operand are read when the destination components in
+ * \p writemask are written.
+ *
+ * \p srcmasks receives one mask per source operand; the implementation
+ * always writes entries 0, 1 and 2, so the array needs three elements.
+ */
+void rc_compute_sources_for_writemask(
+ const struct rc_opcode_info * opcode,
+ unsigned int writemask,
+ unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
new file mode 100644
index 0000000000..828d0c8e28
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+/* Set to 1 to enable the DBG() output of this file. */
+#define VERBOSE 0
+
+/* printf-style debug message to stderr; prints nothing while VERBOSE is 0. */
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+/**
+ * One node of a singly linked list of live intervals.
+ *
+ * Start and End are instruction IPs; scan_callback uses -1 as the Start of
+ * input registers.
+ */
+struct live_intervals {
+ int Start;
+ int End;
+ /* Next interval in the list, or a null pointer at the end */
+ struct live_intervals * Next;
+};
+
+/**
+ * Liveness and allocation record for one program register
+ * (an input or a temporary, see struct regalloc_state).
+ */
+struct register_info {
+ /* Span from the first to the last instruction seen by scan_callback */
+ struct live_intervals Live;
+
+ /* Set by scan_callback once the register was read or written */
+ unsigned int Used:1;
+ /* Set once a hardware register was assigned; File and Index then hold it */
+ unsigned int Allocated:1;
+ rc_register_file File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+/**
+ * State of one hardware temporary during allocation.
+ */
+struct hardware_register {
+ /* Live intervals of every register placed in this hardware register so far */
+ struct live_intervals * Used;
+};
+
+/**
+ * Working state of one rc_pair_regalloc() run.
+ */
+struct regalloc_state {
+ struct radeon_compiler * C;
+
+ /* Per-register records, indexed by the program's register index */
+ struct register_info Input[RC_REGISTER_MAX_INDEX];
+ struct register_info Temporary[RC_REGISTER_MAX_INDEX];
+
+ /* Array of NumHwTemporaries entries, allocated from the compiler's pool */
+ struct hardware_register * HwTemporary;
+ unsigned int NumHwTemporaries;
+};
+
+/**
+ * Debug helper: print a list of live intervals as "(start,end)" pairs via
+ * DBG(), or "(null)" for an empty list.
+ */
+static void print_live_intervals(struct live_intervals * src)
+{
+	struct live_intervals * node;
+
+	if (!src) {
+		DBG("(null)");
+		return;
+	}
+
+	for(node = src; node; node = node->Next)
+		DBG("(%i,%i)", node->Start, node->End);
+}
+
+/**
+ * Merge the intervals of list \p src into the list \p *dst.
+ *
+ * A source interval that does not touch any destination interval is copied
+ * into a new node allocated from the compiler's memory pool; otherwise the
+ * destination interval it meets is widened to cover it.
+ */
+static void add_live_intervals(struct regalloc_state * s,
+		struct live_intervals ** dst, struct live_intervals * src)
+{
+	struct live_intervals ** const head = dst;
+
+	if (VERBOSE) {
+		DBG("add_live_intervals: ");
+		print_live_intervals(*head);
+		DBG(" to ");
+		print_live_intervals(src);
+		DBG("\n");
+	}
+
+	while(src) {
+		struct live_intervals * cur = *dst;
+
+		/* Destination interval ends before the source starts: look further */
+		if (cur && cur->End < src->Start) {
+			dst = &cur->Next;
+			continue;
+		}
+
+		if (!cur || cur->Start > src->End) {
+			/* Nothing to merge with: link a copy in front of cur */
+			struct live_intervals * node = memory_pool_malloc(&s->C->Pool, sizeof(*node));
+			node->Start = src->Start;
+			node->End = src->End;
+			node->Next = cur;
+			*dst = node;
+		} else {
+			/* The two intervals meet: grow the destination interval */
+			if (src->End > cur->End)
+				cur->End = src->End;
+			if (src->Start < cur->Start)
+				cur->Start = src->Start;
+		}
+
+		src = src->Next;
+	}
+
+	if (VERBOSE) {
+		DBG(" result: ");
+		print_live_intervals(*head);
+		DBG("\n");
+	}
+}
+
+/**
+ * Test whether any interval of \p dst overlaps any interval of \p src.
+ *
+ * Intervals that merely touch (one ends at the IP where the other starts)
+ * are not considered overlapping.
+ *
+ * \return 1 if an overlap was found, 0 otherwise
+ */
+static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
+{
+	if (VERBOSE) {
+		DBG("overlap_live_intervals: ");
+		print_live_intervals(dst);
+		DBG(" to ");
+		print_live_intervals(src);
+		DBG("\n");
+	}
+
+	while(src && dst) {
+		/* dst lies entirely before src */
+		if (dst->End <= src->Start) {
+			dst = dst->Next;
+			continue;
+		}
+
+		/* dst lies entirely after src */
+		if (dst->End > src->End && dst->Start >= src->End) {
+			src = src->Next;
+			continue;
+		}
+
+		DBG(" overlap\n");
+		return 1;
+	}
+
+	DBG(" no overlap\n");
+
+	return 0;
+}
+
+/**
+ * Add \p src to \p *dst only if the two interval lists do not overlap.
+ *
+ * \return 1 if the intervals were added, 0 if they overlap and \p *dst was
+ * left unchanged
+ */
+static int try_add_live_intervals(struct regalloc_state * s,
+		struct live_intervals ** dst, struct live_intervals * src)
+{
+	if (!overlap_live_intervals(*dst, src)) {
+		add_live_intervals(s, dst, src);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Callback passed to rc_for_all_reads / rc_for_all_writes: record that the
+ * given register is touched by \p inst and extend its live interval.
+ *
+ * Only temporaries and inputs are tracked. The interval of an input starts
+ * at -1; that of a temporary at the first instruction that touches it.
+ */
+static void scan_callback(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct regalloc_state * s = data;
+	struct register_info * reg;
+
+	switch(file) {
+	case RC_FILE_TEMPORARY:
+		reg = &s->Temporary[index];
+		break;
+	case RC_FILE_INPUT:
+		reg = &s->Input[index];
+		break;
+	default:
+		return;
+	}
+
+	if (reg->Used) {
+		/* Already known: only the end of the interval can move */
+		if (inst->IP > reg->Live.End)
+			reg->Live.End = inst->IP;
+		return;
+	}
+
+	reg->Used = 1;
+	if (file == RC_FILE_INPUT)
+		reg->Live.Start = -1;
+	else
+		reg->Live.Start = inst->IP;
+	reg->Live.End = inst->IP;
+}
+
+/**
+ * Fill in the live interval of every input and temporary by running
+ * scan_callback over all register reads and writes of the program.
+ */
+static void compute_live_intervals(struct regalloc_state * s)
+{
+	struct rc_instruction * inst;
+
+	/* scan_callback relies on inst->IP, so refresh the IPs first */
+	rc_recompute_ips(s->C);
+
+	inst = s->C->Program.Instructions.Next;
+	while (inst != &s->C->Program.Instructions) {
+		rc_for_all_reads(inst, scan_callback, s);
+		rc_for_all_writes(inst, scan_callback, s);
+		inst = inst->Next;
+	}
+}
+
+/**
+ * Replace a (file, index) register reference by the hardware register that
+ * was allocated for it.
+ *
+ * References to files other than temporaries and inputs, and registers
+ * without an allocation, are left untouched.
+ */
+static void rewrite_register(struct regalloc_state * s,
+		rc_register_file * file, unsigned int * index)
+{
+	const struct register_info * info;
+
+	switch(*file) {
+	case RC_FILE_TEMPORARY:
+		info = &s->Temporary[*index];
+		break;
+	case RC_FILE_INPUT:
+		info = &s->Input[*index];
+		break;
+	default:
+		return;
+	}
+
+	if (!info->Allocated)
+		return;
+
+	*file = info->File;
+	*index = info->Index;
+}
+
+/**
+ * Apply the register allocation to the destination and all source
+ * operands of a normal (non-pair) instruction.
+ */
+static void rewrite_normal_instruction(struct regalloc_state * s, struct rc_sub_instruction * inst)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(inst->Opcode);
+	rc_register_file regfile;
+	unsigned int regindex;
+	unsigned int operand;
+
+	/* The register fields are copied into locals so that their addresses
+	 * can be handed to rewrite_register(). */
+	if (info->HasDstReg) {
+		regfile = inst->DstReg.File;
+		regindex = inst->DstReg.Index;
+
+		rewrite_register(s, &regfile, &regindex);
+
+		inst->DstReg.File = regfile;
+		inst->DstReg.Index = regindex;
+	}
+
+	for(operand = 0; operand < info->NumSrcRegs; ++operand) {
+		regfile = inst->SrcReg[operand].File;
+		regindex = inst->SrcReg[operand].Index;
+
+		rewrite_register(s, &regfile, &regindex);
+
+		inst->SrcReg[operand].File = regfile;
+		inst->SrcReg[operand].Index = regindex;
+	}
+}
+
+/**
+ * Apply the register allocation to a pair instruction: the RGB and alpha
+ * destinations (only when they are written) and every used RGB and alpha
+ * source slot.
+ */
+static void rewrite_pair_instruction(struct regalloc_state * s, struct rc_pair_instruction * inst)
+{
+	rc_register_file regfile;
+	unsigned int regindex;
+	unsigned int slot;
+
+	/* Destinations are looked up as temporaries; only the index is stored back */
+	if (inst->RGB.WriteMask) {
+		regfile = RC_FILE_TEMPORARY;
+		regindex = inst->RGB.DestIndex;
+
+		rewrite_register(s, &regfile, &regindex);
+
+		inst->RGB.DestIndex = regindex;
+	}
+
+	if (inst->Alpha.WriteMask) {
+		regfile = RC_FILE_TEMPORARY;
+		regindex = inst->Alpha.DestIndex;
+
+		rewrite_register(s, &regfile, &regindex);
+
+		inst->Alpha.DestIndex = regindex;
+	}
+
+	for(slot = 0; slot < 3; ++slot) {
+		if (inst->RGB.Src[slot].Used) {
+			regfile = inst->RGB.Src[slot].File;
+			regindex = inst->RGB.Src[slot].Index;
+
+			rewrite_register(s, &regfile, &regindex);
+
+			inst->RGB.Src[slot].File = regfile;
+			inst->RGB.Src[slot].Index = regindex;
+		}
+
+		if (inst->Alpha.Src[slot].Used) {
+			regfile = inst->Alpha.Src[slot].File;
+			regindex = inst->Alpha.Src[slot].Index;
+
+			rewrite_register(s, &regfile, &regindex);
+
+			inst->Alpha.Src[slot].File = regfile;
+			inst->Alpha.Src[slot].Index = regindex;
+		}
+	}
+}
+
+/**
+ * Assign a hardware temporary to every used program temporary and rewrite
+ * all instructions accordingly.
+ *
+ * Allocation is greedy first-fit: each temporary takes the lowest-numbered
+ * hardware register whose recorded live intervals do not overlap its own.
+ * If none is available an error is reported through rc_error() and the
+ * instructions are left unrewritten.
+ */
+static void do_regalloc(struct regalloc_state * s)
+{
+	struct rc_instruction * inst;
+	unsigned int index;
+
+	for(index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		struct register_info * reg = &s->Temporary[index];
+		unsigned int hwreg = 0;
+
+		if (!reg->Used)
+			continue;
+
+		/* Stop at the first hardware register that accepts the interval */
+		while (hwreg < s->NumHwTemporaries &&
+		       !try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live))
+			++hwreg;
+
+		if (hwreg >= s->NumHwTemporaries) {
+			rc_error(s->C, "Ran out of hardware temporaries\n");
+			return;
+		}
+
+		reg->Allocated = 1;
+		reg->File = RC_FILE_TEMPORARY;
+		reg->Index = hwreg;
+	}
+
+	/* Rewrite all instructions based on the translation table built above */
+	inst = s->C->Program.Instructions.Next;
+	while (inst != &s->C->Program.Instructions) {
+		if (inst->Type == RC_INSTRUCTION_NORMAL)
+			rewrite_normal_instruction(s, &inst->U.I);
+		else
+			rewrite_pair_instruction(s, &inst->U.P);
+		inst = inst->Next;
+	}
+}
+
+/**
+ * Callback handed to AllocateHwInputs: bind program input \p input to
+ * hardware register \p hwreg and mark that register as occupied for the
+ * input's live interval. Inputs that are never used are ignored.
+ */
+static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+{
+	struct regalloc_state * s = data;
+	struct register_info * info = &s->Input[input];
+
+	if (info->Used) {
+		add_live_intervals(s, &s->HwTemporary[hwreg].Used, &info->Live);
+
+		info->Allocated = 1;
+		info->File = RC_FILE_TEMPORARY;
+		info->Index = hwreg;
+	}
+}
+
+/**
+ * Register allocation entry point for fragment programs.
+ *
+ * Computes the live intervals of all inputs and temporaries, lets the
+ * AllocateHwInputs callback of \p c place the inputs, and then allocates
+ * the temporaries among \p maxtemps hardware registers.
+ */
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps)
+{
+	struct regalloc_state state;
+
+	memset(&state, 0, sizeof(state));
+	state.C = &c->Base;
+	state.NumHwTemporaries = maxtemps;
+
+	/* Pool memory is cleared explicitly before use */
+	state.HwTemporary = memory_pool_malloc(&state.C->Pool, maxtemps*sizeof(struct hardware_register));
+	memset(state.HwTemporary, 0, maxtemps*sizeof(struct hardware_register));
+
+	compute_live_intervals(&state);
+
+	c->AllocateHwInputs(c, &alloc_input, &state);
+
+	do_regalloc(&state);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
new file mode 100644
index 0000000000..df67aafe02
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+/* Scheduler bookkeeping for one program instruction; schedule_block()
+ * allocates one of these per instruction it scans. */
+struct schedule_instruction {
+ struct rc_instruction * Instruction;
+
+ /** Next instruction in the linked list of ready instructions. */
+ struct schedule_instruction *NextReady;
+
+ /** Values that this instruction reads and writes.
+  * scan_write() and scan_read() raise an error instead of storing more
+  * than 4 write values or 12 read values, so the 3- and 4-bit counters
+  * below never index past the arrays. */
+ struct reg_value * WriteValues[4];
+ struct reg_value * ReadValues[12];
+ unsigned int NumWriteValues:3;
+ unsigned int NumReadValues:4;
+
+ /**
+ * Number of (read and write) dependencies that must be resolved before
+ * this instruction can be scheduled.
+ *
+ * NOTE(review): this 5-bit counter is incremented in scan_read() and
+ * scan_write() without an overflow check.
+ */
+ unsigned int NumDependencies:5;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ *
+ * One node of the singly linked list rooted at reg_value::Readers;
+ * scan_read() pushes a new node for every recorded read.
+ */
+struct reg_value_reader {
+ struct schedule_instruction *Reader;
+ struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+ struct schedule_instruction * Writer;
+
+ /**
+ * Unordered linked list of instructions that read from this value.
+ * When this value becomes available (its writer is committed), we
+ * decrease all readers' dependency count.
+ */
+ struct reg_value_reader *Readers;
+
+ /**
+ * Number of readers of this value. This is decremented each time
+ * a reader of the value is committed.
+ * When the reader count reaches zero, the dependency count
+ * of the instruction writing \ref Next is decremented.
+ */
+ unsigned int NumReaders;
+
+ struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+/* Per-temporary scan state: for each of the four channels, the value most
+ * recently recorded by scan_write() (0 if the channel was not written yet
+ * in the current block). */
+struct register_state {
+ struct reg_value * Values[4];
+};
+
+/* State of the scheduler for one block; schedule_block() creates a fresh,
+ * zeroed instance for every block it processes. */
+struct schedule_state {
+ struct radeon_compiler * C;
+ /* Instruction currently being scanned; scan_read()/scan_write()
+  * attribute the accesses they are told about to it. */
+ struct schedule_instruction * Current;
+
+ /* Indexed by temporary register index, see get_reg_valuep(). */
+ struct register_state Temporary[RC_REGISTER_MAX_INDEX];
+
+ /**
+ * Linked lists of instructions that can be scheduled right now,
+ * based on which ALU/TEX resources they require.
+ */
+ /*@{*/
+ struct schedule_instruction *ReadyFullALU;
+ struct schedule_instruction *ReadyRGB;
+ struct schedule_instruction *ReadyAlpha;
+ struct schedule_instruction *ReadyTEX;
+ /*@}*/
+};
+
+/**
+ * Locate the slot that tracks the current value of one register channel.
+ *
+ * Only RC_FILE_TEMPORARY is tracked; any other file yields 0.
+ * An index of RC_REGISTER_MAX_INDEX or above raises a compiler error and
+ * also yields 0. \p chan is used as given to index the channel array.
+ */
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+	rc_register_file file, unsigned int index, unsigned int chan)
+{
+	if (file == RC_FILE_TEMPORARY) {
+		if (index < RC_REGISTER_MAX_INDEX)
+			return &s->Temporary[index].Values[chan];
+
+		rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
+	}
+
+	return 0;
+}
+
+/**
+ * Return the value currently tracked for a register channel, or 0 when
+ * get_reg_valuep() has no slot for it.
+ */
+static struct reg_value * get_reg_value(struct schedule_state * s,
+	rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct reg_value ** slot = get_reg_valuep(s, file, index, chan);
+
+	return slot ? *slot : 0;
+}
+
+/** Push \p inst onto the front of the ready list \p list. */
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{
+	struct schedule_instruction * oldhead = *list;
+
+	inst->NextReady = oldhead;
+	*list = inst;
+}
+
+/**
+ * File an instruction whose dependencies are all resolved into the
+ * matching ready list:
+ *  - RC_INSTRUCTION_NORMAL instructions go to ReadyTEX,
+ *  - pair instructions with a NOP alpha opcode go to ReadyRGB,
+ *  - pair instructions with a NOP RGB opcode go to ReadyAlpha,
+ *  - everything else goes to ReadyFullALU.
+ */
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	struct rc_instruction * inst = sinst->Instruction;
+	struct schedule_instruction ** readylist;
+
+	DBG("%i is now ready\n", sinst->Instruction->IP);
+
+	if (inst->Type == RC_INSTRUCTION_NORMAL) {
+		readylist = &s->ReadyTEX;
+	} else if (inst->U.P.Alpha.Opcode == RC_OPCODE_NOP) {
+		readylist = &s->ReadyRGB;
+	} else if (inst->U.P.RGB.Opcode == RC_OPCODE_NOP) {
+		readylist = &s->ReadyAlpha;
+	} else {
+		readylist = &s->ReadyFullALU;
+	}
+
+	add_inst_to_list(readylist, sinst);
+}
+
+/**
+ * Resolve one dependency of \p sinst; once the last one is gone the
+ * instruction is moved to a ready list.
+ */
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	assert(sinst->NumDependencies > 0);
+
+	sinst->NumDependencies -= 1;
+	if (sinst->NumDependencies == 0)
+		instruction_ready(s, sinst);
+}
+
+/**
+ * Account for \p sinst having been emitted and release the dependencies
+ * that were waiting on it.
+ *
+ * For every value read: drop one reader; when the last reader is gone,
+ * the writer of the next value of that register component (if any) loses
+ * one dependency.
+ * For every value written: each recorded reader loses one dependency; if
+ * the value has no readers at all, the writer of the next value (if any)
+ * loses one dependency directly.
+ */
+static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ DBG("%i: commit\n", sinst->Instruction->IP);
+
+ for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
+ struct reg_value * v = sinst->ReadValues[i];
+ assert(v->NumReaders > 0);
+ v->NumReaders--;
+ if (!v->NumReaders) {
+ /* Last read done: the register may now be overwritten. */
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+
+ for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
+ struct reg_value * v = sinst->WriteValues[i];
+ if (v->NumReaders) {
+ for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
+ decrease_dependencies(s, r->Reader);
+ }
+ } else {
+ /* This happens in instruction sequences of the type
+ * OP r.x, ...;
+ * OP r.x, r.x, ...;
+ * See also the subtlety in how instructions that both
+ * read and write the same register are scanned.
+ */
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+}
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ *
+ * A new RC_OPCODE_BEGIN_TEX instruction is inserted directly in front of
+ * \p before, followed by every instruction that was on the ReadyTEX list
+ * when the function was entered. ReadyTEX must not be empty.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+ struct schedule_instruction *readytex;
+
+ assert(s->ReadyTEX);
+
+ /* Don't let the ready list change under us! */
+ /* (commit_instruction() below may make further instructions ready;
+  * those land on the emptied list and are not emitted by this call.) */
+ readytex = s->ReadyTEX;
+ s->ReadyTEX = 0;
+
+ /* Node marker for R300 */
+ struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+ inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+ /* Link texture instructions back in */
+ while(readytex) {
+ struct schedule_instruction * tex = readytex;
+ /* Advance first: only the detached list is walked from here on. */
+ readytex = readytex->NextReady;
+
+ rc_insert_instruction(before->Prev, tex->Instruction);
+ commit_instruction(s, tex);
+ }
+}
+
+
+/**
+ * Fold the alpha half of \p alpha into the (alpha-less) instruction \p rgb.
+ *
+ * \p rgb must have a NOP alpha opcode and \p alpha a NOP RGB opcode.
+ *
+ * Returns 1 on success. Returns 0 if a source slot could not be allocated
+ * in \p rgb or if both instructions write the ALU result; in that case
+ * \p rgb may already have been partially modified, which is why callers
+ * go through merge_instructions().
+ */
+static int destructive_merge_instructions(
+ struct rc_pair_instruction * rgb,
+ struct rc_pair_instruction * alpha)
+{
+ assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+ assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+ /* Copy alpha args into rgb */
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+ rc_register_file file = 0;
+ unsigned int index = 0;
+
+ /* Swizzle 0..2 reads from an RGB source slot, 3 from an alpha
+  * source slot. For any other swizzle no source is requested
+  * (presumably a constant swizzle such as zero/one -- confirm). */
+ if (alpha->Alpha.Arg[arg].Swizzle < 3) {
+ srcrgb = 1;
+ file = alpha->RGB.Src[oldsrc].File;
+ index = alpha->RGB.Src[oldsrc].Index;
+ } else if (alpha->Alpha.Arg[arg].Swizzle < 4) {
+ srcalpha = 1;
+ file = alpha->Alpha.Src[oldsrc].File;
+ index = alpha->Alpha.Src[oldsrc].Index;
+ }
+
+ int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+ if (source < 0)
+ return 0;
+
+ rgb->Alpha.Arg[arg].Source = source;
+ rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+ rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+ rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+ }
+
+ /* Copy alpha opcode into rgb */
+ rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+ rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+ rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+ rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+ rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+ rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+ /* Merge ALU result writing */
+ if (alpha->WriteALUResult) {
+ /* Only one of the two halves may write the ALU result. */
+ if (rgb->WriteALUResult)
+ return 0;
+
+ rgb->WriteALUResult = alpha->WriteALUResult;
+ rgb->ALUResultCompare = alpha->ALUResultCompare;
+ }
+
+ return 1;
+}
+
+/**
+ * Non-destructive wrapper around destructive_merge_instructions().
+ *
+ * Attempts to fold \p alpha into \p rgb. Returns 1 when the merge worked.
+ * When it did not, \p rgb is restored from a snapshot taken beforehand and
+ * 0 is returned; \p alpha is never modified.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+	const struct rc_pair_instruction snapshot = *rgb;
+
+	if (!destructive_merge_instructions(rgb, alpha)) {
+		/* Roll back whatever the failed attempt changed. */
+		*rgb = snapshot;
+		return 0;
+	}
+
+	return 1;
+}
+
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ *
+ * The chosen instruction is inserted directly in front of \p before.
+ * At least one of ReadyFullALU, ReadyRGB, ReadyAlpha must be non-empty;
+ * schedule_block() only calls this under that condition.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+ struct schedule_instruction * sinst;
+
+ /* No pairing possible or wanted: emit a single instruction, taking
+  * the lists in the order full ALU, RGB-only, alpha-only. */
+ if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
+ if (s->ReadyFullALU) {
+ sinst = s->ReadyFullALU;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
+ } else if (s->ReadyRGB) {
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else {
+ sinst = s->ReadyAlpha;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ }
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ } else {
+ /* Both an RGB-only and an alpha-only instruction are ready. */
+ struct schedule_instruction **prgb;
+ struct schedule_instruction **palpha;
+
+ /* Some pairings might fail because they require too
+ * many source slots; try all possible pairings if necessary */
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+ struct schedule_instruction * psirgb = *prgb;
+ struct schedule_instruction * psialpha = *palpha;
+
+ if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+ continue;
+
+ /* Unlink both from their ready lists; only the merged
+  * RGB instruction is linked back into the program, but
+  * both are committed. */
+ *prgb = (*prgb)->NextReady;
+ *palpha = (*palpha)->NextReady;
+ rc_insert_instruction(before->Prev, psirgb->Instruction);
+ commit_instruction(s, psirgb);
+ commit_instruction(s, psialpha);
+ goto success;
+ }
+ }
+
+ /* No success in pairing; just take the first RGB instruction */
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ success: ;
+ }
+}
+
+/**
+ * Read callback for rc_for_all_reads(): record that the instruction
+ * currently being scanned (s->Current) reads one register channel.
+ *
+ * \p data is the struct schedule_state. Reads of channels without a
+ * tracked value, and reads of a value written by the current instruction
+ * itself, are ignored. Otherwise the current instruction is registered as
+ * a reader of the value and gains one dependency.
+ */
+static void scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value * v = get_reg_value(s, file, index, chan);
+
+ if (!v)
+ return;
+
+ if (v->Writer == s->Current) {
+ /* The instruction reads and writes to a register component.
+ * In this case, we only want to increment dependencies by one.
+ */
+ return;
+ }
+
+ DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+ reader->Reader = s->Current;
+ reader->Next = v->Readers;
+ v->Readers = reader;
+ v->NumReaders++;
+
+ s->Current->NumDependencies++;
+
+ /* ReadValues has room for 12 entries. On overflow the reader has
+  * already been registered above but is not stored in ReadValues. */
+ if (s->Current->NumReadValues >= 12) {
+ rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->ReadValues[s->Current->NumReadValues++] = v;
+ }
+}
+
+/**
+ * Write callback for rc_for_all_writes(): record that the instruction
+ * currently being scanned (s->Current) writes one register channel.
+ *
+ * \p data is the struct schedule_state. Channels that get_reg_valuep()
+ * does not track are ignored. Otherwise a fresh reg_value owned by the
+ * current instruction becomes the channel's tracked value; if there was
+ * a previous value, it is chained to the new one and the current
+ * instruction gains one dependency.
+ */
+static void scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+
+ if (!pv)
+ return;
+
+ DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+ memset(newv, 0, sizeof(*newv));
+
+ newv->Writer = s->Current;
+
+ if (*pv) {
+ /* The dependency is released by commit_instruction() once the
+  * old value has no readers left. */
+ (*pv)->Next = newv;
+ s->Current->NumDependencies++;
+ }
+
+ *pv = newv;
+
+ /* WriteValues has room for 4 entries. */
+ if (s->Current->NumWriteValues >= 4) {
+ rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
+ }
+}
+
+/**
+ * Reschedule the instructions in the half-open range [begin, end).
+ *
+ * First every instruction is scanned to build its dependency record,
+ * then the whole range is unlinked from the program, and finally ready
+ * instructions are linked back in front of \p end until the ready lists
+ * are empty or a compiler error is flagged.
+ *
+ * Also renumbers inst->IP of the scanned instructions starting at 0.
+ */
+static void schedule_block(struct r300_fragment_program_compiler * c,
+ struct rc_instruction * begin, struct rc_instruction * end)
+{
+ struct schedule_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
+
+ /* Scan instructions for data dependencies */
+ unsigned int ip = 0;
+ for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+ s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+ memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+ s.Current->Instruction = inst;
+ inst->IP = ip++;
+
+ DBG("%i: Scanning\n", inst->IP);
+
+ /* The order of things here is subtle and maybe slightly
+ * counter-intuitive, to account for the case where an
+ * instruction writes to the same register as it reads
+ * from. */
+ rc_for_all_writes(inst, &scan_write, &s);
+ rc_for_all_reads(inst, &scan_read, &s);
+
+ DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+ if (!s.Current->NumDependencies)
+ instruction_ready(&s, s.Current);
+ }
+
+ /* Temporarily unlink all instructions */
+ begin->Prev->Next = end;
+ end->Prev = begin->Prev;
+
+ /* Schedule instructions back */
+ /* NOTE(review): only instructions that reach a ready list are linked
+  * back in; anything still waiting when the loop ends stays unlinked. */
+ while(!s.C->Error &&
+ (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+ if (s.ReadyTEX)
+ emit_all_tex(&s, end);
+
+ /* Drain the ALU lists before looking at textures again. */
+ while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+ emit_one_alu(&s, end);
+ }
+}
+
+/**
+ * Tell whether \p inst is a flow control instruction.
+ *
+ * Only RC_INSTRUCTION_NORMAL instructions can be; for those the answer
+ * is the IsFlowControl flag of the opcode's info record.
+ */
+static int is_controlflow(struct rc_instruction * inst)
+{
+	if (inst->Type != RC_INSTRUCTION_NORMAL)
+		return 0;
+
+	return rc_get_opcode_info(inst->U.I.Opcode)->IsFlowControl;
+}
+
+/**
+ * Schedule the whole program, one block at a time.
+ *
+ * A block is a maximal run of consecutive instructions that are not flow
+ * control instructions; flow control instructions themselves are skipped
+ * and stay where they are.
+ */
+void rc_pair_schedule(struct r300_fragment_program_compiler *c)
+{
+	struct rc_instruction * const sentinel = &c->Base.Program.Instructions;
+	struct rc_instruction * cur = sentinel->Next;
+
+	while(cur != sentinel) {
+		struct rc_instruction * block_begin;
+
+		if (is_controlflow(cur)) {
+			cur = cur->Next;
+			continue;
+		}
+
+		/* Find the end of the block that starts here. */
+		for(block_begin = cur;
+		    cur != sentinel && !is_controlflow(cur);
+		    cur = cur->Next) {
+		}
+
+		DBG("Schedule one block\n");
+		schedule_block(c, block_begin, cur);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
new file mode 100644
index 0000000000..7211768272
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Last-minute opcode normalisation before pair translation.
+ *
+ * ADD, MOV and MUL are expressed as MAD with the missing operands set to
+ * the constant swizzles 1111 / 0000 on RC_FILE_NONE:
+ *   ADD a, b  ->  MAD a, 1, b
+ *   MOV a     ->  MAD a, 1, 0
+ *   MUL a, b  ->  MAD a, b, 0
+ * CMP gets its first and third source operands swapped.
+ * All other opcodes are left untouched.
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+	struct rc_src_register * src = inst->SrcReg;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_ADD:
+		inst->Opcode = RC_OPCODE_MAD;
+		src[2] = src[1];
+		src[1].File = RC_FILE_NONE;
+		src[1].Swizzle = RC_SWIZZLE_1111;
+		src[1].Negate = RC_MASK_NONE;
+		break;
+	case RC_OPCODE_CMP: {
+		struct rc_src_register first = src[0];
+		src[0] = src[2];
+		src[2] = first;
+		break;
+	}
+	case RC_OPCODE_MOV:
+		/* AMD recommend CMP for moves. However, rewriting
+		 *   KIL -r0;
+		 * as
+		 *   CMP tmp, -r0, -r0, 0;
+		 *   KIL tmp;
+		 * misbehaves on R500 when r0 == 0.0: the KIL hardware
+		 * appears to treat -0.0 as less than zero. Hence MAD.
+		 */
+		inst->Opcode = RC_OPCODE_MAD;
+		src[1].File = RC_FILE_NONE;
+		src[1].Swizzle = RC_SWIZZLE_1111;
+		src[2].File = RC_FILE_NONE;
+		src[2].Swizzle = RC_SWIZZLE_0000;
+		break;
+	case RC_OPCODE_MUL:
+		inst->Opcode = RC_OPCODE_MAD;
+		src[2].File = RC_FILE_NONE;
+		src[2].Swizzle = RC_SWIZZLE_0000;
+		break;
+	default:
+		/* already in its final form */
+		break;
+	}
+}
+
+
+/**
+ * Work out which halves of the pair ALU an instruction occupies.
+ *
+ * \param needrgb         set to 1 if the RGB unit is needed, else 0
+ * \param needalpha       set to 1 if the alpha unit is needed, else 0
+ * \param istranscendent  set to 1 for COS, EX2, LG2, RCP, RSQ and SIN,
+ *                        else 0
+ *
+ * The starting point is the destination write mask (XYZ -> RGB, W ->
+ * alpha) plus the channel selected by WriteALUResult. On top of that,
+ * transcendent opcodes always need alpha, DP3 always needs RGB and DP4
+ * always needs both.
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+	int * needrgb, int * needalpha, int * istranscendent)
+{
+	int rgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) != 0;
+	int alpha = (inst->DstReg.WriteMask & RC_MASK_W) != 0;
+	int transcendent = 0;
+
+	if (inst->WriteALUResult == RC_ALURESULT_X)
+		rgb = 1;
+	else if (inst->WriteALUResult == RC_ALURESULT_W)
+		alpha = 1;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_COS:
+	case RC_OPCODE_EX2:
+	case RC_OPCODE_LG2:
+	case RC_OPCODE_RCP:
+	case RC_OPCODE_RSQ:
+	case RC_OPCODE_SIN:
+		transcendent = 1;
+		alpha = 1;
+		break;
+	case RC_OPCODE_DP4:
+		alpha = 1;
+		rgb = 1;
+		break;
+	case RC_OPCODE_DP3:
+		rgb = 1;
+		break;
+	default:
+		/* Plain component-wise opcodes add no requirement. */
+		break;
+	}
+
+	*needrgb = rgb;
+	*needalpha = alpha;
+	*istranscendent = transcendent;
+}
+
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ *
+ * \param c     compiler; supplies the color/depth output indices and is
+ *              the target of error reports
+ * \param pair  destination; completely overwritten
+ * \param inst  source instruction, already passed through final_rewrite()
+ *
+ * If a source operand cannot be given a source slot in the pair
+ * (rc_pair_alloc_source() returns a negative value), a compiler error is
+ * raised via rc_error() and the function returns with \p pair only
+ * partially filled in.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+	struct rc_pair_instruction * pair,
+	struct rc_sub_instruction * inst)
+{
+	memset(pair, 0, sizeof(struct rc_pair_instruction));
+
+	int needrgb, needalpha, istranscendent;
+	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+	if (needrgb) {
+		/* Transcendent results are computed in the alpha unit and
+		 * replicated into RGB. */
+		if (istranscendent)
+			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+		else
+			pair->RGB.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->RGB.Saturate = 1;
+	}
+	if (needalpha) {
+		pair->Alpha.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->Alpha.Saturate = 1;
+	}
+
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+	int nargs = opcode->NumSrcRegs;
+	int i;
+
+	/* Special case for DDX/DDY (MDH/MDV). */
+	/* NOTE(review): nargs is adjusted here but the loop below is bounded
+	 * by opcode->NumSrcRegs, so the adjustment currently has no effect. */
+	if (inst->Opcode == RC_OPCODE_DDX || inst->Opcode == RC_OPCODE_DDY) {
+		nargs++;
+	}
+
+	for(i = 0; i < opcode->NumSrcRegs; ++i) {
+		int source;
+		if (needrgb && !istranscendent) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			int j;
+			/* Which source slots do the x/y/z swizzles pull from? */
+			for(j = 0; j < 3; ++j) {
+				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+				if (swz < 3)
+					srcrgb = 1;
+				else if (swz < 4)
+					srcalpha = 1;
+			}
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				/* Do not store a negative slot number in Source. */
+				rc_error(&c->Base, "%s: failed to allocate RGB source\n", __FUNCTION__);
+				return;
+			}
+			pair->RGB.Arg[i].Source = source;
+			pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
+			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+		}
+		if (needalpha) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			/* Transcendent opcodes take their operand from the
+			 * first swizzle component, everything else from w. */
+			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+			if (swz < 3)
+				srcrgb = 1;
+			else if (swz < 4)
+				srcalpha = 1;
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			if (source < 0) {
+				rc_error(&c->Base, "%s: failed to allocate alpha source\n", __FUNCTION__);
+				return;
+			}
+			pair->Alpha.Arg[i].Source = source;
+			pair->Alpha.Arg[i].Swizzle = swz;
+			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+		}
+	}
+
+	/* Destination handling */
+	if (inst->DstReg.File == RC_FILE_OUTPUT) {
+		if (inst->DstReg.Index == c->OutputColor) {
+			pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+			pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+		} else if (inst->DstReg.Index == c->OutputDepth) {
+			pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+		}
+	} else {
+		if (needrgb) {
+			pair->RGB.DestIndex = inst->DstReg.Index;
+			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+		}
+		if (needalpha) {
+			pair->Alpha.DestIndex = inst->DstReg.Index;
+			pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+		}
+	}
+
+	if (inst->WriteALUResult) {
+		pair->WriteALUResult = inst->WriteALUResult;
+		pair->ALUResultCompare = inst->ALUResultCompare;
+	}
+}
+
+
+/**
+ * Convert every ALU instruction of the program into pair form, in place.
+ *
+ * Instructions that are already pairs, texture instructions, flow control
+ * instructions and KIL are left as they are. Nothing else about the
+ * program is changed.
+ */
+void rc_pair_translate(struct r300_fragment_program_compiler *c)
+{
+	struct rc_instruction * const sentinel = &c->Base.Program.Instructions;
+	struct rc_instruction * cur;
+
+	for(cur = sentinel->Next; cur != sentinel; cur = cur->Next) {
+		const struct rc_opcode_info * info;
+		struct rc_sub_instruction normal;
+		int is_alu;
+
+		if (cur->Type != RC_INSTRUCTION_NORMAL)
+			continue;
+
+		info = rc_get_opcode_info(cur->U.I.Opcode);
+		is_alu = !info->HasTexture && !info->IsFlowControl && info->Opcode != RC_OPCODE_KIL;
+		if (!is_alu)
+			continue;
+
+		/* U.I and U.P share storage, so work from a private copy of
+		 * the normal form while the pair form is written. */
+		normal = cur->U.I;
+		final_rewrite(&normal);
+
+		cur->Type = RC_INSTRUCTION_PAIR;
+		set_pair_instruction(c, &cur->U.P, &normal);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
index b636f90a96..0dbc5380bb 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -27,9 +27,9 @@
#include "radeon_program.h"
+#include <stdio.h>
+
#include "radeon_compiler.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
/**
@@ -69,37 +69,57 @@ void radeonLocalTransform(
}
}
+/**
+ * Apply \p swizzle on top of the swizzle already present in \p srcreg.
+ *
+ * For each of the four result components, a selector below 4 in
+ * \p swizzle picks the corresponding component selector and negate bit of
+ * \p srcreg; any other selector is copied through unchanged and gets no
+ * negate bit. All other fields of \p srcreg are returned as they were.
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+	unsigned int newswizzle = 0;
+	unsigned int newnegate = 0;
+	unsigned int chan;
+
+	for(chan = 0; chan < 4; ++chan) {
+		rc_swizzle sel = GET_SWZ(swizzle, chan);
+
+		if (sel >= 4) {
+			newswizzle |= sel << (chan*3);
+			continue;
+		}
+
+		newswizzle |= GET_SWZ(srcreg.Swizzle, sel) << (chan*3);
+		newnegate |= GET_BIT(srcreg.Negate, sel) << chan;
+	}
+
+	srcreg.Swizzle = newswizzle;
+	srcreg.Negate = newnegate;
+	return srcreg;
+}
-GLint rc_find_free_temporary(struct radeon_compiler * c)
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
{
- GLboolean used[MAX_PROGRAM_TEMPS];
- GLuint i;
+ char used[RC_REGISTER_MAX_INDEX];
+ unsigned int i;
memset(used, 0, sizeof(used));
for (struct rc_instruction * rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
- const struct prog_instruction *inst = &rcinst->I;
- const GLuint nsrc = _mesa_num_inst_src_regs(inst->Opcode);
- const GLuint ndst = _mesa_num_inst_dst_regs(inst->Opcode);
- GLuint k;
-
- for (k = 0; k < nsrc; k++) {
- if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
- used[inst->SrcReg[k].Index] = GL_TRUE;
+ const struct rc_sub_instruction *inst = &rcinst->U.I;
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int k;
+
+ for (k = 0; k < opcode->NumSrcRegs; k++) {
+ if (inst->SrcReg[k].File == RC_FILE_TEMPORARY)
+ used[inst->SrcReg[k].Index] = 1;
}
- if (ndst) {
- if (inst->DstReg.File == PROGRAM_TEMPORARY)
- used[inst->DstReg.Index] = GL_TRUE;
+ if (opcode->HasDstReg) {
+ if (inst->DstReg.File == RC_FILE_TEMPORARY)
+ used[inst->DstReg.Index] = 1;
}
}
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
if (!used[i])
return i;
}
- return -1;
+ rc_error(c, "Ran out of temporary registers\n");
+ return 0;
}
@@ -107,24 +127,31 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
{
struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
- inst->Prev = 0;
- inst->Next = 0;
+ memset(inst, 0, sizeof(struct rc_instruction));
- _mesa_init_instructions(&inst->I, 1);
+ inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
return inst;
}
-
-struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
{
- struct rc_instruction * inst = rc_alloc_instruction(c);
-
inst->Prev = after;
inst->Next = after->Next;
inst->Prev->Next = inst;
inst->Next->Prev = inst;
+}
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+ struct rc_instruction * inst = rc_alloc_instruction(c);
+
+ rc_insert_instruction(after, inst);
return inst;
}
@@ -135,76 +162,20 @@ void rc_remove_instruction(struct rc_instruction * inst)
inst->Next->Prev = inst->Prev;
}
-
-void rc_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
-{
- struct prog_instruction *source;
- unsigned int i;
-
- for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
- struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
- dest->I = *source;
- }
-
- c->Program.ShadowSamplers = program->ShadowSamplers;
- c->Program.InputsRead = program->InputsRead;
- c->Program.OutputsWritten = program->OutputsWritten;
-
- int isNVProgram = 0;
-
- if (program->Target == GL_VERTEX_PROGRAM_ARB) {
- struct gl_vertex_program * vp = (struct gl_vertex_program *) program;
- isNVProgram = vp->IsNVProgram;
- }
-
- if (isNVProgram) {
- /* NV_vertex_program has a fixed-sized constant environment.
- * This could be handled more efficiently for programs that
- * do not use relative addressing.
- */
- for(i = 0; i < 96; ++i) {
- struct rc_constant constant;
-
- constant.Type = RC_CONSTANT_EXTERNAL;
- constant.Size = 4;
- constant.u.External = i;
-
- rc_constants_add(&c->Program.Constants, &constant);
- }
- } else {
- for(i = 0; i < program->Parameters->NumParameters; ++i) {
- struct rc_constant constant;
-
- constant.Type = RC_CONSTANT_EXTERNAL;
- constant.Size = 4;
- constant.u.External = i;
-
- rc_constants_add(&c->Program.Constants, &constant);
- }
- }
-}
-
-
/**
- * Print program to stderr, default options.
+ * Return the number of instructions in the program.
*/
-void rc_print_program(const struct rc_program *prog)
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
{
- GLuint indent = 0;
- GLuint linenum = 1;
- struct rc_instruction *inst;
-
- fprintf(stderr, "# Radeon Compiler Program\n");
+ unsigned int ip = 0;
- for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
- fprintf(stderr, "%3d: ", linenum);
+ for(struct rc_instruction * inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ inst->IP = ip++;
+ }
- /* Massive hack: We rely on the fact that the printers do not actually
- * use the gl_program argument (last argument) in debug mode */
- indent = _mesa_fprint_instruction_opt(
- stderr, &inst->I,
- indent, PROG_PRINT_DEBUG, 0);
+ c->Program.Instructions.IP = 0xcafedead;
- linenum++;
- }
+ return ip;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 561958608c..33db3ea0ff 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -28,37 +28,144 @@
#ifndef __RADEON_PROGRAM_H_
#define __RADEON_PROGRAM_H_
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-#include "shader/prog_instruction.h"
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
struct radeon_compiler;
-struct rc_instruction;
-struct rc_program;
-enum {
- PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
+struct rc_src_register {
+ rc_register_file File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int Swizzle:12;
+
+ /** Take the component-wise absolute value */
+ unsigned int Abs:1;
+
+ /** Post-Abs negation. */
+ unsigned int Negate:4;
+};
+
+struct rc_dst_register {
+ rc_register_file File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int WriteMask:4;
+};
+
+/**
+ * Instructions are maintained by the compiler in a doubly linked list
+ * of these structures.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+ struct rc_src_register SrcReg[3];
+ struct rc_dst_register DstReg;
+
+ /**
+ * Opcode of this instruction, according to \ref rc_opcode enums.
+ */
+ rc_opcode Opcode:8;
+
+ /**
+ * Saturate each value of the result to the range [0,1] or [-1,1],
+ * according to \ref rc_saturate_mode enums.
+ */
+ rc_saturate_mode SaturateMode:2;
+
+ /**
+ * Writing to the special register RC_SPECIAL_ALU_RESULT
+ */
+ /*@{*/
+ rc_write_aluresult WriteALUResult:2;
+ rc_compare_func ALUResultCompare:3;
+ /*@}*/
+
+ /**
+ * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+ */
+ /*@{*/
+ /** Source texture unit. */
+ unsigned int TexSrcUnit:5;
+
+ /** Source texture target, one of the \ref rc_texture_target enums */
+ rc_texture_target TexSrcTarget:3;
+
+ /** True if tex instruction should do shadow comparison */
+ unsigned int TexShadow:1;
+ /*@}*/
+};
+
+typedef enum {
+ RC_INSTRUCTION_NORMAL = 0,
+ RC_INSTRUCTION_PAIR
+} rc_instruction_type;
+
+struct rc_instruction {
+ struct rc_instruction * Prev;
+ struct rc_instruction * Next;
+
+ rc_instruction_type Type;
+ union {
+ struct rc_sub_instruction I;
+ struct rc_pair_instruction P;
+ } U;
+
+ /**
+ * Warning: IPs are not stable. If you want to use them,
+ * you need to recompute them at the beginning of each pass
+ * using \ref rc_recompute_ips
+ */
+ unsigned int IP;
+};
+
+struct rc_program {
+ /**
+ * Instructions.Next points to the first instruction,
+ * Instructions.Prev points to the last instruction.
+ */
+ struct rc_instruction Instructions;
+
+ /* Long term, we should probably remove InputsRead & OutputsWritten,
+ * since updating dependent state can be fragile, and they aren't
+ * actually used very often. */
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+ uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+ struct rc_constant_list Constants;
};
enum {
- OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */
+ OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */
};
-#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
-#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
-static inline GLuint get_swz(GLuint swz, GLuint idx)
+static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
{
if (idx & 0x4)
return idx;
return GET_SWZ(swz, idx);
}
-static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, GLuint swz_z, GLuint swz_w)
+static inline unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
{
- GLuint ret = 0;
+ unsigned int ret = 0;
ret |= get_swz(src, swz_x);
ret |= get_swz(src, swz_y) << 3;
@@ -68,22 +175,24 @@ static inline GLuint combine_swizzles4(GLuint src, GLuint swz_x, GLuint swz_y, G
return ret;
}
-static inline GLuint combine_swizzles(GLuint src, GLuint swz)
+static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
{
- GLuint ret = 0;
+ unsigned int ret = 0;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_X));
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Y)) << 3;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_Z)) << 6;
- ret |= get_swz(src, GET_SWZ(swz, SWIZZLE_W)) << 9;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
return ret;
}
-static INLINE void reset_srcreg(struct prog_src_register* reg)
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+/**
+ * Reset a source register: zero every field of the structure, then set the
+ * swizzle to RC_SWIZZLE_XYZW (the replacement for the old SWIZZLE_NOOP).
+ *
+ * \param reg register to reset; must point to a valid rc_src_register.
+ */
+static inline void reset_srcreg(struct rc_src_register* reg)
{
- _mesa_bzero(reg, sizeof(*reg));
- reg->Swizzle = SWIZZLE_NOOP;
+ /* sizeof(*reg), not sizeof(reg): clear the whole struct, not just pointer-size bytes. */
+ memset(reg, 0, sizeof(*reg));
+ reg->Swizzle = RC_SWIZZLE_XYZW;
}
@@ -92,13 +201,13 @@ static INLINE void reset_srcreg(struct prog_src_register* reg)
*
* The function will be called once for each instruction.
* It has to either emit the appropriate transformed code for the instruction
- * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
+ * and return true, or return false if it doesn't understand the
* instruction.
*
* The function gets passed the userData as last parameter.
*/
struct radeon_program_transformation {
- GLboolean (*function)(
+ int (*function)(
struct radeon_compiler*,
struct rc_instruction*,
void*);
@@ -110,12 +219,15 @@ void radeonLocalTransform(
int num_transformations,
struct radeon_program_transformation* transformations);
-GLint rc_find_free_temporary(struct radeon_compiler * c);
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
void rc_remove_instruction(struct rc_instruction * inst);
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
void rc_print_program(const struct rc_program *prog);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index f23ce301ca..0326d25233 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -40,175 +40,164 @@
static struct rc_instruction *emit1(
struct radeon_compiler * c, struct rc_instruction * after,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg)
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->I.Opcode = Opcode;
- fpi->I.SaturateMode = Saturate;
- fpi->I.DstReg = DstReg;
- fpi->I.SrcReg[0] = SrcReg;
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg;
return fpi;
}
static struct rc_instruction *emit2(
struct radeon_compiler * c, struct rc_instruction * after,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->I.Opcode = Opcode;
- fpi->I.SaturateMode = Saturate;
- fpi->I.DstReg = DstReg;
- fpi->I.SrcReg[0] = SrcReg0;
- fpi->I.SrcReg[1] = SrcReg1;
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
return fpi;
}
static struct rc_instruction *emit3(
struct radeon_compiler * c, struct rc_instruction * after,
- gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
- struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
- struct prog_src_register SrcReg2)
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+ struct rc_src_register SrcReg2)
{
struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
- fpi->I.Opcode = Opcode;
- fpi->I.SaturateMode = Saturate;
- fpi->I.DstReg = DstReg;
- fpi->I.SrcReg[0] = SrcReg0;
- fpi->I.SrcReg[1] = SrcReg1;
- fpi->I.SrcReg[2] = SrcReg2;
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
+ fpi->U.I.SrcReg[2] = SrcReg2;
return fpi;
}
-static struct prog_dst_register dstreg(int file, int index)
+static struct rc_dst_register dstreg(int file, int index)
{
- struct prog_dst_register dst;
+ struct rc_dst_register dst;
dst.File = file;
dst.Index = index;
- dst.WriteMask = WRITEMASK_XYZW;
- dst.CondMask = COND_TR;
+ dst.WriteMask = RC_MASK_XYZW;
dst.RelAddr = 0;
- dst.CondSwizzle = SWIZZLE_NOOP;
- dst.CondSrc = 0;
- dst.pad = 0;
return dst;
}
-static struct prog_dst_register dstregtmpmask(int index, int mask)
+static struct rc_dst_register dstregtmpmask(int index, int mask)
{
- struct prog_dst_register dst = {0};
- dst.File = PROGRAM_TEMPORARY;
+ struct rc_dst_register dst = {0};
+ dst.File = RC_FILE_TEMPORARY;
dst.Index = index;
dst.WriteMask = mask;
dst.RelAddr = 0;
- dst.CondMask = COND_TR;
- dst.CondSwizzle = SWIZZLE_NOOP;
- dst.CondSrc = 0;
- dst.pad = 0;
return dst;
}
-static const struct prog_src_register builtin_zero = {
- .File = PROGRAM_BUILTIN,
+static const struct rc_src_register builtin_zero = {
+ .File = RC_FILE_NONE,
.Index = 0,
- .Swizzle = SWIZZLE_0000
+ .Swizzle = RC_SWIZZLE_0000
};
-static const struct prog_src_register builtin_one = {
- .File = PROGRAM_BUILTIN,
+static const struct rc_src_register builtin_one = {
+ .File = RC_FILE_NONE,
.Index = 0,
- .Swizzle = SWIZZLE_1111
+ .Swizzle = RC_SWIZZLE_1111
};
-static const struct prog_src_register srcreg_undefined = {
- .File = PROGRAM_UNDEFINED,
+static const struct rc_src_register srcreg_undefined = {
+ .File = RC_FILE_NONE,
.Index = 0,
- .Swizzle = SWIZZLE_NOOP
+ .Swizzle = RC_SWIZZLE_XYZW
};
-static struct prog_src_register srcreg(int file, int index)
+static struct rc_src_register srcreg(int file, int index)
{
- struct prog_src_register src = srcreg_undefined;
+ struct rc_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
return src;
}
-static struct prog_src_register srcregswz(int file, int index, int swz)
+static struct rc_src_register srcregswz(int file, int index, int swz)
{
- struct prog_src_register src = srcreg_undefined;
+ struct rc_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
src.Swizzle = swz;
return src;
}
-static struct prog_src_register absolute(struct prog_src_register reg)
+static struct rc_src_register absolute(struct rc_src_register reg)
{
- struct prog_src_register newreg = reg;
+ struct rc_src_register newreg = reg;
newreg.Abs = 1;
- newreg.Negate = NEGATE_NONE;
+ newreg.Negate = RC_MASK_NONE;
return newreg;
}
-static struct prog_src_register negate(struct prog_src_register reg)
+static struct rc_src_register negate(struct rc_src_register reg)
{
- struct prog_src_register newreg = reg;
- newreg.Negate = newreg.Negate ^ NEGATE_XYZW;
+ struct rc_src_register newreg = reg;
+ newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
return newreg;
}
-static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
+static struct rc_src_register swizzle(struct rc_src_register reg,
+ rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
{
- struct prog_src_register swizzled = reg;
- swizzled.Swizzle = MAKE_SWIZZLE4(
- x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
- y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
- z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
- w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
+ struct rc_src_register swizzled = reg;
+ swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
return swizzled;
}
-static struct prog_src_register scalar(struct prog_src_register reg)
+static struct rc_src_register scalar(struct rc_src_register reg)
{
- return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ return swizzle(reg, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X);
}
static void transform_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- struct prog_src_register src = inst->I.SrcReg[0];
+ struct rc_src_register src = inst->U.I.SrcReg[0];
src.Abs = 1;
- src.Negate = NEGATE_NONE;
- emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode, inst->I.DstReg, src);
+ src.Negate = RC_MASK_NONE;
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
rc_remove_instruction(inst);
}
static void transform_DP3(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->I.SrcReg[0];
- struct prog_src_register src1 = inst->I.SrcReg[1];
- src0.Negate &= ~NEGATE_W;
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ struct rc_src_register src1 = inst->U.I.SrcReg[1];
+ src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= SWIZZLE_ZERO << (3 * 3);
- src1.Negate &= ~NEGATE_W;
+ src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~RC_MASK_W;
src1.Swizzle &= ~(7 << (3 * 3));
- src1.Swizzle |= SWIZZLE_ZERO << (3 * 3);
- emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, src1);
+ src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
static void transform_DPH(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- struct prog_src_register src0 = inst->I.SrcReg[0];
- src0.Negate &= ~NEGATE_W;
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ src0.Negate &= ~RC_MASK_W;
src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= SWIZZLE_ONE << (3 * 3);
- emit2(c, inst->Prev, OPCODE_DP4, inst->I.SaturateMode, inst->I.DstReg, src0, inst->I.SrcReg[1]);
+ src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
rc_remove_instruction(inst);
}
@@ -219,9 +208,9 @@ static void transform_DPH(struct radeon_compiler* c,
static void transform_DST(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- emit2(c, inst->Prev, OPCODE_MUL, inst->I.SaturateMode, inst->I.DstReg,
- swizzle(inst->I.SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
- swizzle(inst->I.SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
rc_remove_instruction(inst);
}
@@ -229,9 +218,9 @@ static void transform_FLR(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = rc_find_free_temporary(c);
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0]);
- emit2(c, inst->Prev, OPCODE_ADD, inst->I.SaturateMode, inst->I.DstReg,
- inst->I.SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
rc_remove_instruction(inst);
}
@@ -256,64 +245,64 @@ static void transform_FLR(struct radeon_compiler* c,
static void transform_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- GLuint constant;
- GLuint constant_swizzle;
- GLuint temp;
- struct prog_src_register srctemp;
+ unsigned int constant;
+ unsigned int constant_swizzle;
+ unsigned int temp;
+ struct rc_src_register srctemp;
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
- if (inst->I.DstReg.WriteMask != WRITEMASK_XYZW || inst->I.DstReg.File != PROGRAM_TEMPORARY) {
+ if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
struct rc_instruction * inst_mov;
inst_mov = emit1(c, inst,
- OPCODE_MOV, 0, inst->I.DstReg,
- srcreg(PROGRAM_TEMPORARY, rc_find_free_temporary(c)));
+ RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
- inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
}
- temp = inst->I.DstReg.Index;
- srctemp = srcreg(PROGRAM_TEMPORARY, temp);
+ temp = inst->U.I.DstReg.Index;
+ srctemp = srcreg(RC_FILE_TEMPORARY, temp);
// tmp.x = max(0.0, Src.x);
// tmp.y = max(0.0, Src.y);
// tmp.z = clamp(Src.w, -128+eps, 128-eps);
- emit2(c, inst->Prev, OPCODE_MAX, 0,
- dstregtmpmask(temp, WRITEMASK_XYW),
- inst->I.SrcReg[0],
- swizzle(srcreg(PROGRAM_CONSTANT, constant),
- SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
- emit2(c, inst->Prev, OPCODE_MIN, 0,
- dstregtmpmask(temp, WRITEMASK_Z),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ inst->U.I.SrcReg[0],
+ swizzle(srcreg(RC_FILE_CONSTANT, constant),
+ RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+ dstregtmpmask(temp, RC_MASK_Z),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
// tmp.w = Pow(tmp.y, tmp.z)
- emit1(c, inst->Prev, OPCODE_LG2, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit2(c, inst->Prev, OPCODE_MUL, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
- emit1(c, inst->Prev, OPCODE_EX2, 0,
- dstregtmpmask(temp, WRITEMASK_W),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle(srctemp, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srctemp, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
- emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode,
- dstregtmpmask(temp, WRITEMASK_Z),
- negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_Z),
+ negate(swizzle(srctemp, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
+ swizzle(srctemp, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
builtin_zero);
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
- emit1(c, inst->Prev, OPCODE_MOV, inst->I.SaturateMode,
- dstregtmpmask(temp, WRITEMASK_XYW),
- swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
rc_remove_instruction(inst);
}
@@ -323,12 +312,12 @@ static void transform_LRP(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_ADD, 0,
- dstreg(PROGRAM_TEMPORARY, tempreg),
- inst->I.SrcReg[1], negate(inst->I.SrcReg[2]));
- emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode,
- inst->I.DstReg,
- inst->I.SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[2]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
rc_remove_instruction(inst);
}
@@ -337,14 +326,14 @@ static void transform_POW(struct radeon_compiler* c,
struct rc_instruction* inst)
{
int tempreg = rc_find_free_temporary(c);
- struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
- struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
- tempdst.WriteMask = WRITEMASK_W;
- tempsrc.Swizzle = SWIZZLE_WWWW;
+ struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
+ struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+ tempdst.WriteMask = RC_MASK_W;
+ tempsrc.Swizzle = RC_SWIZZLE_WWWW;
- emit1(c, inst->Prev, OPCODE_LG2, 0, tempdst, scalar(inst->I.SrcReg[0]));
- emit2(c, inst->Prev, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->I.SrcReg[1]));
- emit1(c, inst->Prev, OPCODE_EX2, inst->I.SaturateMode, inst->I.DstReg, tempsrc);
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, scalar(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->U.I.SrcReg[1]));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
rc_remove_instruction(inst);
}
@@ -352,7 +341,26 @@ static void transform_POW(struct radeon_compiler* c,
static void transform_RSQ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- inst->I.SrcReg[0] = absolute(inst->I.SrcReg[0]);
+ inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
+}
+
+/**
+ * Lower SEQ (set on equal) to ADD + CMP.
+ *
+ * Emits tmp = src0 - src1, then a CMP on -|tmp| with operands
+ * (builtin_zero, builtin_one), and finally removes the SEQ instruction.
+ * NOTE(review): this assumes CMP picks its second operand when the first
+ * is negative, so any non-zero difference yields 0 and equality yields 1.
+ */
+static void transform_SEQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+/**
+ * Lower SFL to a plain MOV of builtin_zero into the original destination
+ * (keeping the instruction's saturate mode), then remove the SFL instruction.
+ */
+static void transform_SFL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+ rc_remove_instruction(inst);
}
static void transform_SGE(struct radeon_compiler* c,
@@ -360,9 +368,33 @@ static void transform_SGE(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
- emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
- srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+/**
+ * Lower SGT (set on greater than) to ADD + CMP.
+ *
+ * Emits tmp = src1 - src0 (src0 is the negated operand), then a CMP on tmp
+ * with operands (builtin_one, builtin_zero), and removes the SGT instruction.
+ * NOTE(review): this assumes CMP picks its second operand when the first
+ * is negative, i.e. the result is 1 exactly when src0 > src1.
+ */
+static void transform_SGT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+/**
+ * Lower SLE (set on less than or equal) to ADD + CMP.
+ *
+ * Emits tmp = src1 - src0 (src0 is the negated operand), then a CMP on tmp
+ * with operands (builtin_zero, builtin_one), and removes the SLE instruction.
+ * NOTE(review): this assumes CMP picks its second operand when the first
+ * is negative, i.e. the result is 0 when src0 > src1 and 1 otherwise.
+ */
+static void transform_SLE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
rc_remove_instruction(inst);
}
@@ -372,9 +404,21 @@ static void transform_SLT(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->I.SrcReg[0], negate(inst->I.SrcReg[1]));
- emit3(c, inst->Prev, OPCODE_CMP, inst->I.SaturateMode, inst->I.DstReg,
- srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+/**
+ * Lower SNE (set on not equal) to ADD + CMP.
+ *
+ * Emits tmp = src0 - src1, then a CMP on -|tmp| with operands
+ * (builtin_one, builtin_zero), and removes the SNE instruction.
+ * NOTE(review): this assumes CMP picks its second operand when the first
+ * is negative, so any non-zero difference yields 1 and equality yields 0.
+ */
+static void transform_SNE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
rc_remove_instruction(inst);
}
@@ -382,14 +426,14 @@ static void transform_SLT(struct radeon_compiler* c,
static void transform_SUB(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- inst->I.Opcode = OPCODE_ADD;
- inst->I.SrcReg[1] = negate(inst->I.SrcReg[1]);
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
}
static void transform_SWZ(struct radeon_compiler* c,
struct rc_instruction* inst)
{
- inst->I.Opcode = OPCODE_MOV;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
}
static void transform_XPD(struct radeon_compiler* c,
@@ -397,13 +441,13 @@ static void transform_XPD(struct radeon_compiler* c,
{
int tempreg = rc_find_free_temporary(c);
- emit2(c, inst->Prev, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(inst->I.SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- swizzle(inst->I.SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
- emit3(c, inst->Prev, OPCODE_MAD, inst->I.SaturateMode, inst->I.DstReg,
- swizzle(inst->I.SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
- swizzle(inst->I.SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
- negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
rc_remove_instruction(inst);
}
@@ -414,7 +458,7 @@ static void transform_XPD(struct radeon_compiler* c,
* no userData necessary.
*
* Eliminates the following ALU instructions:
- * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
+ * ABS, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
* using:
* MOV, ADD, MUL, MAD, FRC, DP4, MAX, MIN, LG2, EX2, CMP
*
@@ -423,27 +467,32 @@ static void transform_XPD(struct radeon_compiler* c,
*
* @note should be applicable to R300 and R500 fragment programs.
*/
-GLboolean radeonTransformALU(
+int radeonTransformALU(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* unused)
{
- switch(inst->I.Opcode) {
- case OPCODE_ABS: transform_ABS(c, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
- case OPCODE_DST: transform_DST(c, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
- case OPCODE_LIT: transform_LIT(c, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
- case OPCODE_POW: transform_POW(c, inst); return GL_TRUE;
- case OPCODE_RSQ: transform_RSQ(c, inst); return GL_TRUE;
- case OPCODE_SGE: transform_SGE(c, inst); return GL_TRUE;
- case OPCODE_SLT: transform_SLT(c, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_DST: transform_DST(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
+ case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
+ case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+ case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
+ case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
+ case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
+ case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
+ case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
- return GL_FALSE;
+ return 0;
}
}
@@ -452,37 +501,37 @@ static void transform_r300_vertex_ABS(struct radeon_compiler* c,
struct rc_instruction* inst)
{
/* Note: r500 can take absolute values, but r300 cannot. */
- inst->I.Opcode = OPCODE_MAX;
- inst->I.SrcReg[1] = inst->I.SrcReg[0];
- inst->I.SrcReg[1].Negate ^= NEGATE_XYZW;
+ inst->U.I.Opcode = RC_OPCODE_MAX;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
*/
-GLboolean r300_transform_vertex_alu(
+int r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction* inst,
void* unused)
{
- switch(inst->I.Opcode) {
- case OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return GL_TRUE;
- case OPCODE_DP3: transform_DP3(c, inst); return GL_TRUE;
- case OPCODE_DPH: transform_DPH(c, inst); return GL_TRUE;
- case OPCODE_FLR: transform_FLR(c, inst); return GL_TRUE;
- case OPCODE_LRP: transform_LRP(c, inst); return GL_TRUE;
- case OPCODE_SUB: transform_SUB(c, inst); return GL_TRUE;
- case OPCODE_SWZ: transform_SWZ(c, inst); return GL_TRUE;
- case OPCODE_XPD: transform_XPD(c, inst); return GL_TRUE;
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
default:
- return GL_FALSE;
+ return 0;
}
}
-static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
{
- static const GLfloat SinCosConsts[2][4] = {
+ static const float SinCosConsts[2][4] = {
{
1.273239545, // 4/PI
-0.405284735, // -4/(PI*PI)
@@ -512,25 +561,25 @@ static void sincos_constants(struct radeon_compiler* c, GLuint *constants)
*/
static void sin_approx(
struct radeon_compiler* c, struct rc_instruction * before,
- struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
-{
- GLuint tempreg = rc_find_free_temporary(c);
-
- emit2(c, before->Prev, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- srcreg(PROGRAM_CONSTANT, constants[0]));
- emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
- absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit3(c, before->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
- negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
- emit3(c, before->Prev, OPCODE_MAD, 0, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
+ struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+ unsigned int tempreg = rc_find_free_temporary(c);
+
+ /* Each step reads the temporary written by the previous one, so the
+  * instructions must appear in emission order. Anchoring every insert at
+  * before->Prev (which moves as instructions are added) appends each new
+  * instruction directly in front of `before`; anchoring at `before` itself
+  * would stack them in reverse order. */
+ emit2(c, before->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ srcreg(RC_FILE_CONSTANT, constants[0]));
+ emit3(c, before->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
+ absolute(swizzle(src, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
+ emit3(c, before->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ absolute(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)),
+ negate(swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X)));
+ emit3(c, before->Prev, RC_OPCODE_MAD, 0, dst,
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
}
/**
@@ -538,81 +587,81 @@ static void sin_approx(
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
-GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
+int radeonTransformTrigSimple(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->I.Opcode != OPCODE_COS &&
- inst->I.Opcode != OPCODE_SIN &&
- inst->I.Opcode != OPCODE_SCS)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
- GLuint constants[2];
- GLuint tempreg = rc_find_free_temporary(c);
+ unsigned int constants[2];
+ unsigned int tempreg = rc_find_free_temporary(c);
sincos_constants(c, constants);
- if (inst->I.Opcode == OPCODE_COS) {
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
// MAD tmp.x, src, 1/(2*PI), 0.75
// FRC tmp.x, tmp.x
// MAD tmp.z, tmp.x, 2*PI, -PI
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- sin_approx(c, inst, inst->I.DstReg,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
constants);
- } else if (inst->I.Opcode == OPCODE_SIN) {
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- sin_approx(c, inst, inst->I.DstReg,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
constants);
} else {
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- srcreg(PROGRAM_TEMPORARY, tempreg));
- emit3(c, inst->Prev, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
- srcreg(PROGRAM_TEMPORARY, tempreg),
- swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
- negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
-
- struct prog_dst_register dst = inst->I.DstReg;
-
- dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_X;
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W, RC_SWIZZLE_W),
+ negate(swizzle(srcreg(RC_FILE_CONSTANT, constants[0]), RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z, RC_SWIZZLE_Z)));
+
+ struct rc_dst_register dst = inst->U.I.DstReg;
+
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
sin_approx(c, inst, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
constants);
- dst.WriteMask = inst->I.DstReg.WriteMask & WRITEMASK_Y;
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
sin_approx(c, inst, dst,
- swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ swizzle(srcreg(RC_FILE_TEMPORARY, tempreg), RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y, RC_SWIZZLE_Y),
constants);
}
rc_remove_instruction(inst);
- return GL_TRUE;
+ return 1;
}
@@ -624,53 +673,53 @@ GLboolean radeonTransformTrigSimple(struct radeon_compiler* c,
*
* @warning This transformation implicitly changes the semantics of SIN and COS!
*/
-GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
+int radeonTransformTrigScale(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->I.Opcode != OPCODE_COS &&
- inst->I.Opcode != OPCODE_SIN &&
- inst->I.Opcode != OPCODE_SCS)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
- static const GLfloat RCP_2PI = 0.15915494309189535;
- GLuint temp;
- GLuint constant;
- GLuint constant_swizzle;
+ static const float RCP_2PI = 0.15915494309189535;
+ unsigned int temp;
+ unsigned int constant;
+ unsigned int constant_swizzle;
temp = rc_find_free_temporary(c);
constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
- emit2(c, inst->Prev, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
- swizzle(inst->I.SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
- srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
- emit1(c, inst->Prev, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
- srcreg(PROGRAM_TEMPORARY, temp));
-
- if (inst->I.Opcode == OPCODE_COS) {
- emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, inst->I.DstReg,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->I.Opcode == OPCODE_SIN) {
- emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode,
- inst->I.DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
- } else if (inst->I.Opcode == OPCODE_SCS) {
- struct prog_dst_register moddst = inst->I.DstReg;
-
- if (inst->I.DstReg.WriteMask & WRITEMASK_X) {
- moddst.WriteMask = WRITEMASK_X;
- emit1(c, inst->Prev, OPCODE_COS, inst->I.SaturateMode, moddst,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X, RC_SWIZZLE_X),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp));
+
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+ inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+ struct rc_dst_register moddst = inst->U.I.DstReg;
+
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+ moddst.WriteMask = RC_MASK_X;
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
}
- if (inst->I.DstReg.WriteMask & WRITEMASK_Y) {
- moddst.WriteMask = WRITEMASK_Y;
- emit1(c, inst->Prev, OPCODE_SIN, inst->I.SaturateMode, moddst,
- srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+ moddst.WriteMask = RC_MASK_Y;
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
}
}
rc_remove_instruction(inst);
- return GL_TRUE;
+ return 1;
}
/**
@@ -681,15 +730,15 @@ GLboolean radeonTransformTrigScale(struct radeon_compiler* c,
* @warning This explicitly changes the form of DDX and DDY!
*/
-GLboolean radeonTransformDeriv(struct radeon_compiler* c,
+int radeonTransformDeriv(struct radeon_compiler* c,
struct rc_instruction* inst,
void* unused)
{
- if (inst->I.Opcode != OPCODE_DDX && inst->I.Opcode != OPCODE_DDY)
- return GL_FALSE;
+ if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
+ return 0;
- inst->I.SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
- inst->I.SrcReg[1].Negate = NEGATE_XYZW;
+ inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE);
+ inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
- return GL_TRUE;
+ return 1;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
index 147efec6fc..7cb5f84b7f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -30,27 +30,27 @@
#include "radeon_program.h"
-GLboolean radeonTransformALU(
+int radeonTransformALU(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean r300_transform_vertex_alu(
+int r300_transform_vertex_alu(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean radeonTransformTrigSimple(
+int radeonTransformTrigSimple(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean radeonTransformTrigScale(
+int radeonTransformTrigScale(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
-GLboolean radeonTransformDeriv(
+int radeonTransformDeriv(
struct radeon_compiler * c,
struct rc_instruction * inst,
void*);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
new file mode 100644
index 0000000000..7c0d6720b1
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+ RC_SATURATE_NONE = 0,
+ RC_SATURATE_ZERO_ONE,
+ RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+ RC_TEXTURE_2D_ARRAY,
+ RC_TEXTURE_1D_ARRAY,
+ RC_TEXTURE_CUBE,
+ RC_TEXTURE_3D,
+ RC_TEXTURE_RECT,
+ RC_TEXTURE_2D,
+ RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+ /**
+ * Used to indicate unused register descriptions and
+ * source registers that use a constant swizzle.
+ */
+ RC_FILE_NONE = 0,
+ RC_FILE_TEMPORARY,
+
+ /**
+ * Input register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_INPUT,
+
+ /**
+ * Output register.
+ *
+ * \note The compiler attaches no implicit semantics to output registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_OUTPUT,
+ RC_FILE_ADDRESS,
+
+ /**
+ * Indicates a constant from the \ref rc_constant_list .
+ */
+ RC_FILE_CONSTANT,
+
+ /**
+ * Indicates a special register, see RC_SPECIAL_xxx.
+ */
+ RC_FILE_SPECIAL
+} rc_register_file;
+
+enum {
+ /** R500 fragment program ALU result "register" */
+ RC_SPECIAL_ALU_RESULT = 0,
+
+ /** Must be last */
+ RC_NUM_SPECIAL_REGISTERS
+};
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+ RC_SWIZZLE_X = 0,
+ RC_SWIZZLE_Y,
+ RC_SWIZZLE_Z,
+ RC_SWIZZLE_W,
+ RC_SWIZZLE_ZERO,
+ RC_SWIZZLE_ONE,
+ RC_SWIZZLE_HALF,
+ RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+ do { \
+ (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+ } while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+typedef enum {
+ RC_ALURESULT_NONE = 0,
+ RC_ALURESULT_X,
+ RC_ALURESULT_W
+} rc_write_aluresult;
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
index 4c26db5d24..ee839596aa 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
*
* All Rights Reserved.
*
@@ -25,584 +25,29 @@
*
*/
-/**
- * @file
- *
- * Perform temporary register allocation and attempt to pair off instructions
- * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
- * vs. ALU instruction scheduling.
- */
-
#include "radeon_program_pair.h"
-#include "memory_pool.h"
-#include "radeon_compiler.h"
-#include "shader/prog_print.h"
-
-#define error(fmt, args...) do { \
- rc_error(&s->Compiler->Base, "%s::%s(): " fmt "\n", \
- __FILE__, __FUNCTION__, ##args); \
-} while(0)
-
-struct pair_state_instruction {
- struct prog_instruction Instruction;
- GLuint IP; /**< Position of this instruction in original program */
-
- GLuint IsTex:1; /**< Is a texture instruction */
- GLuint NeedRGB:1; /**< Needs the RGB ALU */
- GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
- GLuint IsTranscendent:1; /**< Is a special transcendent instruction */
-
- /**
- * Number of (read and write) dependencies that must be resolved before
- * this instruction can be scheduled.
- */
- GLuint NumDependencies:5;
-
- /**
- * Next instruction in the linked list of ready instructions.
- */
- struct pair_state_instruction *NextReady;
-
- /**
- * Values that this instruction writes
- */
- struct reg_value *Values[4];
-};
-
-
-/**
- * Used to keep track of which instructions read a value.
- */
-struct reg_value_reader {
- struct pair_state_instruction *Reader;
- struct reg_value_reader *Next;
-};
-
-/**
- * Used to keep track which values are stored in each component of a
- * PROGRAM_TEMPORARY.
- */
-struct reg_value {
- struct pair_state_instruction *Writer;
- struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */
-
- /**
- * Unordered linked list of instructions that read from this value.
- */
- struct reg_value_reader *Readers;
-
- /**
- * Number of readers of this value. This is calculated during @ref scan_instructions
- * and continually decremented during code emission.
- * When this count reaches zero, the instruction that writes the @ref Next value
- * can be scheduled.
- */
- GLuint NumReaders;
-};
-
-/**
- * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
- * to the proper hardware temporary.
- */
-struct pair_register_translation {
- GLuint Allocated:1;
- GLuint HwIndex:8;
- GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */
-
- /**
- * Notes the value that is currently contained in each component
- * (only used for PROGRAM_TEMPORARY registers).
- */
- struct reg_value *Value[4];
-};
-
-struct pair_state {
- struct r300_fragment_program_compiler * Compiler;
- const struct radeon_pair_handler *Handler;
- GLboolean Verbose;
- void *UserData;
-
- /**
- * Translate Mesa registers to hardware registers
- */
- struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
- struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];
-
- struct {
- GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
- } HwTemps[128];
-
- /**
- * Linked list of instructions that can be scheduled right now,
- * based on which ALU/TEX resources they require.
- */
- struct pair_state_instruction *ReadyFullALU;
- struct pair_state_instruction *ReadyRGB;
- struct pair_state_instruction *ReadyAlpha;
- struct pair_state_instruction *ReadyTEX;
-};
-
-
-static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
-{
- switch(file) {
- case PROGRAM_TEMPORARY: return &s->Temps[index];
- case PROGRAM_INPUT: return &s->Inputs[index];
- default: return 0;
- }
-}
-
-static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
-{
- struct pair_register_translation *t = get_register(s, file, index);
- ASSERT(!s->HwTemps[hwindex].RefCount);
- ASSERT(!t->Allocated);
- s->HwTemps[hwindex].RefCount = t->RefCount;
- t->Allocated = 1;
- t->HwIndex = hwindex;
-}
-
-static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
-{
- GLuint hwindex;
-
- struct pair_register_translation *t = get_register(s, file, index);
- if (!t) {
- error("get_hw_reg: %i[%i]\n", file, index);
- return 0;
- }
-
- if (t->Allocated)
- return t->HwIndex;
-
- for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
- if (!s->HwTemps[hwindex].RefCount)
- break;
-
- if (hwindex >= s->Handler->MaxHwTemps) {
- error("Ran out of hardware temporaries");
- return 0;
- }
-
- alloc_hw_reg(s, file, index, hwindex);
- return hwindex;
-}
-
-
-static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
-{
- if (!s->HwTemps[hwindex].RefCount) {
- error("Hwindex %i refcount error", hwindex);
- return;
- }
-
- s->HwTemps[hwindex].RefCount--;
-}
-
-static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
-{
- pairinst->NextReady = *list;
- *list = pairinst;
-}
-
-/**
- * The given instruction has become ready. Link it into the ready
- * instructions.
- */
-static void instruction_ready(struct pair_state *s, struct pair_state_instruction *pairinst)
-{
- if (s->Verbose)
- _mesa_printf("instruction_ready(%i)\n", pairinst->IP);
-
- if (pairinst->IsTex)
- add_pairinst_to_list(&s->ReadyTEX, pairinst);
- else if (!pairinst->NeedAlpha)
- add_pairinst_to_list(&s->ReadyRGB, pairinst);
- else if (!pairinst->NeedRGB)
- add_pairinst_to_list(&s->ReadyAlpha, pairinst);
- else
- add_pairinst_to_list(&s->ReadyFullALU, pairinst);
-}
-
-
-/**
- * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
- * and reverse the order of arguments for CMP.
- */
-static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
-{
- struct prog_src_register tmp;
-
- switch(inst->Opcode) {
- case OPCODE_ADD:
- inst->SrcReg[2] = inst->SrcReg[1];
- inst->SrcReg[1].File = PROGRAM_BUILTIN;
- inst->SrcReg[1].Swizzle = SWIZZLE_1111;
- inst->SrcReg[1].Negate = NEGATE_NONE;
- inst->Opcode = OPCODE_MAD;
- break;
- case OPCODE_CMP:
- tmp = inst->SrcReg[2];
- inst->SrcReg[2] = inst->SrcReg[0];
- inst->SrcReg[0] = tmp;
- break;
- case OPCODE_MOV:
- /* AMD say we should use CMP.
- * However, when we transform
- * KIL -r0;
- * into
- * CMP tmp, -r0, -r0, 0;
- * KIL tmp;
- * we get incorrect behaviour on R500 when r0 == 0.0.
- * It appears that the R500 KIL hardware treats -0.0 as less
- * than zero.
- */
- inst->SrcReg[1].File = PROGRAM_BUILTIN;
- inst->SrcReg[1].Swizzle = SWIZZLE_1111;
- inst->SrcReg[2].File = PROGRAM_BUILTIN;
- inst->SrcReg[2].Swizzle = SWIZZLE_0000;
- inst->Opcode = OPCODE_MAD;
- break;
- case OPCODE_MUL:
- inst->SrcReg[2].File = PROGRAM_BUILTIN;
- inst->SrcReg[2].Swizzle = SWIZZLE_0000;
- inst->Opcode = OPCODE_MAD;
- break;
- default:
- /* nothing to do */
- break;
- }
-}
-
-
-/**
- * Classify an instruction according to which ALUs etc. it needs
- */
-static void classify_instruction(struct pair_state *s,
- struct pair_state_instruction *psi)
-{
- psi->NeedRGB = (psi->Instruction.DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
- psi->NeedAlpha = (psi->Instruction.DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;
-
- switch(psi->Instruction.Opcode) {
- case OPCODE_ADD:
- case OPCODE_CMP:
- case OPCODE_DDX:
- case OPCODE_DDY:
- case OPCODE_FRC:
- case OPCODE_MAD:
- case OPCODE_MAX:
- case OPCODE_MIN:
- case OPCODE_MOV:
- case OPCODE_MUL:
- break;
- case OPCODE_COS:
- case OPCODE_EX2:
- case OPCODE_LG2:
- case OPCODE_RCP:
- case OPCODE_RSQ:
- case OPCODE_SIN:
- psi->IsTranscendent = 1;
- psi->NeedAlpha = 1;
- break;
- case OPCODE_DP4:
- psi->NeedAlpha = 1;
- /* fall through */
- case OPCODE_DP3:
- psi->NeedRGB = 1;
- break;
- case OPCODE_KIL:
- case OPCODE_TEX:
- case OPCODE_TXB:
- case OPCODE_TXP:
- case OPCODE_END:
- psi->IsTex = 1;
- break;
- default:
- error("Unknown opcode %d\n", psi->Instruction.Opcode);
- break;
- }
-}
-
-
-/**
- * Count which (input, temporary) register is read and written how often,
- * and scan the instruction stream to find dependencies.
- */
-static void scan_instructions(struct pair_state *s)
-{
- struct rc_instruction *source;
- GLuint ip;
-
- for(source = s->Compiler->Base.Program.Instructions.Next, ip = 0;
- source != &s->Compiler->Base.Program.Instructions;
- source = source->Next, ++ip) {
- struct pair_state_instruction *pairinst = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*pairinst));
- memset(pairinst, 0, sizeof(struct pair_state_instruction));
-
- pairinst->Instruction = source->I;
- pairinst->IP = ip;
- final_rewrite(s, &pairinst->Instruction);
- classify_instruction(s, pairinst);
-
- int nsrc = _mesa_num_inst_src_regs(pairinst->Instruction.Opcode);
- int j;
- for(j = 0; j < nsrc; j++) {
- struct pair_register_translation *t =
- get_register(s, pairinst->Instruction.SrcReg[j].File, pairinst->Instruction.SrcReg[j].Index);
- if (!t)
- continue;
-
- t->RefCount++;
-
- if (pairinst->Instruction.SrcReg[j].File == PROGRAM_TEMPORARY) {
- int i;
- for(i = 0; i < 4; ++i) {
- GLuint swz = GET_SWZ(pairinst->Instruction.SrcReg[j].Swizzle, i);
- if (swz >= 4)
- continue; /* constant or NIL swizzle */
- if (!t->Value[swz])
- continue; /* this is an undefined read */
-
- /* Do not add a dependency if this instruction
- * also rewrites the value. The code below adds
- * a dependency for the DstReg, which is a superset
- * of the SrcReg dependency. */
- if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY &&
- pairinst->Instruction.DstReg.Index == pairinst->Instruction.SrcReg[j].Index &&
- GET_BIT(pairinst->Instruction.DstReg.WriteMask, swz))
- continue;
-
- struct reg_value_reader* r = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*r));
- pairinst->NumDependencies++;
- t->Value[swz]->NumReaders++;
- r->Reader = pairinst;
- r->Next = t->Value[swz]->Readers;
- t->Value[swz]->Readers = r;
- }
- }
- }
-
- int ndst = _mesa_num_inst_dst_regs(pairinst->Instruction.Opcode);
- if (ndst) {
- struct pair_register_translation *t =
- get_register(s, pairinst->Instruction.DstReg.File, pairinst->Instruction.DstReg.Index);
- if (t) {
- t->RefCount++;
-
- if (pairinst->Instruction.DstReg.File == PROGRAM_TEMPORARY) {
- int j;
- for(j = 0; j < 4; ++j) {
- if (!GET_BIT(pairinst->Instruction.DstReg.WriteMask, j))
- continue;
-
- struct reg_value* v = memory_pool_malloc(&s->Compiler->Base.Pool, sizeof(*v));
- memset(v, 0, sizeof(struct reg_value));
- v->Writer = pairinst;
- if (t->Value[j]) {
- pairinst->NumDependencies++;
- t->Value[j]->Next = v;
- }
- t->Value[j] = v;
- pairinst->Values[j] = v;
- }
- }
- }
- }
-
- if (s->Verbose)
- _mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);
-
- if (!pairinst->NumDependencies)
- instruction_ready(s, pairinst);
- }
-
- /* Clear the PROGRAM_TEMPORARY state */
- int i, j;
- for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
- for(j = 0; j < 4; ++j)
- s->Temps[i].Value[j] = 0;
- }
-}
-
-
-static void decrement_dependencies(struct pair_state *s, struct pair_state_instruction *pairinst)
-{
- ASSERT(pairinst->NumDependencies > 0);
- if (!--pairinst->NumDependencies)
- instruction_ready(s, pairinst);
-}
/**
- * Update the dependency tracking state based on what the instruction
- * at the given IP does.
+ * Return the source slot where we installed the given register access,
+ * or -1 if no slot was free anymore.
*/
-static void commit_instruction(struct pair_state *s, struct pair_state_instruction *pairinst)
-{
- struct prog_instruction *inst = &pairinst->Instruction;
-
- if (s->Verbose)
- _mesa_printf("commit_instruction(%i)\n", pairinst->IP);
-
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- struct pair_register_translation *t = &s->Temps[inst->DstReg.Index];
- deref_hw_reg(s, t->HwIndex);
-
- int i;
- for(i = 0; i < 4; ++i) {
- if (!GET_BIT(inst->DstReg.WriteMask, i))
- continue;
-
- t->Value[i] = pairinst->Values[i];
- if (t->Value[i]->NumReaders) {
- struct reg_value_reader *r;
- for(r = pairinst->Values[i]->Readers; r; r = r->Next)
- decrement_dependencies(s, r->Reader);
- } else if (t->Value[i]->Next) {
- /* This happens when the only reader writes
- * the register at the same time */
- decrement_dependencies(s, t->Value[i]->Next->Writer);
- }
- }
- }
-
- int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
- int i;
- for(i = 0; i < nsrc; i++) {
- struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index);
- if (!t)
- continue;
-
- deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index));
-
- if (inst->SrcReg[i].File != PROGRAM_TEMPORARY)
- continue;
-
- int j;
- for(j = 0; j < 4; ++j) {
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
- if (swz >= 4)
- continue;
- if (!t->Value[swz])
- continue;
-
- /* Do not free a dependency if this instruction
- * also rewrites the value. See scan_instructions. */
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == inst->SrcReg[i].Index &&
- GET_BIT(inst->DstReg.WriteMask, swz))
- continue;
-
- if (!--t->Value[swz]->NumReaders) {
- if (t->Value[swz]->Next)
- decrement_dependencies(s, t->Value[swz]->Next->Writer);
- }
- }
- }
-}
-
-
-/**
- * Emit all ready texture instructions in a single block.
- *
- * Emit as a single block to (hopefully) sample many textures in parallel,
- * and to avoid hardware indirections on R300.
- *
- * In R500, we don't really know when the result of a texture instruction
- * arrives. So allocate all destinations first, to make sure they do not
- * arrive early and overwrite a texture coordinate we're going to use later
- * in the block.
- */
-static void emit_all_tex(struct pair_state *s)
-{
- struct pair_state_instruction *readytex;
- struct pair_state_instruction *pairinst;
-
- ASSERT(s->ReadyTEX);
-
- // Don't let the ready list change under us!
- readytex = s->ReadyTEX;
- s->ReadyTEX = 0;
-
- // Allocate destination hardware registers in one block to avoid conflicts.
- for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- struct prog_instruction *inst = &pairinst->Instruction;
- if (inst->Opcode != OPCODE_KIL)
- get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- }
-
- if (s->Compiler->Base.Debug)
- _mesa_printf(" BEGIN_TEX\n");
-
- if (s->Handler->BeginTexBlock)
- s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->BeginTexBlock(s->UserData);
-
- for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) {
- struct prog_instruction *inst = &pairinst->Instruction;
- commit_instruction(s, pairinst);
-
- if (inst->Opcode != OPCODE_KIL)
- inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
-
- if (s->Compiler->Base.Debug) {
- _mesa_printf(" ");
- _mesa_print_instruction(inst);
- fflush(stderr);
- }
-
- struct radeon_pair_texture_instruction rpti;
-
- switch(inst->Opcode) {
- case OPCODE_TEX: rpti.Opcode = RADEON_OPCODE_TEX; break;
- case OPCODE_TXB: rpti.Opcode = RADEON_OPCODE_TXB; break;
- case OPCODE_TXP: rpti.Opcode = RADEON_OPCODE_TXP; break;
- default:
- case OPCODE_KIL: rpti.Opcode = RADEON_OPCODE_KIL; break;
- }
-
- rpti.DestIndex = inst->DstReg.Index;
- rpti.WriteMask = inst->DstReg.WriteMask;
- rpti.TexSrcUnit = inst->TexSrcUnit;
- rpti.TexSrcTarget = inst->TexSrcTarget;
- rpti.SrcIndex = inst->SrcReg[0].Index;
- rpti.SrcSwizzle = inst->SrcReg[0].Swizzle;
-
- s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitTex(s->UserData, &rpti);
- }
-
- if (s->Compiler->Base.Debug)
- _mesa_printf(" END_TEX\n");
-}
-
-
-static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair,
- struct prog_src_register src, GLboolean rgb, GLboolean alpha)
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index)
{
int candidate = -1;
int candidate_quality = -1;
int i;
- if (!rgb && !alpha)
+ if ((!rgb && !alpha) || file == RC_FILE_NONE)
return 0;
- GLuint constant;
- GLuint index;
-
- if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) {
- constant = 0;
- index = get_hw_reg(s, src.File, src.Index);
- } else {
- constant = 1;
- index = src.Index;
- }
-
for(i = 0; i < 3; ++i) {
int q = 0;
if (rgb) {
if (pair->RGB.Src[i].Used) {
- if (pair->RGB.Src[i].Constant != constant ||
+ if (pair->RGB.Src[i].File != file ||
pair->RGB.Src[i].Index != index)
continue;
q++;
@@ -610,7 +55,7 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
}
if (alpha) {
if (pair->Alpha.Src[i].Used) {
- if (pair->Alpha.Src[i].Constant != constant ||
+ if (pair->Alpha.Src[i].File != file ||
pair->Alpha.Src[i].Index != index)
continue;
q++;
@@ -625,334 +70,15 @@ static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instructio
if (candidate >= 0) {
if (rgb) {
pair->RGB.Src[candidate].Used = 1;
- pair->RGB.Src[candidate].Constant = constant;
+ pair->RGB.Src[candidate].File = file;
pair->RGB.Src[candidate].Index = index;
}
if (alpha) {
pair->Alpha.Src[candidate].Used = 1;
- pair->Alpha.Src[candidate].Constant = constant;
+ pair->Alpha.Src[candidate].File = file;
pair->Alpha.Src[candidate].Index = index;
}
}
return candidate;
}
-
-/**
- * Fill the given ALU instruction's opcodes and source operands into the given pair,
- * if possible.
- */
-static GLboolean fill_instruction_into_pair(
- struct pair_state *s,
- struct radeon_pair_instruction *pair,
- struct pair_state_instruction *pairinst)
-{
- struct prog_instruction *inst = &pairinst->Instruction;
-
- ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP);
- ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP);
-
- if (pairinst->NeedRGB) {
- if (pairinst->IsTranscendent)
- pair->RGB.Opcode = OPCODE_REPL_ALPHA;
- else
- pair->RGB.Opcode = inst->Opcode;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- pair->RGB.Saturate = 1;
- }
- if (pairinst->NeedAlpha) {
- pair->Alpha.Opcode = inst->Opcode;
- if (inst->SaturateMode == SATURATE_ZERO_ONE)
- pair->Alpha.Saturate = 1;
- }
-
- int nargs = _mesa_num_inst_src_regs(inst->Opcode);
- int i;
-
- /* Special case for DDX/DDY (MDH/MDV). */
- if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) {
- if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used)
- return GL_FALSE;
- else
- nargs++;
- }
-
- for(i = 0; i < nargs; ++i) {
- int source;
- if (pairinst->NeedRGB && !pairinst->IsTranscendent) {
- GLboolean srcrgb = GL_FALSE;
- GLboolean srcalpha = GL_FALSE;
- int j;
- for(j = 0; j < 3; ++j) {
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
- if (swz < 3)
- srcrgb = GL_TRUE;
- else if (swz < 4)
- srcalpha = GL_TRUE;
- }
- source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
- if (source < 0)
- return GL_FALSE;
- pair->RGB.Arg[i].Source = source;
- pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
- pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (NEGATE_X | NEGATE_Y | NEGATE_Z));
- }
- if (pairinst->NeedAlpha) {
- GLboolean srcrgb = GL_FALSE;
- GLboolean srcalpha = GL_FALSE;
- GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3);
- if (swz < 3)
- srcrgb = GL_TRUE;
- else if (swz < 4)
- srcalpha = GL_TRUE;
- source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha);
- if (source < 0)
- return GL_FALSE;
- pair->Alpha.Arg[i].Source = source;
- pair->Alpha.Arg[i].Swizzle = swz;
- pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
- pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & NEGATE_W);
- }
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Fill in the destination register information.
- *
- * This is split from filling in source registers because we want
- * to avoid allocating hardware temporaries for destinations until
- * we are absolutely certain that we're going to emit a certain
- * instruction pairing.
- */
-static void fill_dest_into_pair(
- struct pair_state *s,
- struct radeon_pair_instruction *pair,
- struct pair_state_instruction *pairinst)
-{
- struct prog_instruction *inst = &pairinst->Instruction;
-
- if (inst->DstReg.File == PROGRAM_OUTPUT) {
- if (inst->DstReg.Index == s->Compiler->OutputColor) {
- pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
- pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- } else if (inst->DstReg.Index == s->Compiler->OutputDepth) {
- pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- }
- } else {
- GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
- if (pairinst->NeedRGB) {
- pair->RGB.DestIndex = hwindex;
- pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
- }
- if (pairinst->NeedAlpha) {
- pair->Alpha.DestIndex = hwindex;
- pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
- }
- }
-}
-
-
-/**
- * Find a good ALU instruction or pair of ALU instruction and emit it.
- *
- * Prefer emitting full ALU instructions, so that when we reach a point
- * where no full ALU instruction can be emitted, we have more candidates
- * for RGB/Alpha pairing.
- */
-static void emit_alu(struct pair_state *s)
-{
- struct radeon_pair_instruction pair;
- struct pair_state_instruction *psi;
-
- if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
- if (s->ReadyFullALU) {
- psi = s->ReadyFullALU;
- s->ReadyFullALU = s->ReadyFullALU->NextReady;
- } else if (s->ReadyRGB) {
- psi = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
- } else {
- psi = s->ReadyAlpha;
- s->ReadyAlpha = s->ReadyAlpha->NextReady;
- }
-
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, psi);
- fill_dest_into_pair(s, &pair, psi);
- commit_instruction(s, psi);
- } else {
- struct pair_state_instruction **prgb;
- struct pair_state_instruction **palpha;
-
- /* Some pairings might fail because they require too
- * many source slots; try all possible pairings if necessary */
- for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
- for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
- struct pair_state_instruction * psirgb = *prgb;
- struct pair_state_instruction * psialpha = *palpha;
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, psirgb);
- if (!fill_instruction_into_pair(s, &pair, psialpha))
- continue;
- *prgb = (*prgb)->NextReady;
- *palpha = (*palpha)->NextReady;
- fill_dest_into_pair(s, &pair, psirgb);
- fill_dest_into_pair(s, &pair, psialpha);
- commit_instruction(s, psirgb);
- commit_instruction(s, psialpha);
- goto success;
- }
- }
-
- /* No success in pairing; just take the first RGB instruction */
- psi = s->ReadyRGB;
- s->ReadyRGB = s->ReadyRGB->NextReady;
-
- _mesa_bzero(&pair, sizeof(pair));
- fill_instruction_into_pair(s, &pair, psi);
- fill_dest_into_pair(s, &pair, psi);
- commit_instruction(s, psi);
- success: ;
- }
-
- if (s->Compiler->Base.Debug)
- radeonPrintPairInstruction(&pair);
-
- s->Compiler->Base.Error = s->Compiler->Base.Error || !s->Handler->EmitPaired(s->UserData, &pair);
-}
-
-/* Callback function for assigning input registers to hardware registers */
-static void alloc_helper(void * data, unsigned input, unsigned hwreg)
-{
- struct pair_state * s = data;
- alloc_hw_reg(s, PROGRAM_INPUT, input, hwreg);
-}
-
-void radeonPairProgram(
- struct r300_fragment_program_compiler * compiler,
- const struct radeon_pair_handler* handler, void *userdata)
-{
- struct pair_state s;
-
- _mesa_bzero(&s, sizeof(s));
- s.Compiler = compiler;
- s.Handler = handler;
- s.UserData = userdata;
- s.Verbose = GL_FALSE && s.Compiler->Base.Debug;
-
- if (s.Compiler->Base.Debug)
- _mesa_printf("Emit paired program\n");
-
- scan_instructions(&s);
- s.Compiler->AllocateHwInputs(s.Compiler, &alloc_helper, &s);
-
- while(!s.Compiler->Base.Error &&
- (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
- if (s.ReadyTEX)
- emit_all_tex(&s);
-
- while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
- emit_alu(&s);
- }
-
- if (s.Compiler->Base.Debug)
- _mesa_printf(" END\n");
-}
-
-
-static void print_pair_src(int i, struct radeon_pair_instruction_source* src)
-{
- _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? "CNST" : "TEMP", src->Index);
-}
-
-static const char* opcode_string(GLuint opcode)
-{
- if (opcode == OPCODE_REPL_ALPHA)
- return "SOP";
- else
- return _mesa_opcode_string(opcode);
-}
-
-static int num_pairinst_args(GLuint opcode)
-{
- if (opcode == OPCODE_REPL_ALPHA)
- return 0;
- else
- return _mesa_num_inst_src_regs(opcode);
-}
-
-static char swizzle_char(GLuint swz)
-{
- switch(swz) {
- case SWIZZLE_X: return 'x';
- case SWIZZLE_Y: return 'y';
- case SWIZZLE_Z: return 'z';
- case SWIZZLE_W: return 'w';
- case SWIZZLE_ZERO: return '0';
- case SWIZZLE_ONE: return '1';
- case SWIZZLE_NIL: return '_';
- default: return '?';
- }
-}
-
-void radeonPrintPairInstruction(struct radeon_pair_instruction *inst)
-{
- int nargs;
- int i;
-
- _mesa_printf(" RGB: ");
- for(i = 0; i < 3; ++i) {
- if (inst->RGB.Src[i].Used)
- print_pair_src(i, inst->RGB.Src + i);
- }
- _mesa_printf("\n");
- _mesa_printf(" Alpha:");
- for(i = 0; i < 3; ++i) {
- if (inst->Alpha.Src[i].Used)
- print_pair_src(i, inst->Alpha.Src + i);
- }
- _mesa_printf("\n");
-
- _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : "");
- if (inst->RGB.WriteMask)
- _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex,
- (inst->RGB.WriteMask & 1) ? "x" : "",
- (inst->RGB.WriteMask & 2) ? "y" : "",
- (inst->RGB.WriteMask & 4) ? "z" : "");
- if (inst->RGB.OutputWriteMask)
- _mesa_printf(" COLOR.%s%s%s",
- (inst->RGB.OutputWriteMask & 1) ? "x" : "",
- (inst->RGB.OutputWriteMask & 2) ? "y" : "",
- (inst->RGB.OutputWriteMask & 4) ? "z" : "");
- nargs = num_pairinst_args(inst->RGB.Opcode);
- for(i = 0; i < nargs; ++i) {
- const char* abs = inst->RGB.Arg[i].Abs ? "|" : "";
- const char* neg = inst->RGB.Arg[i].Negate ? "-" : "";
- _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source,
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)),
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)),
- swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)),
- abs);
- }
- _mesa_printf("\n");
-
- _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? "_SAT" : "");
- if (inst->Alpha.WriteMask)
- _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex);
- if (inst->Alpha.OutputWriteMask)
- _mesa_printf(" COLOR.w");
- if (inst->Alpha.DepthWriteMask)
- _mesa_printf(" DEPTH.w");
- nargs = num_pairinst_args(inst->Alpha.Opcode);
- for(i = 0; i < nargs; ++i) {
- const char* abs = inst->Alpha.Arg[i].Abs ? "|" : "";
- const char* neg = inst->Alpha.Arg[i].Negate ? "-" : "";
- _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source,
- swizzle_char(inst->Alpha.Arg[i].Swizzle), abs);
- }
- _mesa_printf("\n");
-}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index ff76178551..1600598428 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -28,116 +28,97 @@
#ifndef __RADEON_PROGRAM_PAIR_H_
#define __RADEON_PROGRAM_PAIR_H_
-#include "radeon_program.h"
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
struct r300_fragment_program_compiler;
/**
- * Represents a paired instruction, as found in R300 and R500
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
* fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
*/
+
+
struct radeon_pair_instruction_source {
- GLuint Index:8;
- GLuint Constant:1;
- GLuint Used:1;
+ unsigned int Used:1;
+ rc_register_file File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
};
struct radeon_pair_instruction_rgb {
- GLuint Opcode:8;
- GLuint DestIndex:8;
- GLuint WriteMask:3;
- GLuint OutputWriteMask:3;
- GLuint Saturate:1;
+ rc_opcode Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:3;
+ unsigned int OutputWriteMask:3;
+ unsigned int Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct {
- GLuint Source:2;
- GLuint Swizzle:9;
- GLuint Abs:1;
- GLuint Negate:1;
+ unsigned int Source:2;
+ unsigned int Swizzle:9;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
} Arg[3];
};
struct radeon_pair_instruction_alpha {
- GLuint Opcode:8;
- GLuint DestIndex:8;
- GLuint WriteMask:1;
- GLuint OutputWriteMask:1;
- GLuint DepthWriteMask:1;
- GLuint Saturate:1;
+ rc_opcode Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:1;
+ unsigned int OutputWriteMask:1;
+ unsigned int DepthWriteMask:1;
+ unsigned int Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct {
- GLuint Source:2;
- GLuint Swizzle:3;
- GLuint Abs:1;
- GLuint Negate:1;
+ unsigned int Source:2;
+ unsigned int Swizzle:3;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
} Arg[3];
};
-struct radeon_pair_instruction {
+struct rc_pair_instruction {
struct radeon_pair_instruction_rgb RGB;
struct radeon_pair_instruction_alpha Alpha;
-};
-
-enum {
- RADEON_OPCODE_TEX = 0,
- RADEON_OPCODE_TXB,
- RADEON_OPCODE_TXP,
- RADEON_OPCODE_KIL
+ rc_write_aluresult WriteALUResult:2;
+ rc_compare_func ALUResultCompare:3;
};
-struct radeon_pair_texture_instruction {
- GLuint Opcode:2; /**< one of RADEON_OPCODE_xxx */
-
- GLuint DestIndex:8;
- GLuint WriteMask:4;
- GLuint TexSrcUnit:5;
- GLuint TexSrcTarget:3;
-
- GLuint SrcIndex:8;
- GLuint SrcSwizzle:12;
-};
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index);
+/*@}*/
/**
- *
+ * Compiler passes that operate with the paired format.
*/
-struct radeon_pair_handler {
- /**
- * Write a paired instruction to the hardware.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*);
-
- /**
- * Write a texture instruction to the hardware.
- * Register indices have already been rewritten to the allocated
- * hardware register numbers.
- *
- * @return GL_FALSE on error.
- */
- GLboolean (*EmitTex)(void*, struct radeon_pair_texture_instruction*);
-
- /**
- * Called before a block of contiguous, independent texture
- * instructions is emitted.
- */
- GLboolean (*BeginTexBlock)(void*);
-
- unsigned MaxHwTemps;
-};
-
-void radeonPairProgram(
- struct r300_fragment_program_compiler * compiler,
- const struct radeon_pair_handler*, void *userdata);
+/*@{*/
+struct radeon_pair_handler;
-void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
+void rc_pair_translate(struct r300_fragment_program_compiler *c);
+void rc_pair_schedule(struct r300_fragment_program_compiler *c);
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps);
+/*@}*/
#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
new file mode 100644
index 0000000000..d863b82d53
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+ switch(target) {
+ case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+ case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+ case RC_TEXTURE_CUBE: return "CUBE";
+ case RC_TEXTURE_3D: return "3D";
+ case RC_TEXTURE_RECT: return "RECT";
+ case RC_TEXTURE_2D: return "2D";
+ case RC_TEXTURE_1D: return "1D";
+ default: return "BAD_TEXTURE_TARGET";
+ }
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+ if (func == RC_COMPARE_FUNC_NEVER) {
+ fprintf(f, "false");
+ } else if (func == RC_COMPARE_FUNC_ALWAYS) {
+ fprintf(f, "true");
+ } else {
+ const char * op;
+ switch(func) {
+ case RC_COMPARE_FUNC_LESS: op = "<"; break;
+ case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+ case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+ case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+ case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+ case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+ default: op = "???"; break;
+ }
+ fprintf(f, "%s %s %s", lhs, op, rhs);
+ }
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+ if (file == RC_FILE_NONE) {
+ fprintf(f, "none");
+ } else if (file == RC_FILE_SPECIAL) {
+ switch(index) {
+ case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+ default: fprintf(f, "special[%i]", index); break;
+ }
+ } else {
+ const char * filename;
+ switch(file) {
+ case RC_FILE_TEMPORARY: filename = "temp"; break;
+ case RC_FILE_INPUT: filename = "input"; break;
+ case RC_FILE_OUTPUT: filename = "output"; break;
+ case RC_FILE_ADDRESS: filename = "addr"; break;
+ case RC_FILE_CONSTANT: filename = "const"; break;
+ default: filename = "BAD FILE"; break;
+ }
+ fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+ }
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+ if (mask & RC_MASK_X) fprintf(f, "x");
+ if (mask & RC_MASK_Y) fprintf(f, "y");
+ if (mask & RC_MASK_Z) fprintf(f, "z");
+ if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+ rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+ if (dst.WriteMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, dst.WriteMask);
+ }
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+ switch(swz) {
+ case RC_SWIZZLE_X: return 'x';
+ case RC_SWIZZLE_Y: return 'y';
+ case RC_SWIZZLE_Z: return 'z';
+ case RC_SWIZZLE_W: return 'w';
+ case RC_SWIZZLE_ZERO: return '0';
+ case RC_SWIZZLE_ONE: return '1';
+ case RC_SWIZZLE_HALF: return 'H';
+ case RC_SWIZZLE_UNUSED: return '_';
+ }
+ return '?';
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+ unsigned int comp;
+ for(comp = 0; comp < 4; ++comp) {
+ rc_swizzle swz = GET_SWZ(swizzle, comp);
+ if (GET_BIT(negate, comp))
+ fprintf(f, "-");
+ fprintf(f, "%c", rc_swizzle_char(swz));
+ }
+}
+
+static void rc_print_src_register(FILE * f, struct rc_src_register src)
+{
+ int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+ if (src.Negate == RC_MASK_XYZW)
+ fprintf(f, "-");
+ if (src.Abs)
+ fprintf(f, "|");
+
+ rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+ if (src.Abs && !trivial_negate)
+ fprintf(f, "|");
+
+ if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+ fprintf(f, ".");
+ rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+ }
+
+ if (src.Abs && trivial_negate)
+ fprintf(f, "|");
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int reg;
+
+ fprintf(f, "%s", opcode->Name);
+
+ switch(inst->U.I.SaturateMode) {
+ case RC_SATURATE_NONE: break;
+ case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+ case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+ default: fprintf(f, "_BAD_SAT"); break;
+ }
+
+ if (opcode->HasDstReg) {
+ fprintf(f, " ");
+ rc_print_dst_register(f, inst->U.I.DstReg);
+ if (opcode->NumSrcRegs)
+ fprintf(f, ",");
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ if (reg > 0)
+ fprintf(f, ",");
+ fprintf(f, " ");
+ rc_print_src_register(f, inst->U.I.SrcReg[reg]);
+ }
+
+ if (opcode->HasTexture) {
+ fprintf(f, ", %s%s[%u]",
+ textarget_to_string(inst->U.I.TexSrcTarget),
+ inst->U.I.TexShadow ? "SHADOW" : "",
+ inst->U.I.TexSrcUnit);
+ }
+
+ fprintf(f, ";");
+
+ if (inst->U.I.WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f,
+ (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+ inst->U.I.ALUResultCompare, "0");
+ fprintf(f, ")]");
+ }
+
+ fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ int printedsrc = 0;
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%u.xyz = ", src);
+ rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ if (inst->Alpha.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%u.w = ", src);
+ rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ }
+ fprintf(f, "\n");
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+ if (inst->RGB.WriteMask)
+ fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+ (inst->RGB.WriteMask & 1) ? "x" : "",
+ (inst->RGB.WriteMask & 2) ? "y" : "",
+ (inst->RGB.WriteMask & 4) ? "z" : "");
+ if (inst->RGB.OutputWriteMask)
+ fprintf(f, " color.%s%s%s",
+ (inst->RGB.OutputWriteMask & 1) ? "x" : "",
+ (inst->RGB.OutputWriteMask & 2) ? "y" : "",
+ (inst->RGB.OutputWriteMask & 4) ? "z" : "");
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ fprintf(f, " aluresult");
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+ abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+ if (inst->Alpha.WriteMask)
+ fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+ if (inst->Alpha.OutputWriteMask)
+ fprintf(f, " color.w");
+ if (inst->Alpha.DepthWriteMask)
+ fprintf(f, " depth.w");
+ if (inst->WriteALUResult == RC_ALURESULT_W)
+ fprintf(f, " aluresult");
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
+ rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+ fprintf(f, ")]\n");
+ }
+}
+
+/**
+ * Print every instruction of \p prog to stderr, each prefixed by its index.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+ unsigned int linenum = 0;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program\n");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3u: ", linenum);
+
+ if (inst->Type == RC_INSTRUCTION_PAIR)
+ rc_print_pair_instruction(stderr, inst);
+ else
+ rc_print_normal_instruction(stderr, inst);
+
+ linenum++;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
new file mode 100644
index 0000000000..c81d5f7a5e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+ unsigned char NumPhases;
+ unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ *
+ * \return 1 if the swizzle is native for the given opcode
+ */
+ int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+ /**
+ * Determine how to split access to the masked channels of the
+ * given source register to obtain ALU-native swizzles.
+ */
+ void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index 0fe32a5443..da5b7ba642 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -684,11 +684,7 @@ void r300InitCmdBuf(r300ContextPtr r300)
r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
- if ((r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ||
- ( !r300->radeon.radeonScreen->kernel_mm && (
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS400) ||
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
- (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420) ) ) ) {
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) {
ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
} else {
ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0);
@@ -697,6 +693,14 @@ void r300InitCmdBuf(r300ContextPtr r300)
ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
r300->hw.zs.cmd[R300_ZS_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
+ if (is_r500) {
+ if (r300->radeon.radeonScreen->kernel_mm)
+ ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0);
+ else
+ ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0);
+ r300->hw.zsb.cmd[R300_ZSB_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1);
+ }
ALLOC_STATE(zstencil_format, always, 5, 0);
r300->hw.zstencil_format.cmd[0] =
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 9df3897e65..2c2b16aa98 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -84,6 +84,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_fog_coord
#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_provoking_vertex
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
#define need_GL_ATI_separate_stencil
@@ -116,6 +117,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_packed_depth_stencil", NULL},
{"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
{"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
{"GL_EXT_shadow_funcs", NULL},
{"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 1dadcc0a69..518d5cdbf4 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -234,6 +234,10 @@ typedef struct r300_context *r300ContextPtr;
#define R300_ZS_CNTL_2 3
#define R300_ZS_CMDSIZE 4
+#define R300_ZSB_CMD_0 0
+#define R300_ZSB_CNTL_0 1
+#define R300_ZSB_CMDSIZE 2
+
#define R300_ZB_CMD_0 0
#define R300_ZB_OFFSET 1
#define R300_ZB_PITCH 2
@@ -343,6 +347,7 @@ struct r300_hw_state {
struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */
struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
struct radeon_state_atom zs; /* zstencil control (4F00) */
+ struct radeon_state_atom zsb; /* zstencil bf */
struct radeon_state_atom zstencil_format;
struct radeon_state_atom zb; /* z buffer (4F20) */
struct radeon_state_atom zb_depthclearvalue; /* (4F28) */
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 70c9252894..267ee81a7a 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -44,6 +44,7 @@
#include "compiler/radeon_compiler.h"
+#include "radeon_mesa_to_rc.h"
#include "r300_state.h"
@@ -131,7 +132,7 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler,
*/
static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
{
- struct prog_src_register src;
+ struct rc_src_register src;
int i;
fp->fog_attr = FRAG_ATTRIB_MAX;
@@ -155,7 +156,7 @@ static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r
}
memset(&src, 0, sizeof(src));
- src.File = PROGRAM_INPUT;
+ src.File = RC_FILE_INPUT;
src.Index = fp->fog_attr;
src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
@@ -232,7 +233,7 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog
fflush(stderr);
}
- rc_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
+ radeon_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
insert_WPOS_trailer(&compiler, fp);
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 39b4b61a10..623da60333 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -1022,15 +1022,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
- R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
- R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */
# define R300_RE_SHADE_MODEL_FLAT ( \
R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
- R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \
- R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST )
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */
#define R300_GA_SOLID_RG 0x427c
@@ -2315,6 +2313,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_Z_WRITE_ENABLE (1 << 2)
# define R300_Z_SIGNED_COMPARE (1 << 3)
# define R300_STENCIL_FRONT_BACK (1 << 4)
+# define R400_ZSIGNED_MAGNITUDE (1 << 5)
+# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6)
#define R300_ZB_ZSTENCILCNTL 0x4f04
/* functions */
@@ -3002,6 +3002,8 @@ enum {
# define R500_INST_RGB_CLAMP (1 << 19)
# define R500_INST_ALPHA_CLAMP (1 << 20)
# define R500_INST_ALU_RESULT_SEL (1 << 21)
+# define R500_INST_ALU_RESULT_SEL_RED (0 << 21)
+# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21)
# define R500_INST_ALPHA_PRED_INV (1 << 22)
# define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
# define R500_INST_ALU_RESULT_OP_LT (1 << 23)
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 9301543d38..3060f49aaf 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -590,7 +590,9 @@ static void r300SetDepthState(GLcontext * ctx)
r300ContextPtr r300 = R300_CONTEXT(ctx);
R300_STATECHANGE(r300, zs);
- r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK;
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &= (R300_STENCIL_ENABLE |
+ R300_STENCIL_FRONT_BACK |
+ R500_STENCIL_REFMASK_FRONT_BACK);
r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
if (ctx->Depth.Test) {
@@ -604,11 +606,16 @@ static void r300SetDepthState(GLcontext * ctx)
static void r300CatchStencilFallback(GLcontext *ctx)
{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
const unsigned back = ctx->Stencil._BackFace;
- if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
- || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
- || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) {
+ if (rmesa->radeon.radeonScreen->kernel_mm &&
+ (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) {
+ r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
+ } else if (ctx->Stencil._Enabled &&
+ (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
+ || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
+ || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) {
r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE);
} else {
r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
@@ -915,11 +922,24 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
(flag << R300_S_BACK_FUNC_SHIFT);
rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK;
+ R300_STATECHANGE(rmesa, zsb);
+ refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT)
+ | ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT);
+
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &=
+ ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+ (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask;
+ }
}
static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ const unsigned back = ctx->Stencil._BackFace;
r300CatchStencilFallback(ctx);
@@ -931,6 +951,13 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
(ctx->Stencil.
WriteMask[0] & R300_STENCILREF_MASK) <<
R300_STENCILWRITEMASK_SHIFT;
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ R300_STATECHANGE(rmesa, zsb);
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |=
+ (ctx->Stencil.
+ WriteMask[back] & R300_STENCILREF_MASK) <<
+ R300_STENCILWRITEMASK_SHIFT;
+ }
}
static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
@@ -2253,6 +2280,14 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
R300_STATECHANGE(r300, zb);
}
+ if (new_state & (_NEW_LIGHT)) {
+ R300_STATECHANGE(r300, shade2);
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+ r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ else
+ r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ }
+
r300->radeon.NewGLState |= new_state;
}
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index 2f7b67c143..43629d643b 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -41,7 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "tnl/tnl.h"
#include "compiler/radeon_compiler.h"
-#include "compiler/radeon_nqssadce.h"
+#include "radeon_mesa_to_rc.h"
#include "r300_context.h"
#include "r300_fragprog_common.h"
#include "r300_state.h"
@@ -217,20 +217,20 @@ static void initialize_NV_registers(struct radeon_compiler * compiler)
for(reg = 0; reg < 12; ++reg) {
inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
- inst->I.Opcode = OPCODE_MOV;
- inst->I.DstReg.File = PROGRAM_TEMPORARY;
- inst->I.DstReg.Index = reg;
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = reg;
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
}
inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
- inst->I.Opcode = OPCODE_ARL;
- inst->I.DstReg.File = PROGRAM_ADDRESS;
- inst->I.DstReg.Index = 0;
- inst->I.DstReg.WriteMask = WRITEMASK_X;
- inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
- inst->I.SrcReg[0].Swizzle = SWIZZLE_0000;
+ inst->U.I.Opcode = RC_OPCODE_ARL;
+ inst->U.I.DstReg.File = RC_FILE_ADDRESS;
+ inst->U.I.DstReg.Index = 0;
+ inst->U.I.DstReg.WriteMask = WRITEMASK_X;
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
}
static struct r300_vertex_program *build_program(GLcontext *ctx,
@@ -261,7 +261,7 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, vp->Base);
}
- rc_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
+ radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
if (mesa_vp->IsNVProgram)
initialize_NV_registers(&compiler.Base);
diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
index 250570f6b8..da4812d323 100644
--- a/src/mesa/drivers/dri/r300/radeon_context.h
+++ b/src/mesa/drivers/dri/r300/radeon_context.h
@@ -51,26 +51,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_screen.h"
-#if R200_MERGED
-extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-
-#define FALLBACK( radeon, bit, mode ) do { \
- if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
- __FUNCTION__, bit, mode ); \
- radeonFallback( (radeon)->glCtx, bit, mode ); \
-} while (0)
-#else
/* Stub fallback hook: only reports where it was invoked, as "file:line".
 * __LINE__ is an int, so it must be printed with %d (the former "%s" was
 * undefined behaviour). The trailing ';' is kept for existing call sites. */
#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%d\n", __FILE__, __LINE__);
-#endif
/* TCL fallbacks */
extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
-#if R200_MERGED
-#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
-#else
#define TCL_FALLBACK( ctx, bit, mode ) ;
-#endif
#endif /* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
new file mode 100644
index 0000000000..9f9dec840b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_mesa_to_rc.h"
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_program.h"
+
+
+/**
+ * Map a Mesa instruction opcode onto the compiler's rc_opcode.
+ *
+ * Every opcode listed below maps to the RC opcode with the same suffix;
+ * anything else yields RC_OPCODE_ILLEGAL_OPCODE.
+ */
+static rc_opcode translate_opcode(gl_inst_opcode opcode)
+{
+/* One table row: OPCODE_<name> -> RC_OPCODE_<name>. */
+#define MESA_TO_RC(name) case OPCODE_##name: return RC_OPCODE_##name
+	switch(opcode) {
+	MESA_TO_RC(NOP);
+	MESA_TO_RC(ABS);
+	MESA_TO_RC(ADD);
+	MESA_TO_RC(ARL);
+	MESA_TO_RC(CMP);
+	MESA_TO_RC(COS);
+	MESA_TO_RC(DDX);
+	MESA_TO_RC(DDY);
+	MESA_TO_RC(DP3);
+	MESA_TO_RC(DP4);
+	MESA_TO_RC(DPH);
+	MESA_TO_RC(DST);
+	MESA_TO_RC(EX2);
+	MESA_TO_RC(EXP);
+	MESA_TO_RC(FLR);
+	MESA_TO_RC(FRC);
+	MESA_TO_RC(KIL);
+	MESA_TO_RC(LG2);
+	MESA_TO_RC(LIT);
+	MESA_TO_RC(LOG);
+	MESA_TO_RC(LRP);
+	MESA_TO_RC(MAD);
+	MESA_TO_RC(MAX);
+	MESA_TO_RC(MIN);
+	MESA_TO_RC(MOV);
+	MESA_TO_RC(MUL);
+	MESA_TO_RC(POW);
+	MESA_TO_RC(RCP);
+	MESA_TO_RC(RSQ);
+	MESA_TO_RC(SCS);
+	MESA_TO_RC(SEQ);
+	MESA_TO_RC(SFL);
+	MESA_TO_RC(SGE);
+	MESA_TO_RC(SGT);
+	MESA_TO_RC(SIN);
+	MESA_TO_RC(SLE);
+	MESA_TO_RC(SLT);
+	MESA_TO_RC(SNE);
+	MESA_TO_RC(SUB);
+	MESA_TO_RC(SWZ);
+	MESA_TO_RC(TEX);
+	MESA_TO_RC(TXB);
+	MESA_TO_RC(TXD);
+	MESA_TO_RC(TXL);
+	MESA_TO_RC(TXP);
+	MESA_TO_RC(XPD);
+	default:
+		return RC_OPCODE_ILLEGAL_OPCODE;
+	}
+#undef MESA_TO_RC
+}
+
+/**
+ * Convert a Mesa saturate mode into an rc_saturate_mode.
+ *
+ * Only SATURATE_ZERO_ONE is carried over; SATURATE_OFF and every other
+ * value translate to RC_SATURATE_NONE.
+ */
+static rc_saturate_mode translate_saturate(unsigned int saturate)
+{
+	if (saturate == SATURATE_ZERO_ONE)
+		return RC_SATURATE_ZERO_ONE;
+
+	return RC_SATURATE_NONE;
+}
+
+/**
+ * Convert a Mesa register file identifier into an rc_register_file.
+ *
+ * All parameter-like Mesa files (local/env parameters, state variables,
+ * named parameters, constants and uniforms) collapse into RC_FILE_CONSTANT.
+ * Files without a listed counterpart become RC_FILE_NONE.
+ */
+static rc_register_file translate_register_file(unsigned int file)
+{
+	rc_register_file result = RC_FILE_NONE;
+
+	switch(file) {
+	case PROGRAM_TEMPORARY:
+		result = RC_FILE_TEMPORARY;
+		break;
+	case PROGRAM_INPUT:
+		result = RC_FILE_INPUT;
+		break;
+	case PROGRAM_OUTPUT:
+		result = RC_FILE_OUTPUT;
+		break;
+	case PROGRAM_ADDRESS:
+		result = RC_FILE_ADDRESS;
+		break;
+	case PROGRAM_LOCAL_PARAM:
+	case PROGRAM_ENV_PARAM:
+	case PROGRAM_STATE_VAR:
+	case PROGRAM_NAMED_PARAM:
+	case PROGRAM_CONSTANT:
+	case PROGRAM_UNIFORM:
+		result = RC_FILE_CONSTANT;
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+/**
+ * Fill an rc_src_register from a Mesa source register.
+ *
+ * The register file goes through translate_register_file(); every other
+ * field is copied over unchanged.
+ */
+static void translate_srcreg(struct rc_src_register * dest, struct prog_src_register * src)
+{
+	/* Fields that are copied verbatim. */
+	dest->Index = src->Index;
+	dest->RelAddr = src->RelAddr;
+	dest->Swizzle = src->Swizzle;
+	dest->Abs = src->Abs;
+	dest->Negate = src->Negate;
+
+	/* The only field that needs an actual translation. */
+	dest->File = translate_register_file(src->File);
+}
+
+/**
+ * Fill an rc_dst_register from a Mesa destination register.
+ *
+ * The register file goes through translate_register_file(); index,
+ * relative-addressing flag and write mask are copied over unchanged.
+ */
+static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_register * src)
+{
+	/* Fields that are copied verbatim. */
+	dest->Index = src->Index;
+	dest->RelAddr = src->RelAddr;
+	dest->WriteMask = src->WriteMask;
+
+	/* The only field that needs an actual translation. */
+	dest->File = translate_register_file(src->File);
+}
+
+/**
+ * Convert a Mesa texture index into an rc_texture_target.
+ *
+ * Any index that is not listed explicitly is treated as a 2D texture.
+ */
+static rc_texture_target translate_tex_target(gl_texture_index target)
+{
+	switch(target) {
+	case TEXTURE_1D_INDEX:
+		return RC_TEXTURE_1D;
+	case TEXTURE_3D_INDEX:
+		return RC_TEXTURE_3D;
+	case TEXTURE_CUBE_INDEX:
+		return RC_TEXTURE_CUBE;
+	case TEXTURE_RECT_INDEX:
+		return RC_TEXTURE_RECT;
+	case TEXTURE_1D_ARRAY_INDEX:
+		return RC_TEXTURE_1D_ARRAY;
+	case TEXTURE_2D_ARRAY_INDEX:
+		return RC_TEXTURE_2D_ARRAY;
+	case TEXTURE_2D_INDEX:
+	default:
+		return RC_TEXTURE_2D;
+	}
+}
+
+/**
+ * Translate one Mesa instruction into an rc_instruction.
+ *
+ * An opcode without an RC counterpart is reported through rc_error() and
+ * the remaining fields of dest are left untouched. Otherwise only the
+ * parts the opcode info says exist (source registers, destination
+ * register, texture fields) are translated.
+ */
+static void translate_instruction(struct radeon_compiler * c,
+	struct rc_instruction * dest, struct prog_instruction * src)
+{
+	const struct rc_opcode_info * info;
+	unsigned int arg = 0;
+
+	dest->U.I.Opcode = translate_opcode(src->Opcode);
+	if (dest->U.I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) {
+		rc_error(c, "Unsupported opcode %i\n", src->Opcode);
+		return;
+	}
+
+	dest->U.I.SaturateMode = translate_saturate(src->SaturateMode);
+
+	info = rc_get_opcode_info(dest->U.I.Opcode);
+
+	while (arg < info->NumSrcRegs) {
+		translate_srcreg(&dest->U.I.SrcReg[arg], &src->SrcReg[arg]);
+		++arg;
+	}
+
+	if (info->HasDstReg) {
+		translate_dstreg(&dest->U.I.DstReg, &src->DstReg);
+	}
+
+	if (!info->HasTexture)
+		return;
+
+	dest->U.I.TexSrcUnit = src->TexSrcUnit;
+	dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget);
+	dest->U.I.TexShadow = src->TexShadow;
+}
+
+/**
+ * Append one 4-component RC_CONSTANT_EXTERNAL constant that refers to
+ * external slot \p index to the compiler's constant list.
+ */
+static void add_external_constant(struct radeon_compiler * c, unsigned int index)
+{
+	struct rc_constant constant;
+
+	constant.Type = RC_CONSTANT_EXTERNAL;
+	constant.Size = 4;
+	constant.u.External = index;
+
+	rc_constants_add(&c->Program.Constants, &constant);
+}
+
+/**
+ * Import a Mesa gl_program into the radeon compiler.
+ *
+ * All instructions up to (not including) OPCODE_END are translated and
+ * inserted after the current last instruction, the sampler/input/output
+ * masks are copied, and one external constant is registered per program
+ * parameter (or a fixed set of 96 for NV vertex programs).
+ */
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
+{
+	struct prog_instruction *source = program->Instructions;
+	unsigned int i;
+	int isNVProgram = 0;
+
+	while (source->Opcode != OPCODE_END) {
+		struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+		translate_instruction(c, dest, source);
+		++source;
+	}
+
+	c->Program.ShadowSamplers = program->ShadowSamplers;
+	c->Program.InputsRead = program->InputsRead;
+	c->Program.OutputsWritten = program->OutputsWritten;
+
+	if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+		struct gl_vertex_program * vp = (struct gl_vertex_program *) program;
+		isNVProgram = vp->IsNVProgram;
+	}
+
+	if (!isNVProgram) {
+		for(i = 0; i < program->Parameters->NumParameters; ++i)
+			add_external_constant(c, i);
+		return;
+	}
+
+	/* NV_vertex_program has a fixed-sized constant environment.
+	 * This could be handled more efficiently for programs that
+	 * do not use relative addressing.
+	 */
+	for(i = 0; i < 96; ++i)
+		add_external_constant(c, i);
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h
new file mode 100644
index 0000000000..9511a04f36
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_MESA_TO_RC_H
+#define RADEON_MESA_TO_RC_H
+
+struct gl_program;
+struct radeon_compiler;
+
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
+#endif /* RADEON_MESA_TO_RC_H */
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index d925a2dfe3..7d5a7b1ab6 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -29,6 +29,7 @@ COMMON_SOURCES = \
RADEON_COMMON_SOURCES = \
radeon_bo_legacy.c \
radeon_common_context.c \
+ radeon_buffer_objects.c \
radeon_common.c \
radeon_cs_legacy.c \
radeon_dma.c \
@@ -65,8 +66,7 @@ DRIVER_SOURCES = \
C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
-DRIVER_DEFINES = -DCOMPILE_R600 -DR200_MERGED=0 \
- -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R600 \
+DRIVER_DEFINES = -DRADEON_R600 \
# -DRADEON_BO_TRACK \
-Wall
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index e0b77d4385..969144ba12 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -59,6 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_debug.h"
#include "r600_context.h"
#include "radeon_common_context.h"
+#include "radeon_buffer_objects.h"
#include "radeon_span.h"
#include "r600_cmdbuf.h"
#include "r600_emit.h"
@@ -85,6 +86,7 @@ int hw_tcl_on = 1;
#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_fog_coord
#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_provoking_vertex
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
#define need_GL_ATI_separate_stencil
@@ -117,6 +119,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_packed_depth_stencil", NULL},
{"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
{"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
{"GL_EXT_shadow_funcs", NULL},
{"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
@@ -128,6 +131,7 @@ const struct dri_extension card_extensions[] = {
{"GL_EXT_texture_lod_bias", NULL},
{"GL_EXT_texture_mirror_clamp", NULL},
{"GL_EXT_texture_rectangle", NULL},
+ {"GL_EXT_texture_sRGB", NULL},
{"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
{"GL_ATI_texture_env_combine3", NULL},
{"GL_ATI_texture_mirror_once", NULL},
@@ -254,6 +258,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
r600InitTextureFuncs(&functions);
r700InitShaderFuncs(&functions);
r700InitIoctlFuncs(&functions);
+ radeonInitBufferObjectFuncs(&functions);
if (!radeonInitContext(&r600->radeon, &functions,
glVisual, driContextPriv,
@@ -372,6 +377,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
}
+ r700InitDraw(ctx);
+
radeon_fbo_init(&r600->radeon);
radeonInitSpanFuncs( ctx );
@@ -383,9 +390,6 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) {
radeon_warning("disabling 3D acceleration\n");
-#if R200_MERGED
- FALLBACK(&r600->radeon, RADEON_FALLBACK_DISABLE, 1);
-#endif
}
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h
index 9397ecde81..7f68820fda 100644
--- a/src/mesa/drivers/dri/r600/r600_context.h
+++ b/src/mesa/drivers/dri/r600/r600_context.h
@@ -126,6 +126,32 @@ struct r600_hw_state {
struct radeon_state_atom tx_brdr_clr;
};
+/*
+ * Description of one vertex attribute stream.
+ * r700SetupVTXConstants2() reads size, type, stride and normalize from it
+ * when programming the vertex fetch constants.
+ */
+typedef struct StreamDesc
+{
+ GLint size; // number of data elements; used with type for GetSurfaceFormat()
+ GLenum type; // data element type
+ GLsizei stride; // when 0, r700SetupVTXConstants2() sizes the buffer as count * size * getTypeSize(type)
+
+ // NOTE(review): presumably the buffer object holding the data and the offset into it - confirm
+ struct radeon_bo *bo;
+ GLint bo_offset;
+
+ GLuint dwords;
+ GLuint dst_loc;
+ GLuint _signed;
+ GLboolean normalize; // GL_TRUE makes r700SetupVTXConstants2() select SQ_NUM_FORMAT_NORM
+ GLboolean is_named_bo;
+ GLubyte element;
+} StreamDesc;
+
+/*
+ * Index buffer state; stored in struct r600_context as ind_buf.
+ * NOTE(review): field meanings below are inferred from the names - confirm against the draw code.
+ */
+typedef struct r700_index_buffer
+{
+ struct radeon_bo *bo; // presumably the buffer object containing the indices
+ int bo_offset; // presumably the offset of the first index inside bo
+
+ GLboolean is_32bit; // presumably selects 32-bit rather than 16-bit indices
+ GLuint count; // presumably the number of indices
+} r700_index_buffer;
+
/**
* \brief R600 context structure.
*/
@@ -144,6 +170,9 @@ struct r600_context {
GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+ GLint nNumActiveAos;
+ StreamDesc stream_desc[VERT_ATTRIB_MAX];
+ struct r700_index_buffer ind_buf;
};
#define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx))
@@ -177,6 +206,7 @@ extern GLboolean r700SyncSurf(context_t *context,
extern void r700SetupStreams(GLcontext * ctx);
extern void r700Start3D(context_t *context);
extern void r600InitAtoms(context_t *context);
+extern void r700InitDraw(GLcontext *ctx);
#define RADEON_D_CAPTURE 0
#define RADEON_D_PLAYBACK 1
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index bcb8d7c73d..7d7e77d355 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -531,6 +531,49 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo
return GL_FALSE;
}
break;
+ /* EXT_texture_sRGB */
+ case MESA_FORMAT_SRGBA8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SLA8:
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SL8: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+ SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
default:
/* Not supported format */
return GL_FALSE;
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 00eda544d4..903b6968be 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -790,6 +790,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
return GL_TRUE;
}
+/*
+ * Build, or update in place, the vertex fetch instruction for one stream.
+ *
+ * If pAsm->vfetch_instruction_ptr_array[element] is already set, that
+ * instruction is rewritten; otherwise a new one is allocated, passed to
+ * add_vfetch_instruction() and remembered in that array slot.
+ *
+ *   destination_register  stored in the instruction's dst_gpr field
+ *   type, size            passed to GetSurfaceFormat(); size also selects
+ *                         the destination swizzle (missing x/y/z read as 0,
+ *                         missing w reads as 1)
+ *   element               used as buffer_id and as the index into
+ *                         pAsm->vfetch_instruction_ptr_array
+ *   _signed               1 selects SQ_FORMAT_COMP_SIGNED, anything else
+ *                         SQ_FORMAT_COMP_UNSIGNED
+ *   normalize             GL_TRUE selects SQ_NUM_FORMAT_NORM, anything else
+ *                         SQ_NUM_FORMAT_INT
+ *   pFetchMethod          bEnableMini is read; mega_fetch_remainder is
+ *                         written on the mega-fetch path
+ *
+ * Returns GL_TRUE on success, GL_FALSE if the allocation fails, if
+ * add_vfetch_instruction() fails, or if the array slot is unexpectedly
+ * occupied after the instruction was added.
+ */
+GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
+                                       GLuint              destination_register,
+                                       GLenum              type,
+                                       GLint               size,
+                                       GLubyte             element,
+                                       GLuint              _signed,
+                                       GLboolean           normalize,
+                                       VTX_FETCH_METHOD *  pFetchMethod)
+{
+    GLuint client_size_inbyte;
+    GLuint data_format;
+    /* Mini fetch is not implemented yet. Start from zero so the instruction
+     * words below never receive indeterminate values when bEnableMini is set
+     * (these two locals used to be read uninitialized on that path). */
+    GLuint mega_fetch_count = 0;
+    GLuint is_mega_fetch_flag = 0;
+
+    R700VertexGenericFetch* vfetch_instruction_ptr;
+    R700VertexGenericFetch* assembled_vfetch_instruction_ptr
+                                 = pAsm->vfetch_instruction_ptr_array[element];
+
+    if (assembled_vfetch_instruction_ptr == NULL)
+    {
+        vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
+        if (vfetch_instruction_ptr == NULL)
+        {
+            return GL_FALSE;
+        }
+        Init_R700VertexGenericFetch(vfetch_instruction_ptr);
+    }
+    else
+    {
+        vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
+    }
+
+    data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
+
+    if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
+    {
+        //TODO : mini fetch
+    }
+    else
+    {
+        mega_fetch_count = MEGA_FETCH_BYTES - 1;
+        is_mega_fetch_flag = 0x1;
+        pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
+    }
+
+    vfetch_instruction_ptr->m_Word0.f.vtx_inst         = SQ_VTX_INST_FETCH;
+    vfetch_instruction_ptr->m_Word0.f.fetch_type       = SQ_VTX_FETCH_VERTEX_DATA;
+    vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+    vfetch_instruction_ptr->m_Word0.f.buffer_id        = element;
+    vfetch_instruction_ptr->m_Word0.f.src_gpr          = 0x0;
+    vfetch_instruction_ptr->m_Word0.f.src_rel          = SQ_ABSOLUTE;
+    vfetch_instruction_ptr->m_Word0.f.src_sel_x        = SQ_SEL_X;
+    vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
+
+    /* Components the stream does not provide read as 0 (x, y, z) or 1 (w). */
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+    vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
+    vfetch_instruction_ptr->m_Word1.f.data_format      = data_format;
+    vfetch_instruction_ptr->m_Word2.f.endian_swap      = SQ_ENDIAN_NONE;
+
+    if(1 == _signed)
+    {
+        vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
+    }
+    else
+    {
+        vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
+    }
+
+    if(GL_TRUE == normalize)
+    {
+        vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
+    }
+    else
+    {
+        vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
+    }
+
+    // Destination register
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
+
+    vfetch_instruction_ptr->m_Word2.f.offset              = 0;
+    vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
+
+    vfetch_instruction_ptr->m_Word2.f.mega_fetch          = is_mega_fetch_flag;
+
+    if (assembled_vfetch_instruction_ptr == NULL)
+    {
+        /* NOTE(review): if add_vfetch_instruction() does not take ownership
+         * on failure, the instruction allocated above leaks here - confirm. */
+        if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
+        {
+            return GL_FALSE;
+        }
+
+        if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
+        {
+            return GL_FALSE;
+        }
+        else
+        {
+            pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Reset the assembler's vertex fetch bookkeeping: mark the current clause
+ * as empty, drop the current VTX clause pointer, clear every slot of
+ * vfetch_instruction_ptr_array and then call cleanup_vfetch_shaderinst()
+ * on the shader. Always returns GL_TRUE.
+ */
+GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
+{
+    GLint slot = 0;
+
+    pAsm->cf_current_clause_type    = CF_EMPTY_CLAUSE;
+    pAsm->cf_current_vtx_clause_ptr = NULL;
+
+    while (slot < VERT_ATTRIB_MAX)
+    {
+        pAsm->vfetch_instruction_ptr_array[slot] = NULL;
+        slot++;
+    }
+
+    cleanup_vfetch_shaderinst(pAsm->pR700Shader);
+
+    return GL_TRUE;
+}
+
GLuint gethelpr(r700_AssemblerBase* pAsm)
{
GLuint r = pAsm->uHelpReg;
@@ -1951,9 +2078,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
GLuint contiguous_slots_needed;
GLuint uNumSrc = r700GetNumOperands(pAsm);
- GLuint channel_swizzle, j;
- GLuint chan_counter[4] = {0, 0, 0, 0};
- PVSSRC * pSource[3];
+ //GLuint channel_swizzle, j;
+ //GLuint chan_counter[4] = {0, 0, 0, 0};
+ //PVSSRC * pSource[3];
GLboolean bSplitInst = GL_FALSE;
if (1 == pAsm->D.dst.math)
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index 73bb8bac55..0d4283e4ba 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -415,6 +415,15 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
GLuint number_of_elements,
GLenum dataElementType,
VTX_FETCH_METHOD* pFetchMethod);
+GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
+ GLuint destination_register,
+ GLenum type,
+ GLint size,
+ GLubyte element,
+ GLuint _signed,
+ GLboolean normalize,
+ VTX_FETCH_METHOD * pFetchMethod);
+GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
GLuint gethelpr(r700_AssemblerBase* pAsm);
void resethelpr(r700_AssemblerBase* pAsm);
void checkop_init(r700_AssemblerBase* pAsm);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 06d7e9c9ab..3b7f6fffe0 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -208,6 +208,93 @@ static void r700SetupVTXConstants(GLcontext * ctx,
}
+extern int getTypeSize(GLenum type);
+/**
+ * Emit the vertex-fetch resource constants (SQ_VTX_CONSTANT words) for one
+ * vertex stream, taking format, stride, sign and normalization from the
+ * stream description.
+ *
+ * \param ctx         GL context; the r700 context is derived from it.
+ * \param pAos        pointer to the struct radeon_aos backing this stream.
+ * \param pStreamDesc type, size, stride, element and sign/normalize flags
+ *                    of the stream.
+ *
+ * Returns without emitting anything when the aos has no buffer object.
+ */
+static void r700SetupVTXConstants2(GLcontext * ctx,
+ void * pAos,
+ StreamDesc * pStreamDesc)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct radeon_aos * paos = (struct radeon_aos *)pAos;
+ unsigned int nVBsize;
+ BATCH_LOCALS(&context->radeon);
+
+ unsigned int uSQ_VTX_CONSTANT_WORD0_0;
+ unsigned int uSQ_VTX_CONSTANT_WORD1_0;
+ unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0;
+ unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0;
+ unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0;
+
+ if (!paos->bo)
+ return;
+
+ /* The listed chip families sync with TC_ACTION_ENA, all others with
+  * VC_ACTION_ENA.
+  * NOTE(review): presumably these parts fetch vertices through the
+  * texture cache -- confirm against the hardware docs. */
+ if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+ (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+ r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+ else
+ r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+
+ /* Buffer size in bytes: with a zero stride it is derived from the
+  * element size, otherwise from the stride. */
+ if(0 == pStreamDesc->stride)
+ {
+ nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type);
+ }
+ else
+ {
+ nVBsize = paos->count * pStreamDesc->stride;
+ }
+
+ /* WORD0 carries the buffer offset, WORD1 the size minus one. */
+ uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
+ uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1;
+
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL),
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
+
+ /* Only the normalized case programs NUM_FORMAT_ALL; otherwise the field
+  * keeps the zero it was initialized with. */
+ if(GL_TRUE == pStreamDesc->normalize)
+ {
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ }
+ //else
+ //{
+ // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT,
+ // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ //}
+
+ if(1 == pStreamDesc->_signed)
+ {
+ SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+ }
+
+ SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+ /* Nine dwords are written with R600_OUT_BATCH below; the extra two are
+  * presumably consumed by the relocation -- confirm against the macro. */
+ BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+ /* Resource slot is selected by the stream's element index. */
+ R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0);
+ R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0,
+ paos->bo,
+ uSQ_VTX_CONSTANT_WORD0_0,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+
+}
+
void r700SetupStreams(GLcontext *ctx)
{
context_t *context = R700_CONTEXT(ctx);
@@ -256,14 +343,24 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
COMMIT_BATCH();
for(i=0; i<VERT_ATTRIB_MAX; i++) {
- if(vp->mesa_program->Base.InputsRead & (1 << i)) {
- /* currently aos are packed */
- r700SetupVTXConstants(ctx,
- i,
- (void*)(&context->radeon.tcl.aos[j]),
- (unsigned int)context->radeon.tcl.aos[j].components,
- (unsigned int)context->radeon.tcl.aos[j].stride * 4,
- (unsigned int)context->radeon.tcl.aos[j].count);
+ if(vp->mesa_program->Base.InputsRead & (1 << i))
+ {
+ if(1 == context->selected_vp->uiVersion)
+ {
+ /* currently aos are packed */
+ r700SetupVTXConstants(ctx,
+ i,
+ (void*)(&context->radeon.tcl.aos[j]),
+ (unsigned int)context->radeon.tcl.aos[j].components,
+ (unsigned int)context->radeon.tcl.aos[j].stride * 4,
+ (unsigned int)context->radeon.tcl.aos[j].count);
+ }
+ else
+ { /* context->selected_vp->uiVersion == 2 : aos not always packed */
+ r700SetupVTXConstants2(ctx,
+ (void*)(&context->radeon.tcl.aos[j]),
+ &(context->stream_desc[j]));
+ }
j++;
}
}
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index 78ce3ae436..62a1ea1a22 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -341,6 +341,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
}
+ else
+ {
+ CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+ CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+ }
ui = (unNumOfReg < ui) ? ui : unNumOfReg;
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index b1c3648ca5..4f39d9f1bd 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -43,6 +43,7 @@
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
+#include "vbo/vbo_context.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
@@ -53,6 +54,7 @@
#include "r700_fragprog.h"
#include "r700_state.h"
+#include "radeon_buffer_objects.h"
#include "radeon_common_context.h"
void r700WaitForIdle(context_t *context);
@@ -249,78 +251,134 @@ static int r700NumVerts(int num_verts, int prim)
static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
- context_t *context = R700_CONTEXT(ctx);
- BATCH_LOCALS(&context->radeon);
- int type, i, total_emit;
- int num_indices;
- uint32_t vgt_draw_initiator = 0;
- uint32_t vgt_index_type = 0;
- uint32_t vgt_primitive_type = 0;
- uint32_t vgt_num_indices = 0;
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *vb = &tnl->vb;
-
- type = r700PrimitiveType(prim);
- num_indices = r700NumVerts(end - start, prim);
-
- radeon_print(RADEON_RENDER, RADEON_TRACE,
- "%s type %x num_indices %d\n",
- __func__, type, num_indices);
-
- if (type < 0 || num_indices <= 0)
- return;
+ context_t *context = R700_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type, i, total_emit;
+ int num_indices;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+ GLboolean bUseDrawIndex;
+
+ if(NULL != context->ind_buf.bo)
+ {
+ bUseDrawIndex = GL_TRUE;
+ }
+ else
+ {
+ bUseDrawIndex = GL_FALSE;
+ }
+
+ type = r700PrimitiveType(prim);
+ num_indices = r700NumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
+ if(GL_TRUE == bUseDrawIndex)
+ {
+ total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 5 + 2; /* DRAW_INDEX */
+ }
+ else
+ {
total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
- + 2 /* NUM_INSTANCES */
+ + 2 /* NUM_INSTANCES */
+ num_indices + 3; /* DRAW_INDEX_IMMD */
+ }
- BEGIN_BATCH_NO_AUTOSTATE(total_emit);
- // prim
- SETfield(vgt_primitive_type, type,
- VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
- R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
- R600_OUT_BATCH(vgt_primitive_type);
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+ R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX);
+ R600_OUT_BATCH(vgt_primitive_type);
// index type
- SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
- R600_OUT_BATCH(vgt_index_type);
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
- // num instances
- R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
- R600_OUT_BATCH(1);
+ if(GL_TRUE == bUseDrawIndex)
+ {
+ if(GL_TRUE != context->ind_buf.is_32bit)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+ }
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+
+ // draw packet
+ vgt_num_indices = num_indices;
- // draw packet
- vgt_num_indices = num_indices;
+ if(GL_TRUE == bUseDrawIndex)
+ {
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ }
+ else
+ {
SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ }
+
SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+ if(GL_TRUE == bUseDrawIndex)
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+ R600_OUT_BATCH(context->ind_buf.bo_offset);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+ context->ind_buf.bo,
+ context->ind_buf.bo_offset,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ }
+ else
+ {
R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
R600_OUT_BATCH(vgt_num_indices);
R600_OUT_BATCH(vgt_draw_initiator);
- for (i = start; i < (start + num_indices); i++) {
- if(vb->Elts)
- R600_OUT_BATCH(vb->Elts[i]);
- else
- R600_OUT_BATCH(i);
+ for (i = start; i < (start + num_indices); i++)
+ {
+ if(vb->Elts)
+ {
+ R600_OUT_BATCH(vb->Elts[i]);
+ }
+ else
+ {
+ R600_OUT_BATCH(i);
+ }
}
- END_BATCH();
- COMMIT_BATCH();
+ }
+ END_BATCH();
+ COMMIT_BATCH();
}
/* start 3d, idle, cb/db flush */
#define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14
-static GLuint r700PredictRenderSize(GLcontext* ctx)
+static GLuint r700PredictRenderSize(GLcontext* ctx, GLuint nr_prims)
{
context_t *context = R700_CONTEXT(ctx);
- TNLcontext *tnl = TNL_CONTEXT(ctx);
struct r700_vertex_program *vp = context->selected_vp;
- struct vertex_buffer *vb = &tnl->vb;
GLboolean flushed;
GLuint dwords, i;
GLuint state_size;
@@ -328,8 +386,15 @@ static GLuint r700PredictRenderSize(GLcontext* ctx)
context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead);
dwords = PRE_EMIT_STATE_BUFSZ;
- for (i = 0; i < vb->PrimitiveCount; i++)
- dwords += vb->Primitive[i].count + 10;
+ if (nr_prims)
+ dwords += nr_prims * 14;
+ else {
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *vb = &tnl->vb;
+
+ for (i = 0; i < vb->PrimitiveCount; i++)
+ dwords += vb->Primitive[i].count + 10;
+ }
state_size = radeonCountStateEmitSize(&context->radeon);
flushed = rcommonEnsureCmdBufSpace(&context->radeon,
dwords + state_size, __FUNCTION__);
@@ -369,7 +434,7 @@ static GLboolean r700RunRender(GLcontext * ctx,
r700SetupFragmentProgram(ctx);
r600UpdateTextureState(ctx);
- GLuint emit_end = r700PredictRenderSize(ctx)
+ GLuint emit_end = r700PredictRenderSize(ctx, 0)
+ context->radeon.cmdbuf.cs->cdw;
r700SetupStreams(ctx);
@@ -477,4 +542,544 @@ const struct tnl_pipeline_stage *r700_pipeline[] =
0,
};
+/*
+ * CONVERT(TYPE, MACRO): convert `count` attribute elements of `sz`
+ * components each from TYPE to GLfloat.
+ *
+ * The macro is not self-contained: it reads `input`, `count` and `stride`
+ * and advances `src_ptr` and `dst_ptr`, all of which must be locals of the
+ * expanding function. When input->Normalized is set each component goes
+ * through MACRO(); otherwise it is converted with a plain (GLfloat) cast.
+ * The source advances by `stride` bytes per element, the destination is
+ * written tightly packed.
+ */
+#define CONVERT( TYPE, MACRO ) do { \
+ GLuint i, j, sz; \
+ sz = input->Size; \
+ if (input->Normalized) { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = MACRO(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } else { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = (GLfloat)(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } \
+} while (0)
+
+/**
+ * Convert a vertex attribute to GLfloat into a newly allocated DMA region.
+ *
+ * The source is read either from the array's buffer object (when it has a
+ * non-zero Name; the buffer is mapped here if it is not already mapped) or
+ * directly from input->Ptr. The converted data is written tightly packed
+ * to attr->bo at attr->bo_offset.
+ *
+ * \param ctx   GL context.
+ * \param count number of elements to convert; forced to 1 when
+ *              input->StrideB is 0.
+ * \param input source array description.
+ * \param attr  stream descriptor receiving the new bo and offset.
+ *
+ * NOTE(review): attr->stride, attr->dwords and attr->is_named_bo are not
+ * updated here -- verify that the caller or r700SetVertexFormat sets them.
+ */
+static void r700ConvertAttrib(GLcontext *ctx, int count,
+ const struct gl_client_array *input,
+ struct StreamDesc *attr)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ const GLvoid *src_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+ GLfloat *dst_ptr;
+ GLuint stride;
+
+ /* A zero StrideB is replaced by the packed element size. */
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ {
+ count = 1;
+ }
+
+ if (input->BufferObj->Name)
+ {
+ /* Map the buffer only if nobody else has; remember to unmap it. */
+ if (!input->BufferObj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ /* For a buffer object, input->Ptr is used as an offset into the mapping. */
+ src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ }
+ else
+ {
+ src_ptr = input->Ptr;
+ }
+
+ radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
+ sizeof(GLfloat) * input->Size * count, 32);
+ dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+ assert(src_ptr != NULL);
+
+ /* CONVERT advances src_ptr/dst_ptr and reads input, count and stride
+  * from this scope. */
+ switch (input->Type)
+ {
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ assert(input->Format != GL_BGRA);
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ break;
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+}
+
+/**
+ * Copy a vertex attribute out of a buffer object into a freshly allocated
+ * DMA region, padding every element to a dword-aligned stride.
+ *
+ * \param ctx   GL context.
+ * \param input source array; data is read from input->BufferObj at byte
+ *              offset input->Ptr, one element every input->StrideB bytes.
+ *              The buffer is mapped here if it is not already mapped.
+ * \param count number of elements to copy.
+ * \param attr  receives the new bo/bo_offset and the aligned stride.
+ */
+static void r700AlignDataToDword(GLcontext *ctx,
+                                 const struct gl_client_array *input,
+                                 int count,
+                                 struct StreamDesc *attr)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    const int dst_stride = (input->StrideB + 3) & ~3;
+    /* Elements are written dst_stride bytes apart, so the region must hold
+     * count * dst_stride bytes. Sizing it from the tightly packed element
+     * size (type size * components * count) let the copy loop below write
+     * past the end of the allocation. */
+    const int size = dst_stride * count;
+    GLboolean mapped_named_bo = GL_FALSE;
+
+    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+    /* Map the buffer only if nobody else has; remember to unmap it. */
+    if (!input->BufferObj->Pointer)
+    {
+        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        mapped_named_bo = GL_TRUE;
+    }
+
+    {
+        /* Byte pointers keep the stride arithmetic standard C instead of
+         * relying on arithmetic on void pointers. */
+        const GLubyte *src_ptr = (const GLubyte *)ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+        GLubyte *dst_ptr = (GLubyte *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+        int i;
+
+        for (i = 0; i < count; ++i)
+        {
+            _mesa_memcpy(dst_ptr, src_ptr, input->StrideB);
+            src_ptr += input->StrideB;
+            dst_ptr += dst_stride;
+        }
+    }
+
+    if (mapped_named_bo)
+    {
+        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+    }
+
+    attr->stride = dst_stride;
+}
+
+/**
+ * Prepare a buffer object for every active vertex stream and fill in the
+ * matching radeon_aos entries.
+ *
+ * For each active stream one of four paths is taken:
+ *  - types that are converted to float (GL_DOUBLE, GL_UNSIGNED_INT, GL_INT,
+ *    any non-4-byte type on big-endian builds, or a stride below 4 bytes)
+ *    go through r700ConvertAttrib;
+ *  - buffer-object data whose stride is not a multiple of 4 is copied with
+ *    r700AlignDataToDword;
+ *  - buffer-object data with an aligned stride is used in place;
+ *  - client-memory data is copied into a DMA region with radeonEmitVec*.
+ *
+ * \param ctx   GL context.
+ * \param input arrays indexed by vertex attribute (stream_desc[].element).
+ * \param count number of vertices to make available.
+ */
+static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ GLuint stride;
+ int ret;
+ int i, index;
+
+ R600_STATECHANGE(context, vtx);
+
+ for(index = 0; index < context->nNumActiveAos; index++)
+ {
+ struct radeon_aos *aos = &context->radeon.tcl.aos[index];
+ /* i is the attribute slot in input[]; index is the packed stream slot. */
+ i = context->stream_desc[index].element;
+
+ stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input[i]->Type) != 4 ||
+#endif
+ stride < 4)
+ {
+ r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
+ }
+ else
+ {
+ if (input[i]->BufferObj->Name)
+ {
+ if (stride % 4 != 0)
+ {
+ /* Unaligned stride: repack into a private DMA region. */
+ assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
+ context->stream_desc[index].is_named_bo = GL_FALSE;
+ }
+ else
+ {
+ /* Aligned stride: reference the application's buffer object directly. */
+ context->stream_desc[index].stride = input[i]->StrideB;
+ context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+ context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ context->stream_desc[index].is_named_bo = GL_TRUE;
+ }
+ }
+ else
+ {
+ /* Client-memory array: copy it into a DMA region. */
+ int size;
+ int local_count = count;
+ uint32_t *dst;
+
+ if (input[i]->StrideB == 0)
+ {
+ /* Zero stride: only a single element is uploaded. */
+ size = getTypeSize(input[i]->Type) * input[i]->Size;
+ local_count = 1;
+ }
+ else
+ {
+ size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+ }
+
+ radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
+ &context->stream_desc[index].bo_offset, size, 32);
+ assert(context->stream_desc[index].bo->ptr != NULL);
+ dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
+ context->stream_desc[index].bo_offset);
+
+ /* The copy routine and resulting stride follow the element size in dwords. */
+ switch (context->stream_desc[index].dwords)
+ {
+ case 1:
+ radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ context->stream_desc[index].stride = 4;
+ break;
+ case 2:
+ radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ context->stream_desc[index].stride = 8;
+ break;
+ case 3:
+ radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ context->stream_desc[index].stride = 12;
+ break;
+ case 4:
+ radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ context->stream_desc[index].stride = 16;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ }
+
+ /* Mirror the stream description into the aos entry; aos->stride is in
+  * units of sizeof(float), stream_desc[].stride in bytes. */
+ aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
+ aos->stride = context->stream_desc[index].stride / sizeof(float);
+ aos->components = context->stream_desc[index].dwords;
+ aos->bo = context->stream_desc[index].bo;
+ aos->offset = context->stream_desc[index].bo_offset;
+
+ if(context->stream_desc[index].is_named_bo)
+ {
+ radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
+ context->stream_desc[index].bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ }
+
+ context->radeon.tcl.aos_count = context->nNumActiveAos;
+ /* NOTE(review): the result of the space check is stored in ret but never
+  * examined, so a failed check goes unnoticed. */
+ ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
+ first_elem(&context->radeon.dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+}
+
+/**
+ * Release the per-draw buffers set up for the last draw call.
+ *
+ * Drops the reference on every stream buffer that is not an application
+ * (named) buffer object and on the index buffer, and clears the pointers
+ * that other code inspects afterwards:
+ *  - tcl.aos[n].bo, to prevent a double unref in radeonReleaseArrays
+ *    called during context destroy;
+ *  - ind_buf.bo, because r700RunRenderPrimitive chooses the indexed draw
+ *    path whenever that pointer is non-NULL, so a stale value would make
+ *    a later draw reference a buffer this function already released.
+ *
+ * \param ctx GL context.
+ */
+static void r700FreeData(GLcontext *ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    int i;
+
+    for (i = 0; i < context->nNumActiveAos; i++)
+    {
+        if (!context->stream_desc[i].is_named_bo)
+        {
+            radeon_bo_unref(context->stream_desc[i].bo);
+        }
+        context->radeon.tcl.aos[i].bo = NULL;
+    }
+
+    if (context->ind_buf.bo != NULL)
+    {
+        radeon_bo_unref(context->ind_buf.bo);
+        context->ind_buf.bo = NULL;
+    }
+}
+
+/**
+ * Re-pack an index buffer that cannot be uploaded as-is into 16-bit
+ * indices stored in a new DMA region (context->ind_buf).
+ *
+ * GL_UNSIGNED_BYTE indices are widened to 16 bits. On big-endian builds
+ * only, every other type reaching this function is treated as
+ * GL_UNSIGNED_SHORT and re-packed the same way. Two indices are stored per
+ * 32-bit word, the first in the low half; an odd trailing index gets a
+ * word of its own.
+ *
+ * On builds without MESA_BIG_ENDIAN the `else` branch is compiled out, so
+ * a non-byte type would leave ind_buf.bo untouched; the visible caller
+ * only passes GL_UNSIGNED_BYTE in that configuration.
+ *
+ * \param ctx          GL context.
+ * \param mesa_ind_buf source index buffer.
+ */
+static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ GLvoid *src_ptr;
+ GLuint *out;
+ int i;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ /* Map the element buffer only if it is a real buffer object that is not
+  * already mapped. */
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ mapped_named_bo = GL_TRUE;
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ }
+ /* NOTE(review): for client-memory indices this presumably relies on
+  * obj->Pointer being NULL so that the sum equals mesa_ind_buf->ptr. */
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
+ {
+ /* Two bytes per index, with the index count rounded up to an even number. */
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+ GLubyte *in = (GLubyte *)src_ptr;
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ /* Odd count: the last index is stored alone. */
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+
+#if MESA_BIG_ENDIAN
+ }
+ else
+ { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+ GLushort *in = (GLushort *)src_ptr;
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+#endif
+ }
+
+ /* The repacked buffer always holds 16-bit indices. */
+ context->ind_buf.is_32bit = GL_FALSE;
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+}
+
+/**
+ * Upload the index buffer of an indexed draw into a DMA region and record
+ * it in context->ind_buf.
+ *
+ * With no index buffer, ind_buf.bo is cleared and nothing else happens.
+ * Index types that can be used unchanged are copied verbatim
+ * (GL_UNSIGNED_INT on big-endian builds, anything except GL_UNSIGNED_BYTE
+ * otherwise); all remaining types are handed to r700FixupIndexBuffer.
+ *
+ * \param ctx          GL context.
+ * \param mesa_ind_buf index buffer of the draw, or NULL.
+ */
+static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ if (!mesa_ind_buf) {
+ context->ind_buf.bo = NULL;
+ return;
+ }
+
+ /* The two preprocessor arms open the same `if` block with a different
+  * condition; the shared body follows the #endif. */
+#if MESA_BIG_ENDIAN
+ if (mesa_ind_buf->type == GL_UNSIGNED_INT)
+ {
+#else
+ if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
+ {
+#endif
+ const GLvoid *src_ptr;
+ GLvoid *dst_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ /* Map the element buffer only if it is a real buffer object that is
+  * not already mapped. */
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+ assert(context->ind_buf.bo->ptr != NULL);
+ dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ _mesa_memcpy(dst_ptr, src_ptr, size);
+
+ context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+ }
+ else
+ {
+ r700FixupIndexBuffer(ctx, mesa_ind_buf);
+ }
+}
+
+/**
+ * Attempt to render a set of primitives entirely in hardware.
+ *
+ * Updates GL and shader state, validates buffers, uploads the index and
+ * vertex buffers, emits state plus one draw per primitive, flushes the
+ * colour and depth surfaces and finally releases the per-draw buffers.
+ *
+ * \param ctx       GL context.
+ * \param arrays    vertex arrays indexed by attribute.
+ * \param prim      primitives to draw.
+ * \param nr_prims  number of entries in prim.
+ * \param ib        index buffer, or NULL.
+ * \param min_index lowest index referenced (not used in this function).
+ * \param max_index highest index referenced; max_index + 1 vertices are
+ *                  set up.
+ * \return GL_FALSE when r600ValidateBuffers fails (nothing has been
+ *         emitted at that point), GL_TRUE otherwise.
+ */
+static GLboolean r700TryDrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
+{
+ context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ GLuint i, id = 0;
+ struct radeon_renderbuffer *rrb;
+
+ if (ctx->NewState)
+ {
+ _mesa_update_state( ctx );
+ }
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+ r700SetVertexFormat(ctx, arrays, max_index + 1);
+ /* shaders need to be updated before buffers are validated */
+ r700UpdateShaders2(ctx);
+ if (!r600ValidateBuffers(ctx))
+ return GL_FALSE;
+
+ /* always emit CB base to prevent
+ * lock ups on some chips.
+ */
+ R600_STATECHANGE(context, cb_target);
+ /* mark vtx as dirty since it changes per-draw */
+ R600_STATECHANGE(context, vtx);
+
+ r700SetScissor(context);
+ r700SetupVertexProgram(ctx);
+ r700SetupFragmentProgram(ctx);
+ r600UpdateTextureState(ctx);
+
+ /* Predicted command stream position after this draw; compared with the
+  * actual position at the end to detect under-estimation. */
+ GLuint emit_end = r700PredictRenderSize(ctx, nr_prims)
+ + context->radeon.cmdbuf.cs->cdw;
+
+ r700SetupIndexBuffer(ctx, ib);
+ r700SetupStreams2(ctx, arrays, max_index + 1);
+
+ radeonEmitState(radeon);
+
+ radeon_debug_add_indent();
+ for (i = 0; i < nr_prims; ++i)
+ {
+ r700RunRenderPrimitive(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
+ }
+ radeon_debug_remove_indent();
+
+ /* Flush render op cached for last several quads. */
+ r700WaitForIdleClean(context);
+
+ /* id is always 0 here, so the extra bit set is (1 << 6). */
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ CB_ACTION_ENA_bit | (1 << (id + 6)));
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
+
+ /* Drop the references on the per-draw vertex and index buffers. */
+ r700FreeData(ctx);
+
+ if (emit_end < context->radeon.cmdbuf.cs->cdw)
+ {
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
+ }
+
+ return GL_TRUE;
+}
+
+/**
+ * Draw entry point used for indexed rendering and as the callback handed
+ * to vbo_rebase_prims.
+ *
+ * Computes the index bounds when the caller did not supply valid ones,
+ * rebases the draw when min_index is non-zero (passing this function as
+ * the draw callback), then tries the hardware path and falls back to the
+ * tnl pipeline if that fails.
+ *
+ * \param ctx                GL context.
+ * \param arrays             vertex arrays indexed by attribute.
+ * \param prim               primitives to draw.
+ * \param nr_prims           number of entries in prim.
+ * \param ib                 index buffer.
+ * \param index_bounds_valid whether min_index/max_index are meaningful.
+ * \param min_index          lowest index referenced.
+ * \param max_index          highest index referenced.
+ */
+static void r700DrawPrimsRe(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
+{
+ GLboolean retval = GL_FALSE;
+
+ /* This check should get folded into just the places that
+ * min/max index are really needed.
+ */
+ if (!index_bounds_valid) {
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ }
+
+ /* A non-zero min_index is handed to vbo_rebase_prims, which receives
+  * this function as its draw callback. */
+ if (min_index) {
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe );
+ return;
+ }
+
+ /* Make an attempt at drawing */
+ retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+
+ /* If failed run tnl pipeline - it should take care of fallbacks */
+ if (!retval)
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+/**
+ * vbo draw_prims hook: indexed draws go to r700DrawPrimsRe, non-indexed
+ * draws are passed to the tnl pipeline after clearing the driver's index
+ * buffer pointer.
+ */
+static void r700DrawPrims(GLcontext *ctx,
+                          const struct gl_client_array *arrays[],
+                          const struct _mesa_prim *prim,
+                          GLuint nr_prims,
+                          const struct _mesa_index_buffer *ib,
+                          GLboolean index_bounds_valid,
+                          GLuint min_index,
+                          GLuint max_index)
+{
+    context_t *context = R700_CONTEXT(ctx);
+
+    if (ib)
+    {
+        /* Indexed drawing takes the driver's own draw path. */
+        r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index);
+        return;
+    }
+
+    /* For non indexed drawing, using tnl pipe. */
+    context->ind_buf.bo = NULL;
+    _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib,
+                        index_bounds_valid, min_index, max_index);
+}
+
+/**
+ * Install r700DrawPrims as the vbo module's draw_prims callback for this
+ * context.
+ */
+void r700InitDraw(GLcontext *ctx)
+{
+    /* to be enabled */
+    vbo_context(ctx)->draw_prims = r700DrawPrims;
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c
index b4fd51c137..955ea4e4e1 100644
--- a/src/mesa/drivers/dri/r600/r700_shader.c
+++ b/src/mesa/drivers/dri/r600/r700_shader.c
@@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction *
plstCFInstructions->uNumOfNode++;
}
+/**
+ * Unlink pInst from the list and free it.
+ *
+ * The list is searched first; nothing happens when pInst is not a member.
+ * Otherwise the node count is decremented, every node that followed pInst
+ * is renumbered starting from pInst's old index, the head/tail/predecessor
+ * links are repaired and pInst is released with FREE().
+ *
+ * \param plstCFInstructions list to remove from.
+ * \param pInst              instruction to remove; invalid after a
+ *                           successful removal.
+ */
+void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst)
+{
+    R700ShaderInstruction * pBefore = NULL;
+    R700ShaderInstruction * pWalk;
+    GLuint                  uNextIndex;
+
+    /* Find pInst while remembering the node in front of it. */
+    for (pWalk = plstCFInstructions->pHead;
+         NULL != pWalk && pWalk != pInst;
+         pWalk = pWalk->pNextInst)
+    {
+        pBefore = pWalk;
+    }
+
+    /* Ran off the end: pInst is not in this list. */
+    if (NULL == pWalk)
+    {
+        return;
+    }
+
+    plstCFInstructions->uNumOfNode--;
+
+    /* Close the gap in the numbering of the nodes behind pInst. */
+    uNextIndex = pInst->m_uIndex;
+    for (pWalk = pInst->pNextInst; NULL != pWalk; pWalk = pWalk->pNextInst)
+    {
+        pWalk->m_uIndex = uNextIndex;
+        uNextIndex++;
+    }
+
+    /* Repair the links around the removed node. */
+    if (pInst == plstCFInstructions->pHead)
+    {
+        plstCFInstructions->pHead = pInst->pNextInst;
+    }
+    if (pInst == plstCFInstructions->pTail)
+    {
+        plstCFInstructions->pTail = pBefore;
+    }
+    if (NULL != pBefore)
+    {
+        pBefore->pNextInst = pInst->pNextInst;
+    }
+
+    FREE(pInst);
+}
+
void Init_R700_Shader(R700_Shader * pShader)
{
pShader->Type = R700_SHADER_INVALID;
@@ -488,6 +537,47 @@ void DebugPrint(void)
{
}
+/**
+ * Remove every vertex-fetch instruction from the shader.
+ *
+ * For each instruction in lstVTXInstructions its size is subtracted from
+ * uShaderBinaryDWORDSize; if it is linked to a control-flow clause, that
+ * clause is taken out of lstCFInstructions and its size subtracted too.
+ * All fetch instructions are then freed and the list is reset to empty.
+ *
+ * \param pShader shader to clean up.
+ *
+ * NOTE(review): if several fetch instructions can share one linked clause,
+ * the later ones still hold a pointer to a clause that was already freed
+ * -- verify how m_pLinkedGenericClause is assigned.
+ */
+void cleanup_vfetch_shaderinst(R700_Shader *pShader)
+{
+    R700ShaderInstruction *pInst;
+    R700ShaderInstruction *pInstToFree;
+    R700VertexInstruction *pVTXInst;
+    R700ControlFlowInstruction *pCFInst;
+    GLuint uCFInstSize;
+
+    pInst = pShader->lstVTXInstructions.pHead;
+    while(NULL != pInst)
+    {
+        pVTXInst = (R700VertexInstruction *)pInst;
+        pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType);
+
+        if(NULL != pVTXInst->m_pLinkedGenericClause)
+        {
+            pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause);
+
+            /* TakeInstOutFromList frees the clause when it finds it, so its
+             * size has to be read before the call, not after. */
+            uCFInstSize = GetInstructionSize(pCFInst->m_ShaderInstType);
+
+            TakeInstOutFromList(&(pShader->lstCFInstructions),
+                                (R700ShaderInstruction*)pCFInst);
+
+            pShader->uShaderBinaryDWORDSize -= uCFInstSize;
+        }
+
+        pInst = pInst->pNextInst;
+    }
+
+    /* Destroy each item in pShader->lstVTXInstructions. */
+    pInst = pShader->lstVTXInstructions.pHead;
+    while(NULL != pInst)
+    {
+        pInstToFree = pInst;
+        pInst = pInst->pNextInst;
+        FREE(pInstToFree);
+    }
+
+    /* Leave the list empty. */
+    pShader->lstVTXInstructions.pHead=NULL;
+    pShader->lstVTXInstructions.pTail=NULL;
+    pShader->lstVTXInstructions.uNumOfNode=0;
+}
+
void Clean_Up_Shader(R700_Shader *pShader)
{
FREE(pShader->pProgram);
diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h
index bfd01e1a93..997cb05aaf 100644
--- a/src/mesa/drivers/dri/r600/r700_shader.h
+++ b/src/mesa/drivers/dri/r600/r700_shader.h
@@ -143,6 +143,7 @@ void LoadProgram(R700_Shader *pShader);
void UpdateShaderRegisters(R700_Shader *pShader);
void DeleteInstructions(R700_Shader *pShader);
void DebugPrint(void);
+void cleanup_vfetch_shaderinst(R700_Shader *pShader);
void Clean_Up_Shader(R700_Shader *pShader);
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 124469b5a6..7e8b48f91e 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -92,7 +92,25 @@ void r700UpdateShaders (GLcontext * ctx) //----------------------------------
}
}
- r700SelectVertexShader(ctx);
+ r700SelectVertexShader(ctx, 1);
+ r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ context->radeon.NewGLState = 0;
+}
+
+/**
+ * Select the fragment shader and the version-2 vertex shader for the
+ * current state, refresh the program state parameters and clear
+ * radeon.NewGLState.
+ *
+ * Prints a message and returns without doing anything when no current
+ * fragment program exists.
+ */
+void r700UpdateShaders2(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ /* should only happen once, just after context is created */
+ /* TODO: shouldn't we fallback to sw here? */
+ if (!ctx->FragmentProgram._Current) {
+ _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return;
+ }
+
+ r700SelectFragmentShader(ctx);
+
+ r700SelectVertexShader(ctx, 2);
r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
context->radeon.NewGLState = 0;
}
@@ -171,6 +189,14 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-----------
R600_STATECHANGE(context, db_target);
}
+ if (new_state & (_NEW_LIGHT)) {
+ R600_STATECHANGE(context, su);
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+ SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+ else
+ CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+ }
+
r700UpdateStateParameters(ctx, new_state);
R600_STATECHANGE(context, cl);
@@ -467,10 +493,10 @@ static void r700SetBlendState(GLcontext * ctx)
eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
SETfield(blend_reg,
- blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+ blend_factor(ctx->Color.BlendSrcA, GL_TRUE),
ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
SETfield(blend_reg,
- blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+ blend_factor(ctx->Color.BlendDstA, GL_FALSE),
ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
switch (ctx->Color.BlendEquationA) {
@@ -745,9 +771,9 @@ static void r700ColorMask(GLcontext * ctx,
(b ? 4 : 0) |
(a ? 8 : 0));
- if (mask != r700->CB_SHADER_MASK.u32All) {
+ if (mask != r700->CB_TARGET_MASK.u32All) {
R600_STATECHANGE(context, cb);
- SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
+ SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
}
}
@@ -1754,7 +1780,7 @@ void r700InitState(GLcontext * ctx) //-------------------
r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
/* screen/window/view */
- SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask);
+ SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask);
context->radeon.hw.all_dirty = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h
index 0f53d5b4c5..209189d8d7 100644
--- a/src/mesa/drivers/dri/r600/r700_state.h
+++ b/src/mesa/drivers/dri/r600/r700_state.h
@@ -35,6 +35,7 @@
extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state);
extern void r700UpdateShaders (GLcontext * ctx);
+extern void r700UpdateShaders2(GLcontext * ctx);
extern void r700UpdateViewportOffset(GLcontext * ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 9ee26286d9..d12c39c9f7 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -159,7 +159,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions(
return GL_TRUE;
}
-void Map_Vertex_Program(struct r700_vertex_program *vp,
+GLboolean Process_Vertex_Program_Vfetch_Instructions2(
+ GLcontext *ctx,
+ struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ int i;
+ context_t *context = R700_CONTEXT(ctx);
+
+ VTX_FETCH_METHOD vtxFetchMethod;
+ vtxFetchMethod.bEnableMini = GL_FALSE;
+ vtxFetchMethod.mega_fetch_remainder = 0;
+
+ for(i=0; i<context->nNumActiveAos; i++)
+ {
+ assemble_vfetch_instruction2(&vp->r700AsmCode,
+ vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
+ context->stream_desc[i].type,
+ context->stream_desc[i].size,
+ context->stream_desc[i].element,
+ context->stream_desc[i]._signed,
+ context->stream_desc[i].normalize,
+ &vtxFetchMethod);
+ }
+
+ return GL_TRUE;
+}
+
+void Map_Vertex_Program(GLcontext *ctx,
+ struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp)
{
GLuint ui;
@@ -175,11 +203,22 @@ void Map_Vertex_Program(struct r700_vertex_program *vp,
pAsm->number_used_registers += num_inputs;
// Create VFETCH instructions for inputs
- if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) )
- {
- radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
- return; //error
- }
+ if(1 == vp->uiVersion)
+ {
+ if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) )
+ {
+ radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n");
+ return;
+ }
+ }
+ else
+ {
+ if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
+ {
+ radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
+ return;
+ }
+ }
// Map Outputs
pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
@@ -261,7 +300,8 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
}
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
- struct gl_vertex_program *mesa_vp)
+ struct gl_vertex_program *mesa_vp,
+ GLint nVer)
{
context_t *context = R700_CONTEXT(ctx);
struct r700_vertex_program *vp;
@@ -271,6 +311,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
unsigned int i;
vp = _mesa_calloc(sizeof(*vp));
+ vp->uiVersion = nVer;
vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base);
if (mesa_vp->IsPositionInvariant)
@@ -278,8 +319,10 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
_mesa_insert_mvp_code(ctx, vp->mesa_program);
}
- for(i=0; i<VERT_ATTRIB_MAX; i++)
+ if( 1 == nVer )
{
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
unBit = 1 << i;
if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */
{
@@ -287,7 +330,17 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/
vp->aos_desc[i].type = GL_FLOAT;
}
+ }
}
+ else
+ {
+ for(i=0; i<context->nNumActiveAos; i++)
+ {
+ vp->aos_desc[i].size = context->stream_desc[i].size;
+ vp->aos_desc[i].stride = context->stream_desc[i].stride;
+ vp->aos_desc[i].type = context->stream_desc[i].type;
+ }
+ }
if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
{
@@ -296,7 +349,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
//Init_Program
Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
- Map_Vertex_Program( vp, vp->mesa_program );
+ Map_Vertex_Program(ctx, vp, vp->mesa_program );
if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
{
@@ -325,7 +378,7 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
return vp;
}
-void r700SelectVertexShader(GLcontext *ctx)
+void r700SelectVertexShader(GLcontext *ctx, GLint nVersion)
{
context_t *context = R700_CONTEXT(ctx);
struct r700_vertex_program_cont *vpc;
@@ -347,17 +400,35 @@ void r700SelectVertexShader(GLcontext *ctx)
for (vp = vpc->progs; vp; vp = vp->next)
{
+ if (vp->uiVersion != nVersion )
+ continue;
match = GL_TRUE;
- for(i=0; i<VERT_ATTRIB_MAX; i++)
+ if ( 1 == nVersion )
{
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
unBit = 1 << i;
if(InputsRead & unBit)
{
- if (vp->aos_desc[i].size != vb->AttribPtr[i]->size)
- match = GL_FALSE;
- break;
+ if (vp->aos_desc[i].size != vb->AttribPtr[i]->size)
+ {
+ match = GL_FALSE;
+ break;
+ }
}
+ }
}
+ else
+ {
+ for(i=0; i<context->nNumActiveAos; i++)
+ {
+ if (vp->aos_desc[i].size != context->stream_desc[i].size)
+ {
+ match = GL_FALSE;
+ break;
+ }
+ }
+ }
if (match)
{
context->selected_vp = vp;
@@ -365,7 +436,7 @@ void r700SelectVertexShader(GLcontext *ctx)
}
}
- vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) );
+ vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program), nVersion);
if(!vp)
{
radeon_error("Failed to translate vertex shader. \n");
@@ -377,6 +448,140 @@ void r700SelectVertexShader(GLcontext *ctx)
return;
}
+int getTypeSize(GLenum type)
+{
+ switch (type)
+ {
+ case GL_DOUBLE:
+ return sizeof(GLdouble);
+ case GL_FLOAT:
+ return sizeof(GLfloat);
+ case GL_INT:
+ return sizeof(GLint);
+ case GL_UNSIGNED_INT:
+ return sizeof(GLuint);
+ case GL_SHORT:
+ return sizeof(GLshort);
+ case GL_UNSIGNED_SHORT:
+ return sizeof(GLushort);
+ case GL_BYTE:
+ return sizeof(GLbyte);
+ case GL_UNSIGNED_BYTE:
+ return sizeof(GLubyte);
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
+
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
+ : input->StrideB;
+
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input->Type) != 4 ||
+#endif
+ stride < 4)
+ {
+ pStreamDesc->type = GL_FLOAT;
+
+ if (input->StrideB == 0)
+ {
+ pStreamDesc->stride = 0;
+ }
+ else
+ {
+ pStreamDesc->stride = sizeof(GLfloat) * input->Size;
+ }
+ pStreamDesc->dwords = input->Size;
+ pStreamDesc->is_named_bo = GL_FALSE;
+ }
+ else
+ {
+ pStreamDesc->type = input->Type;
+ pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;
+ if (!input->BufferObj->Name)
+ {
+ if (input->StrideB == 0)
+ {
+ pStreamDesc->stride = 0;
+ }
+ else
+ {
+ pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
+ }
+
+ pStreamDesc->is_named_bo = GL_FALSE;
+ }
+ }
+
+ pStreamDesc->size = input->Size;
+ pStreamDesc->dst_loc = context->nNumActiveAos;
+ pStreamDesc->element = unLoc;
+
+ switch (pStreamDesc->type)
+ { //GetSurfaceFormat
+ case GL_FLOAT:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = GL_FALSE;
+ break;
+ case GL_SHORT:
+ pStreamDesc->_signed = 1;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_BYTE:
+ pStreamDesc->_signed = 1;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_UNSIGNED_SHORT:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_UNSIGNED_BYTE:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ default:
+ case GL_INT:
+ case GL_UNSIGNED_INT:
+ case GL_DOUBLE:
+ assert(0);
+ break;
+ }
+ context->nNumActiveAos++;
+}
+
+void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program *vpc
+ = (struct r700_vertex_program *)ctx->VertexProgram._Current;
+
+ struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
+ unsigned int unLoc = 0;
+ unsigned int unBit = mesa_vp->Base.InputsRead;
+ context->nNumActiveAos = 0;
+
+ while(unBit)
+ {
+ if(unBit & 1)
+ {
+ r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
+ }
+
+ unBit >>= 1;
+ ++unLoc;
+ }
+}
+
void * r700GetActiveVpShaderBo(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h
index c48764c43b..f9a3e395ee 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.h
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.h
@@ -52,7 +52,7 @@ struct r700_vertex_program
GLboolean translated;
GLboolean loaded;
- GLboolean needUpdateVF;
+ GLint uiVersion;
void * shaderbo;
@@ -76,19 +76,28 @@ unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm,
GLboolean Process_Vertex_Program_Vfetch_Instructions(
struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp);
-void Map_Vertex_Program(struct r700_vertex_program *vp,
+GLboolean Process_Vertex_Program_Vfetch_Instructions2(
+ GLcontext *ctx,
+ struct r700_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+void Map_Vertex_Program(GLcontext *ctx,
+ struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp);
GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
struct gl_vertex_program *mesa_vp);
struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
- struct gl_vertex_program *mesa_vp);
+ struct gl_vertex_program *mesa_vp,
+ GLint nVer);
/* Interface */
-extern void r700SelectVertexShader(GLcontext *ctx);
+extern void r700SelectVertexShader(GLcontext *ctx, GLint nVersion);
+extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count);
extern GLboolean r700SetupVertexProgram(GLcontext * ctx);
extern void * r700GetActiveVpShaderBo(GLcontext * ctx);
+extern int getTypeSize(GLenum type);
+
#endif /* _R700_VERTPROG_H_ */
diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.c b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c
new file mode 120000
index 0000000000..f6a5f66470
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.h b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h
new file mode 120000
index 0000000000..2f134fd17b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
index 1f286776b5..b1efc72872 100644
--- a/src/mesa/drivers/dri/radeon/Makefile
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -47,8 +47,7 @@ C_SOURCES = \
$(DRIVER_SOURCES) \
$(CS_SOURCES)
-DRIVER_DEFINES = -DRADEON_COMMON=0 \
- -Wall
+DRIVER_DEFINES = -DRADEON_R100 -Wall
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index 9817ff856b..f8a4cdb495 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -1345,5 +1345,5 @@ void rcommonBeginBatch(radeonContextPtr rmesa, int n,
void radeonUserClear(GLcontext *ctx, GLuint mask)
{
- _mesa_meta_clear(ctx, mask);
+ _mesa_meta_Clear(ctx, mask);
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 330721acee..6b9b1e3c5e 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600) /* +r6/r7 */
+#if defined(RADEON_R600)
#include "r600_context.h"
#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index d83b166742..7ac53ec0ca 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -583,7 +583,7 @@ void radeon_fbo_init(struct radeon_context *radeon)
radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
- radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_blit_framebuffer;
+ radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 5ffb55db5e..573eb6c9c1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -48,17 +48,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "radeon_screen.h"
#include "radeon_common.h"
#include "radeon_span.h"
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
#include "radeon_context.h"
#include "radeon_tex.h"
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#include "r200_context.h"
#include "r200_ioctl.h"
#include "r200_tex.h"
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif defined(RADEON_R300)
#include "r300_context.h"
#include "r300_tex.h"
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#include "r600_context.h"
#include "r700_driconf.h" /* +r6/r7 */
#include "r600_tex.h" /* +r6/r7 */
@@ -82,7 +82,7 @@ DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
DRI_CONF_OPT_END
-#if !RADEON_COMMON /* R100 */
+#if defined(RADEON_R100) /* R100 */
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
DRI_CONF_SECTION_PERFORMANCE
@@ -109,7 +109,7 @@ DRI_CONF_BEGIN
DRI_CONF_END;
static const GLuint __driNConfigOptions = 15;
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
PUBLIC const char __driConfigOptions[] =
DRI_CONF_BEGIN
@@ -147,7 +147,7 @@ extern const struct dri_extension NV_vp_extension[];
extern const struct dri_extension ATI_fs_extension[];
extern const struct dri_extension point_extensions[];
-#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+#elif defined(RADEON_R300) || defined(RADEON_R600)
#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
@@ -220,7 +220,7 @@ static const GLuint __driNConfigOptions = 17;
extern const struct dri_extension gl_20_extension[];
-#endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */
+#endif
extern const struct dri_extension card_extensions[];
extern const struct dri_extension mm_extensions[];
@@ -337,7 +337,7 @@ radeonFillInModes( __DRIscreenPrivate *psp,
return (const __DRIconfig **) configs;
}
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
static const __DRItexOffsetExtension radeonTexOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
radeonSetTexOffset,
@@ -350,7 +350,7 @@ static const __DRItexBufferExtension radeonTexBufferExtension = {
};
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
static const __DRIallocateExtension r200AllocateExtension = {
{ __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION },
r200AllocateMemoryMESA,
@@ -370,7 +370,7 @@ static const __DRItexBufferExtension r200TexBufferExtension = {
};
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#if defined(RADEON_R300)
static const __DRItexOffsetExtension r300texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
r300SetTexOffset,
@@ -383,7 +383,7 @@ static const __DRItexBufferExtension r300TexBufferExtension = {
};
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
static const __DRItexOffsetExtension r600texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
r600SetTexOffset, /* +r6/r7 */
@@ -1222,22 +1222,22 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->extensions[i++] = &driMediaStreamCounterExtension.base;
}
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
screen->extensions[i++] = &radeonTexOffsetExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
if (IS_R200_CLASS(screen))
screen->extensions[i++] = &r200AllocateExtension.base;
screen->extensions[i++] = &r200texOffsetExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#if defined(RADEON_R300)
screen->extensions[i++] = &r300texOffsetExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
screen->extensions[i++] = &r600texOffsetExtension.base;
#endif
@@ -1376,22 +1376,22 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv)
screen->extensions[i++] = &driMediaStreamCounterExtension.base;
}
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
screen->extensions[i++] = &radeonTexBufferExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
if (IS_R200_CLASS(screen))
screen->extensions[i++] = &r200AllocateExtension.base;
screen->extensions[i++] = &r200TexBufferExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#if defined(RADEON_R300)
screen->extensions[i++] = &r300TexBufferExtension.base;
#endif
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
screen->extensions[i++] = &r600TexBufferExtension.base;
#endif
@@ -1589,22 +1589,22 @@ radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv)
static const __DRIconfig **
radeonInitScreen(__DRIscreenPrivate *psp)
{
-#if !RADEON_COMMON
+#if defined(RADEON_R100)
static const char *driver_name = "Radeon";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 6, 0 };
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
static const char *driver_name = "R200";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 6, 0 };
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif defined(RADEON_R300)
static const char *driver_name = "R300";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 24, 0 };
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
static const char *driver_name = "R600";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
@@ -1630,13 +1630,13 @@ radeonInitScreen(__DRIscreenPrivate *psp)
* Hello chicken. Hello egg. How are you two today?
*/
driInitExtensions( NULL, card_extensions, GL_FALSE );
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
driInitExtensions( NULL, blend_extensions, GL_FALSE );
driInitSingleExtension( NULL, ARB_vp_extension );
driInitSingleExtension( NULL, NV_vp_extension );
driInitSingleExtension( NULL, ATI_fs_extension );
driInitExtensions( NULL, point_extensions, GL_FALSE );
-#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+#elif (defined(RADEON_R300) || defined(RADEON_R600))
driInitSingleExtension( NULL, gl_20_extension );
#endif
@@ -1684,13 +1684,13 @@ __DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp)
*/
driInitExtensions( NULL, card_extensions, GL_FALSE );
driInitExtensions( NULL, mm_extensions, GL_FALSE );
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
driInitExtensions( NULL, blend_extensions, GL_FALSE );
driInitSingleExtension( NULL, ARB_vp_extension );
driInitSingleExtension( NULL, NV_vp_extension );
driInitSingleExtension( NULL, ATI_fs_extension );
driInitExtensions( NULL, point_extensions, GL_FALSE );
-#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
+#elif (defined(RADEON_R300) || defined(RADEON_R600))
driInitSingleExtension( NULL, gl_20_extension );
#endif
@@ -1772,13 +1772,13 @@ getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo )
const struct __DriverAPIRec driDriverAPI = {
.InitScreen = radeonInitScreen,
.DestroyScreen = radeonDestroyScreen,
-#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
.CreateContext = r200CreateContext,
.DestroyContext = r200DestroyContext,
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
.CreateContext = r600CreateContext,
.DestroyContext = radeonDestroyContext,
-#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
+#elif defined(RADEON_R300)
.CreateContext = r300CreateContext,
.DestroyContext = radeonDestroyContext,
#else
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index d603f52df7..0c49c3713a 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -55,7 +55,7 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
/* r200 depth buffer is always tiled - this is the formula
according to the docs unless I typo'ed in it
*/
-#if defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
@@ -112,7 +112,7 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
* - 2D (akin to macro-tiled/micro-tiled on older asics)
* only 1D tiling is implemented below
*/
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
GLint x, GLint y, GLint is_depth, GLint is_stencil)
{
@@ -334,8 +334,8 @@ static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
#endif
-#ifndef COMPILE_R300
-#ifndef COMPILE_R600
+#ifndef RADEON_R300
+#ifndef RADEON_R600
static uint32_t
z24s8_to_s8z24(uint32_t val)
{
@@ -409,7 +409,7 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
@@ -423,7 +423,7 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_ARGB1555
#define TAG2(x,y) radeon##x##_ARGB1555##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
@@ -437,7 +437,7 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_ARGB4444
#define TAG2(x,y) radeon##x##_ARGB4444##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
@@ -451,7 +451,7 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_xRGB8888
#define TAG2(x,y) radeon##x##_xRGB8888##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
#define PUT_VALUE(_x, _y, d) { \
GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
@@ -473,7 +473,7 @@ s8z24_to_z24s8(uint32_t val)
#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
-#if defined(RADEON_COMMON_FOR_R600)
+#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
#define PUT_VALUE(_x, _y, d) { \
GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
@@ -506,10 +506,10 @@ s8z24_to_z24s8(uint32_t val)
*/
#define VALUE_TYPE GLushort
-#if defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
*(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
*(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
#else
@@ -517,10 +517,10 @@ s8z24_to_z24s8(uint32_t val)
*(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
#endif
-#if defined(RADEON_COMMON_FOR_R200)
+#if defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
#else
@@ -538,7 +538,7 @@ s8z24_to_z24s8(uint32_t val)
*/
#define VALUE_TYPE GLuint
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
@@ -547,7 +547,7 @@ do { \
tmp |= ((d << 8) & 0xffffff00); \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
@@ -556,7 +556,7 @@ do { \
tmp |= ((d) & 0x00ffffff); \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
@@ -576,17 +576,17 @@ do { \
} while (0)
#endif
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
@@ -607,13 +607,13 @@ do { \
*/
#define VALUE_TYPE GLuint
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
*_ptr = d; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
@@ -627,7 +627,7 @@ do { \
tmp |= (d) & 0xff; \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
@@ -643,18 +643,18 @@ do { \
} while (0)
#endif
-#if defined(COMPILE_R300)
+#if defined(RADEON_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \
}while(0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
@@ -674,7 +674,7 @@ do { \
/* 24 bit depth, 8 bit stencil depthbuffer functions
*/
-#ifdef COMPILE_R300
+#ifdef RADEON_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
@@ -683,7 +683,7 @@ do { \
tmp |= (d) & 0xff; \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
@@ -692,7 +692,7 @@ do { \
tmp |= (d) & 0xff; \
*_ptr = tmp; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
@@ -712,21 +712,21 @@ do { \
} while (0)
#endif
-#ifdef COMPILE_R300
+#ifdef RADEON_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
GLuint tmp = *_ptr; \
d = tmp & 0x000000ff; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R600)
+#elif defined(RADEON_R600)
#define READ_STENCIL( d, _x, _y ) \
do { \
GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
GLuint tmp = *_ptr; \
d = tmp & 0x000000ff; \
} while (0)
-#elif defined(RADEON_COMMON_FOR_R200)
+#elif defined(RADEON_R200)
#define READ_STENCIL( d, _x, _y ) \
do { \
GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
index 9d252aa74c..ae41b90efe 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texstate.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -833,11 +833,14 @@ static void import_tex_obj_state( r100ContextPtr rmesa,
cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
- if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
- GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] );
+ if (texobj->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+ uint32_t *txr_cmd = &rmesa->hw.txr[unit].cmd[TXR_CMD_0];
txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
- RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.txr[unit] );
+ RADEON_STATECHANGE( rmesa, txr[unit] );
+ }
+
+ if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit;
}
else {
@@ -1114,7 +1117,6 @@ static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_objec
RADEON_STATECHANGE( rmesa, ctx );
rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=
(RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
-
RADEON_STATECHANGE( rmesa, tcl );
rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index fad3d1ceda..7b7392b217 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -101,7 +101,12 @@ void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
/* Set Data pointer and additional data for mapped texture image */
static void teximage_set_map_data(radeon_texture_image *image)
{
- radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel];
+ radeon_mipmap_level *lvl;
+
+ if (!image->mt)
+ return;
+
+ lvl = &image->mt->levels[image->mtlevel];
image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset;
image->base.RowStride = lvl->rowstride / image->mt->bpp;
@@ -659,11 +664,6 @@ static void radeon_teximage(
if (dims == 3)
_mesa_free(dstImageOffsets);
}
-
- /* SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- radeon_generate_mipmap(ctx, target, texObj);
- }
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -792,11 +792,6 @@ static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int leve
format, type, pixels, packing))
_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
}
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- radeon_generate_mipmap(ctx, target, texObj);
- }
}
radeon_teximage_unmap(image);
@@ -979,7 +974,7 @@ int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *t
radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]);
if (RADEON_DEBUG & RADEON_TEXTURE)
fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt);
- if (t->mt == image->mt) {
+ if (t->mt == image->mt || (!image->mt && !image->base.Data)) {
if (RADEON_DEBUG & RADEON_TEXTURE)
fprintf(stderr, "OK\n");
diff --git a/src/mesa/drivers/dri/s3v/s3v_tex.c b/src/mesa/drivers/dri/s3v/s3v_tex.c
index 9b92519862..ec1182f34f 100644
--- a/src/mesa/drivers/dri/s3v/s3v_tex.c
+++ b/src/mesa/drivers/dri/s3v/s3v_tex.c
@@ -536,24 +536,13 @@ void s3vInitTextureFuncs( GLcontext *ctx )
#endif
ctx->Driver.TexEnv = s3vTexEnv;
- ctx->Driver.ChooseTextureFormat = _mesa_choose_tex_format;
- ctx->Driver.TexImage1D = _mesa_store_teximage1d;
ctx->Driver.TexImage2D = s3vTexImage2D;
- ctx->Driver.TexImage3D = _mesa_store_teximage3d;
- ctx->Driver.TexSubImage1D = _mesa_store_texsubimage1d;
ctx->Driver.TexSubImage2D = s3vTexSubImage2D;
- ctx->Driver.TexSubImage3D = _mesa_store_texsubimage3d;
- ctx->Driver.CopyTexImage1D = _swrast_copy_teximage1d;
- ctx->Driver.CopyTexImage2D = _swrast_copy_teximage2d;
- ctx->Driver.CopyTexSubImage1D = _swrast_copy_texsubimage1d;
- ctx->Driver.CopyTexSubImage2D = _swrast_copy_texsubimage2d;
- ctx->Driver.CopyTexSubImage3D = _swrast_copy_texsubimage3d;
ctx->Driver.BindTexture = s3vBindTexture;
ctx->Driver.DeleteTexture = s3vDeleteTexture;
ctx->Driver.TexParameter = s3vTexParameter;
ctx->Driver.UpdateTexturePalette = 0;
ctx->Driver.IsTextureResident = s3vIsTextureResident;
- ctx->Driver.TestProxyTexImage = _mesa_test_proxy_teximage;
s3vInitTextureObjects( ctx );
}
diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c
index a858af30c1..f4947daa06 100644
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -43,6 +43,7 @@
#include "tnl/t_pipeline.h"
#include "vbo/vbo.h"
#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
#include "utils.h"
#include "swrast_priv.h"
@@ -62,6 +63,7 @@
#define need_GL_SGI_color_table
/* sw extensions not associated with some GL version */
+#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_shader_objects
#define need_GL_ARB_vertex_array_object
#define need_GL_ARB_vertex_program
@@ -95,6 +97,8 @@ const struct dri_extension card_extensions[] =
{ "GL_EXT_histogram", GL_EXT_histogram_functions },
{ "GL_SGI_color_table", GL_SGI_color_table_functions },
+ { "GL_ARB_depth_clamp", NULL },
+ { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{ "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
{ "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions },
{ "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
@@ -109,8 +113,10 @@ const struct dri_extension card_extensions[] =
{ "GL_EXT_paletted_texture", GL_EXT_paletted_texture_functions },
{ "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions },
{ "GL_MESA_resize_buffers", GL_MESA_resize_buffers_functions },
+ { "GL_NV_depth_clamp", NULL },
{ "GL_NV_vertex_program", GL_NV_vertex_program_functions },
{ "GL_NV_fragment_program", GL_NV_fragment_program_functions },
+ { "GL_NV_fragment_program_option", NULL },
{ NULL, NULL }
};
@@ -645,6 +651,8 @@ driCreateNewContext(__DRIscreen *screen, const __DRIconfig *config,
_mesa_enable_2_0_extensions(mesaCtx);
_mesa_enable_2_1_extensions(mesaCtx);
+ _mesa_meta_init(mesaCtx);
+
return ctx;
}
@@ -656,6 +664,7 @@ driDestroyContext(__DRIcontext *ctx)
if (ctx) {
mesaCtx = &ctx->Base;
+ _mesa_meta_free(mesaCtx);
_swsetup_DestroyContext( mesaCtx );
_swrast_DestroyContext( mesaCtx );
_tnl_DestroyContext( mesaCtx );
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_tex.c b/src/mesa/drivers/dri/tdfx/tdfx_tex.c
index 1f7257eaea..f6a48b3ae1 100644
--- a/src/mesa/drivers/dri/tdfx/tdfx_tex.c
+++ b/src/mesa/drivers/dri/tdfx/tdfx_tex.c
@@ -176,6 +176,54 @@ logbase2(int n)
}
+/* Driver GenerateMipmap hook: starting at texObj->BaseLevel, fill each
+ * following mipmap level by 2x2-halving the image of the level before it.
+ */
+static void
+tdfxGenerateMipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ GLint mipWidth, mipHeight;
+ struct gl_texture_image *mipImage; /* the new/next image */
+ struct gl_texture_image *texImage; /* source image for the next halving */
+ const GLint maxLevels = _mesa_max_texture_levels(ctx, texObj->Target);
+ GLint level = texObj->BaseLevel;
+ GLsizei width, height, texelBytes;
+ const tdfxMipMapLevel *mml; /* TDFX_TEXIMAGE_DATA of the source image */
+ texImage = _mesa_get_tex_image(ctx, texObj, target, level);
+ assert(!texImage->IsCompressed);
+ mml = TDFX_TEXIMAGE_DATA(texImage);
+ /* Was read uninitialized. NOTE(review): assumes TexFormat->TexelBytes exists in this tree. */
+ texelBytes = texImage->TexFormat->TexelBytes;
+ width = texImage->Width;
+ height = texImage->Height;
+ while (level < texObj->MaxLevel && level < maxLevels - 1) {
+ mipWidth = width / 2;
+ if (!mipWidth) {
+ mipWidth = 1;
+ }
+ mipHeight = height / 2;
+ if (!mipHeight) {
+ mipHeight = 1;
+ }
+ if ((mipWidth == width) && (mipHeight == height)) {
+ break; /* size can no longer shrink (already 1x1) */
+ }
+ ++level;
+ mipImage = _mesa_select_tex_image(ctx, texObj, target, level); /* NOTE(review): not checked for NULL */
+ _mesa_halve2x2_teximage2d(ctx,
+ texImage,
+ texelBytes,
+ mml->width, mml->height,
+ texImage->Data, mipImage->Data);
+ texImage = mipImage; /* the level just written is the next source */
+ mml = TDFX_TEXIMAGE_DATA(mipImage);
+ width = mipWidth;
+ height = mipHeight;
+ }
+}
+
+
/*
* Compute various texture image parameters.
* Input: w, h - source texture width and height
@@ -1397,45 +1445,6 @@ tdfxTexImage2D(GLcontext *ctx, GLenum target, GLint level,
width, height, 1,
format, type, pixels, packing);
}
-
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- GLint mipWidth, mipHeight;
- tdfxMipMapLevel *mip;
- struct gl_texture_image *mipImage;
- const GLint maxLevels = _mesa_max_texture_levels(ctx, texObj->Target);
-
- assert(!texImage->IsCompressed);
-
- while (level < texObj->MaxLevel && level < maxLevels - 1) {
- mipWidth = width / 2;
- if (!mipWidth) {
- mipWidth = 1;
- }
- mipHeight = height / 2;
- if (!mipHeight) {
- mipHeight = 1;
- }
- if ((mipWidth == width) && (mipHeight == height)) {
- break;
- }
- _mesa_TexImage2D(target, ++level, internalFormat,
- mipWidth, mipHeight, border,
- format, type,
- NULL);
- mipImage = _mesa_select_tex_image(ctx, texObj, target, level);
- mip = TDFX_TEXIMAGE_DATA(mipImage);
- _mesa_halve2x2_teximage2d(ctx,
- texImage,
- texelBytes,
- mml->width, mml->height,
- texImage->Data, mipImage->Data);
- texImage = mipImage;
- mml = mip;
- width = mipWidth;
- height = mipHeight;
- }
- }
}
RevalidateTexture(ctx, texObj);
@@ -1507,44 +1516,6 @@ tdfxTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
format, type, pixels, packing);
}
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- GLint mipWidth, mipHeight;
- tdfxMipMapLevel *mip;
- struct gl_texture_image *mipImage;
- const GLint maxLevels = _mesa_max_texture_levels(ctx, texObj->Target);
-
- assert(!texImage->IsCompressed);
-
- width = texImage->Width;
- height = texImage->Height;
- while (level < texObj->MaxLevel && level < maxLevels - 1) {
- mipWidth = width / 2;
- if (!mipWidth) {
- mipWidth = 1;
- }
- mipHeight = height / 2;
- if (!mipHeight) {
- mipHeight = 1;
- }
- if ((mipWidth == width) && (mipHeight == height)) {
- break;
- }
- ++level;
- mipImage = _mesa_select_tex_image(ctx, texObj, target, level);
- mip = TDFX_TEXIMAGE_DATA(mipImage);
- _mesa_halve2x2_teximage2d(ctx,
- texImage,
- texelBytes,
- mml->width, mml->height,
- texImage->Data, mipImage->Data);
- texImage = mipImage;
- mml = mip;
- width = mipWidth;
- height = mipHeight;
- }
- }
-
ti->reloadImages = GL_TRUE; /* signal the image needs to be reloaded */
fxMesa->new_state |= TDFX_NEW_TEXTURE; /* XXX this might be a bit much */
}
@@ -1703,11 +1674,6 @@ tdfxCompressedTexImage2D (GLcontext *ctx, GLenum target,
MEMCPY(texImage->Data, data, texImage->CompressedSize);
}
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- assert(!texImage->IsCompressed);
- }
-
RevalidateTexture(ctx, texObj);
ti->reloadImages = GL_TRUE;
@@ -1770,11 +1736,6 @@ tdfxCompressedTexSubImage2D( GLcontext *ctx, GLenum target,
texImage->Data);
}
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- assert(!texImage->IsCompressed);
- }
-
RevalidateTexture(ctx, texObj);
ti->reloadImages = GL_TRUE;
@@ -1914,4 +1875,5 @@ void tdfxInitTextureFuncs( struct dd_function_table *functions )
functions->CompressedTexImage2D = tdfxCompressedTexImage2D;
functions->CompressedTexSubImage2D = tdfxCompressedTexSubImage2D;
functions->UpdateTexturePalette = tdfxUpdateTexturePalette;
+ functions->GenerateMipmap = tdfxGenerateMipmap;
}
diff --git a/src/mesa/drivers/dri/unichrome/via_tex.c b/src/mesa/drivers/dri/unichrome/via_tex.c
index d2010f0907..54073e7691 100644
--- a/src/mesa/drivers/dri/unichrome/via_tex.c
+++ b/src/mesa/drivers/dri/unichrome/via_tex.c
@@ -818,11 +818,6 @@ static void viaTexImage(GLcontext *ctx,
}
}
- /* GL_SGIS_generate_mipmap */
- if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target, texObj);
- }
-
_mesa_unmap_teximage_pbo(ctx, packing);
}
diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c
index 904659e345..692657a5df 100644
--- a/src/mesa/drivers/osmesa/osmesa.c
+++ b/src/mesa/drivers/osmesa/osmesa.c
@@ -50,6 +50,7 @@
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
#include "vbo/vbo.h"
@@ -1258,6 +1259,8 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits,
osmesa->bInd = bind;
osmesa->aInd = aind;
+ _mesa_meta_init(&osmesa->mesa);
+
/* Initialize the software rasterizer and helper modules. */
{
GLcontext *ctx = &osmesa->mesa;
@@ -1304,6 +1307,8 @@ OSMesaDestroyContext( OSMesaContext osmesa )
if (osmesa->rb)
_mesa_reference_renderbuffer(&osmesa->rb, NULL);
+ _mesa_meta_free( &osmesa->mesa );
+
_swsetup_DestroyContext( &osmesa->mesa );
_tnl_DestroyContext( &osmesa->mesa );
_vbo_DestroyContext( &osmesa->mesa );
diff --git a/src/mesa/drivers/windows/gdi/mesa.def b/src/mesa/drivers/windows/gdi/mesa.def
index bd3e5b2137..5abcd1d927 100644
--- a/src/mesa/drivers/windows/gdi/mesa.def
+++ b/src/mesa/drivers/windows/gdi/mesa.def
@@ -943,6 +943,15 @@ EXPORTS
_mesa_update_framebuffer_visual
_mesa_use_program
_mesa_Viewport
+ _mesa_meta_CopyColorSubTable
+ _mesa_meta_CopyColorTable
+ _mesa_meta_CopyConvolutionFilter1D
+ _mesa_meta_CopyConvolutionFilter2D
+ _mesa_meta_CopyTexImage1D
+ _mesa_meta_CopyTexImage2D
+ _mesa_meta_CopyTexSubImage1D
+ _mesa_meta_CopyTexSubImage2D
+ _mesa_meta_CopyTexSubImage3D
_mesa_wait_query
_swrast_Accum
_swrast_Bitmap
@@ -953,15 +962,6 @@ EXPORTS
_swrast_Clear
_swrast_choose_line
_swrast_choose_triangle
- _swrast_CopyColorSubTable
- _swrast_CopyColorTable
- _swrast_CopyConvolutionFilter1D
- _swrast_CopyConvolutionFilter2D
- _swrast_copy_teximage1d
- _swrast_copy_teximage2d
- _swrast_copy_texsubimage1d
- _swrast_copy_texsubimage2d
- _swrast_copy_texsubimage3d
_swrast_CreateContext
_swrast_DestroyContext
_swrast_exec_fragment_program
diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c
index e1971db693..8929b22af1 100644
--- a/src/mesa/drivers/windows/gdi/wmesa.c
+++ b/src/mesa/drivers/windows/gdi/wmesa.c
@@ -12,6 +12,7 @@
#include "framebuffer.h"
#include "renderbuffer.h"
#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
#include "vbo/vbo.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
@@ -1515,6 +1516,8 @@ WMesaContext WMesaCreateContext(HDC hDC,
_mesa_enable_2_0_extensions(ctx);
_mesa_enable_2_1_extensions(ctx);
+ _mesa_meta_init(ctx);
+
/* Initialize the software rasterizer and helper modules. */
if (!_swrast_CreateContext(ctx) ||
!_vbo_CreateContext(ctx) ||
@@ -1558,6 +1561,8 @@ void WMesaDestroyContext( WMesaContext pwc )
DeleteObject(pwc->clearPen);
DeleteObject(pwc->clearBrush);
+ _mesa_meta_free(ctx);
+
_swsetup_DestroyContext(ctx);
_tnl_DestroyContext(ctx);
_vbo_DestroyContext(ctx);
diff --git a/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
index 342a742867..7ac425a109 100644
--- a/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
+++ b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
@@ -1346,6 +1346,8 @@ static void wmesa_update_state_first_time(
struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
+ _mesa_init_driver_functions(&ctx->Driver);
+
/*
* XXX these function pointers could be initialized just once during
* context creation since they don't depend on any state changes.
@@ -1362,8 +1364,6 @@ static void wmesa_update_state_first_time(
ctx->Driver.Viewport = wmesa_viewport;
- ctx->Driver.Accum = _swrast_Accum;
- ctx->Driver.Bitmap = _swrast_Bitmap;
ctx->Driver.Clear = clear;
ctx->Driver.Flush = flush;
@@ -1371,28 +1371,6 @@ static void wmesa_update_state_first_time(
ctx->Driver.ClearColor = clear_color;
ctx->Driver.Enable = enable;
- ctx->Driver.CopyPixels = _swrast_CopyPixels;
- ctx->Driver.DrawPixels = _swrast_DrawPixels;
- ctx->Driver.ReadPixels = _swrast_ReadPixels;
-
- ctx->Driver.ChooseTextureFormat = _mesa_choose_tex_format;
- ctx->Driver.TexImage1D = _mesa_store_teximage1d;
- ctx->Driver.TexImage2D = _mesa_store_teximage2d;
- ctx->Driver.TexImage3D = _mesa_store_teximage3d;
- ctx->Driver.TexSubImage1D = _mesa_store_texsubimage1d;
- ctx->Driver.TexSubImage2D = _mesa_store_texsubimage2d;
- ctx->Driver.TexSubImage3D = _mesa_store_texsubimage3d;
- ctx->Driver.TestProxyTexImage = _mesa_test_proxy_teximage;
-
- ctx->Driver.CopyTexImage1D = _swrast_copy_teximage1d;
- ctx->Driver.CopyTexImage2D = _swrast_copy_teximage2d;
- ctx->Driver.CopyTexSubImage1D = _swrast_copy_texsubimage1d;
- ctx->Driver.CopyTexSubImage2D = _swrast_copy_texsubimage2d;
- ctx->Driver.CopyTexSubImage3D = _swrast_copy_texsubimage3d;
- ctx->Driver.CopyColorTable = _swrast_CopyColorTable;
- ctx->Driver.CopyColorSubTable = _swrast_CopyColorSubTable;
- ctx->Driver.CopyConvolutionFilter1D = _swrast_CopyConvolutionFilter1D;
- ctx->Driver.CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D;
// Does not apply for Mesa 5.x
//ctx->Driver.BaseCompressedTexFormat = _mesa_base_compressed_texformat;
diff --git a/src/mesa/drivers/windows/icd/mesa.def b/src/mesa/drivers/windows/icd/mesa.def
index 465b380a0c..25ac08a2f0 100644
--- a/src/mesa/drivers/windows/icd/mesa.def
+++ b/src/mesa/drivers/windows/icd/mesa.def
@@ -75,6 +75,15 @@ EXPORTS
_mesa_strcmp
_mesa_test_proxy_teximage
_mesa_Viewport
+ _mesa_meta_CopyColorSubTable
+ _mesa_meta_CopyColorTable
+ _mesa_meta_CopyConvolutionFilter1D
+ _mesa_meta_CopyConvolutionFilter2D
+ _mesa_meta_CopyTexImage1D
+ _mesa_meta_CopyTexImage2D
+ _mesa_meta_CopyTexSubImage1D
+ _mesa_meta_CopyTexSubImage2D
+ _mesa_meta_CopyTexSubImage3D
_swrast_Accum
_swrast_Bitmap
_swrast_CopyPixels
@@ -84,15 +93,6 @@ EXPORTS
_swrast_Clear
_swrast_choose_line
_swrast_choose_triangle
- _swrast_CopyColorSubTable
- _swrast_CopyColorTable
- _swrast_CopyConvolutionFilter1D
- _swrast_CopyConvolutionFilter2D
- _swrast_copy_teximage1d
- _swrast_copy_teximage2d
- _swrast_copy_texsubimage1d
- _swrast_copy_texsubimage2d
- _swrast_copy_texsubimage3d
_swrast_CreateContext
_swrast_DestroyContext
_swrast_InvalidateState
diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c
index 2c7be9f182..79b058634c 100644
--- a/src/mesa/drivers/x11/xm_api.c
+++ b/src/mesa/drivers/x11/xm_api.c
@@ -1317,6 +1317,7 @@ xmesa_convert_from_x_visual_type( int visualType )
#define need_GL_SGI_color_table
/* sw extensions not associated with some GL version */
+#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_shader_objects
#define need_GL_ARB_sync
#define need_GL_ARB_vertex_program
@@ -1347,6 +1348,8 @@ const struct dri_extension card_extensions[] =
{ "GL_EXT_histogram", GL_EXT_histogram_functions },
{ "GL_SGI_color_table", GL_SGI_color_table_functions },
+ { "GL_ARB_depth_clamp", NULL },
+ { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{ "GL_ARB_shader_objects", GL_ARB_shader_objects_functions },
{ "GL_ARB_sync", GL_ARB_sync_functions },
{ "GL_ARB_vertex_program", GL_ARB_vertex_program_functions },
@@ -1358,6 +1361,7 @@ const struct dri_extension card_extensions[] =
{ "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions },
{ "GL_EXT_paletted_texture", GL_EXT_paletted_texture_functions },
{ "GL_MESA_resize_buffers", GL_MESA_resize_buffers_functions },
+ { "GL_NV_depth_clamp", NULL },
{ "GL_NV_vertex_program", GL_NV_vertex_program_functions },
{ "GL_NV_fragment_program", GL_NV_fragment_program_functions },
{ NULL, NULL }
@@ -1644,8 +1648,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
xmesa_register_swrast_functions( mesaCtx );
_swsetup_Wakeup(mesaCtx);
- if (TEST_META_FUNCS)
- _mesa_meta_init(mesaCtx);
+ _mesa_meta_init(mesaCtx);
return c;
}
@@ -1661,8 +1664,7 @@ void XMesaDestroyContext( XMesaContext c )
FXdestroyContext( XMESA_BUFFER(mesaCtx->DrawBuffer) );
#endif
- if (TEST_META_FUNCS)
- _mesa_meta_free( mesaCtx );
+ _mesa_meta_free( mesaCtx );
_swsetup_DestroyContext( mesaCtx );
_swrast_DestroyContext( mesaCtx );
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
index 465a40ac8d..e2d4aa9b2d 100644
--- a/src/mesa/drivers/x11/xm_dd.c
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -1150,10 +1150,11 @@ xmesa_init_driver_functions( XMesaVisual xmvisual,
driver->Enable = enable;
driver->Viewport = xmesa_viewport;
if (TEST_META_FUNCS) {
- driver->Clear = _mesa_meta_clear;
- driver->CopyPixels = _mesa_meta_copy_pixels;
- driver->BlitFramebuffer = _mesa_meta_blit_framebuffer;
- driver->DrawPixels = _mesa_meta_draw_pixels;
+ driver->Clear = _mesa_meta_Clear;
+ driver->CopyPixels = _mesa_meta_CopyPixels;
+ driver->BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+ driver->DrawPixels = _mesa_meta_DrawPixels;
+ driver->Bitmap = _mesa_meta_Bitmap;
}
else {
driver->Clear = clear_buffers;
@@ -1167,8 +1168,8 @@ xmesa_init_driver_functions( XMesaVisual xmvisual,
else if (xmvisual->undithered_pf == PF_5R6G5B) {
driver->DrawPixels = xmesa_DrawPixels_5R6G5B;
}
- }
#endif
+ }
driver->TestProxyTexImage = test_proxy_teximage;
#if ENABLE_EXT_texure_compression_s3tc
driver->ChooseTextureFormat = choose_tex_format;