diff options
Diffstat (limited to 'src/mesa/drivers/dri/r300')
24 files changed, 1486 insertions, 717 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 080c79898b..53fb7caa95 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -8,11 +8,15 @@ LIBNAME = r300compiler C_SOURCES = \ radeon_code.c \ radeon_compiler.c \ - radeon_nqssadce.c \ radeon_program.c \ - radeon_opcodes.c \ + radeon_program_print.c \ + radeon_opcodes.c \ radeon_program_alu.c \ radeon_program_pair.c \ + radeon_dataflow.c \ + radeon_dataflow_annotate.c \ + radeon_dataflow_dealias.c \ + radeon_dataflow_swizzles.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index ded6966d08..cfa48a59e3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -36,7 +36,6 @@ #include <stdio.h> #include "../r300_reg.h" -#include "radeon_nqssadce.h" #include "radeon_compiler.h" #define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) @@ -92,7 +91,7 @@ static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle) * Check whether the given instruction supports the swizzle and negate * combinations in the given source register. */ -int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg) +static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { if (reg.Abs) reg.Negate = RC_MASK_NONE; @@ -134,15 +133,16 @@ int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg) } -/** - * Generate MOV dst, src using only native swizzles. - */ -void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src) +static void r300_swizzle_split( + struct rc_src_register src, unsigned int mask, + struct rc_swizzle_split * split) { if (src.Abs) src.Negate = RC_MASK_NONE; - while(dst.WriteMask) { + split->NumPhases = 0; + + while(mask) { const struct swizzle_data *best_swizzle = 0; unsigned int best_matchcount = 0; unsigned int best_matchmask = 0; @@ -153,7 +153,7 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, st unsigned int matchcount = 0; unsigned int matchmask = 0; for(comp = 0; comp < 3; ++comp) { - if (!GET_BIT(dst.WriteMask, comp)) + if (!GET_BIT(mask, comp)) continue; unsigned int swz = GET_SWZ(src.Swizzle, comp); if (swz == RC_SWIZZLE_UNUSED) @@ -172,23 +172,24 @@ void r300FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, st best_swizzle = sd; best_matchcount = matchcount; best_matchmask = matchmask; - if (matchmask == (dst.WriteMask & RC_MASK_XYZ)) + if (matchmask == (mask & RC_MASK_XYZ)) break; } } - struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg = dst; - inst->I.DstReg.WriteMask &= (best_matchmask | RC_MASK_W); - inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (best_matchmask & src.Negate) ? RC_MASK_XYZW : RC_MASK_NONE; - /* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */ + if (mask & RC_MASK_W) + best_matchmask |= RC_MASK_W; - dst.WriteMask &= ~inst->I.DstReg.WriteMask; + split->Phase[split->NumPhases++] = best_matchmask; + mask &= ~best_matchmask; } } +struct rc_swizzle_caps r300_swizzle_caps = { + .IsNative = r300_swizzle_is_native, + .Split = r300_swizzle_split +}; + /** * Translate an RGB (XYZ) swizzle into the hardware code for the given diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h index 728c2cd972..118476af13 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h @@ -28,12 +28,9 @@ #ifndef __R300_FRAGPROG_SWIZZLE_H_ #define __R300_FRAGPROG_SWIZZLE_H_ -#include "radeon_program.h" +#include "radeon_swizzle.h" -struct nqssadce_state; - -int r300FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg); -void r300FPBuildSwizzle(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src); +extern struct rc_swizzle_caps r300_swizzle_caps; unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle); unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 0aa40c0587..bf9bea685a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -24,18 +24,18 @@ #include <stdio.h> -#include "radeon_nqssadce.h" #include "radeon_program_alu.h" #include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" #include "r500_fragprog.h" -static void nqssadce_init(struct nqssadce_state* s) +static void dataflow_outputs_mark_use(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) { - struct r300_fragment_program_compiler * c = s->UserData; - s->Outputs[c->OutputColor].Sourced = RC_MASK_XYZW; - s->Outputs[c->OutputDepth].Sourced = RC_MASK_W; + struct r300_fragment_program_compiler * c = userdata; + callback(data, c->OutputColor, RC_MASK_XYZW); + callback(data, c->OutputDepth, RC_MASK_W); } static void rewrite_depth_out(struct r300_fragment_program_compiler * c) @@ -92,6 +92,8 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { &radeonTransformTrigScale, 0 } }; radeonLocalTransform(&c->Base, 4, transformations); + + c->Base.SwizzleCaps = &r500_swizzle_caps; } else { struct radeon_program_transformation transformations[] = { { &r300_transform_TEX, c }, @@ -99,33 +101,23 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { &radeonTransformTrigSimple, 0 } }; radeonLocalTransform(&c->Base, 3, transformations); + + c->Base.SwizzleCaps = &r300_swizzle_caps; } if (c->Base.Debug) { fprintf(stderr, "Fragment Program: After native rewrite:\n"); - rc_print_program(&c->Base.Program); + rc_print_program(&c->Base.Program, 0); fflush(stderr); } - if (c->is_r500) { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r500FPIsNativeSwizzle, - .BuildSwizzle = &r500FPBuildSwizzle - }; - radeonNqssaDce(&c->Base, &nqssadce, c); - } else { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle - }; - radeonNqssaDce(&c->Base, &nqssadce, c); - } + rc_dataflow_annotate(&c->Base, &dataflow_outputs_mark_use, c); + rc_dataflow_dealias(&c->Base); + rc_dataflow_swizzles(&c->Base); if (c->Base.Debug) { - fprintf(stderr, "Compiler: after NqSSA-DCE:\n"); - rc_print_program(&c->Base.Program); + fprintf(stderr, "Compiler: after dataflow passes:\n"); + rc_print_program(&c->Base.Program, 0); fflush(stderr); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 0efd2c91e6..c64648ff3b 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -26,9 +26,9 @@ #include "../r300_reg.h" -#include "radeon_nqssadce.h" -#include "radeon_program.h" +#include "radeon_dataflow.h" #include "radeon_program_alu.h" +#include "radeon_swizzle.h" /* @@ -545,18 +545,19 @@ static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler) } } -static void nqssadceInit(struct nqssadce_state* s) +static void dataflow_outputs_mark_used(void * userdata, void * data, + void (*callback)(void *, unsigned int, unsigned int)) { - struct r300_vertex_program_compiler * compiler = s->UserData; + struct r300_vertex_program_compiler * c = userdata; int i; for(i = 0; i < 32; ++i) { - if (compiler->RequiredOutputs & (1 << i)) - s->Outputs[i].Sourced = RC_MASK_XYZW; + if (c->RequiredOutputs & (1 << i)) + callback(data, i, RC_MASK_XYZW); } } -static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg) +static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { (void) opcode; (void) reg; @@ -565,9 +566,16 @@ static int swizzleIsNative(rc_opcode opcode, struct rc_src_register reg) } +static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { + .IsNative = &swizzle_is_native, + .Split = 0 /* should never be called */ +}; + void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; + addArtificialOutputs(compiler); { @@ -579,7 +587,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after native rewrite:\n"); - rc_print_program(&compiler->Base.Program); + rc_print_program(&compiler->Base.Program, 0); fflush(stderr); } @@ -596,26 +604,22 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Vertex program after source conflict resolve:\n"); - rc_print_program(&compiler->Base.Program); + rc_print_program(&compiler->Base.Program, 0); fflush(stderr); } - { - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadceInit, - .IsNativeSwizzle = &swizzleIsNative, - .BuildSwizzle = NULL - }; - radeonNqssaDce(&compiler->Base, &nqssadce, compiler); + rc_dataflow_annotate(&compiler->Base, &dataflow_outputs_mark_used, compiler); + rc_dataflow_dealias(&compiler->Base); + rc_dataflow_swizzles(&compiler->Base); - /* We need this step for reusing temporary registers */ - allocate_temporary_registers(compiler); + /* This invalidates dataflow annotations and should be replaced + * by a future generic register allocation pass. */ + allocate_temporary_registers(compiler); - if (compiler->Base.Debug) { - fprintf(stderr, "Vertex program after NQSSADCE:\n"); - rc_print_program(&compiler->Base.Program); - fflush(stderr); - } + if (compiler->Base.Debug) { + fprintf(stderr, "Vertex program after dataflow:\n"); + rc_print_program(&compiler->Base.Program, 0); + fflush(stderr); } translate_vertex_program(compiler); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 3e994ebd1b..971465e359 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -169,7 +169,7 @@ int r500_transform_TEX( return 1; } -int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg) +static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { unsigned int relevant; int i; @@ -227,36 +227,38 @@ int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg) } /** - * Implement a MOV with a potentially non-native swizzle. + * Split source register access. * * The only thing we *cannot* do in an ALU instruction is per-component - * negation. Therefore, we split the MOV into two instructions when necessary. + * negation. */ -void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src) +static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask, + struct rc_swizzle_split * split) { unsigned int negatebase[2] = { 0, 0 }; int i; for(i = 0; i < 4; ++i) { unsigned int swz = GET_SWZ(src.Swizzle, i); - if (swz == RC_SWIZZLE_UNUSED) + if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i)) continue; negatebase[GET_BIT(src.Negate, i)] |= 1 << i; } + split->NumPhases = 0; + for(i = 0; i <= 1; ++i) { if (!negatebase[i]) continue; - struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev); - inst->I.Opcode = RC_OPCODE_MOV; - inst->I.DstReg = dst; - inst->I.DstReg.WriteMask = negatebase[i]; - inst->I.SrcReg[0] = src; - inst->I.SrcReg[0].Negate = (i == 0) ? RC_MASK_NONE : RC_MASK_XYZW; + split->Phase[split->NumPhases++] = negatebase[i]; } } +struct rc_swizzle_caps r500_swizzle_caps = { + .IsNative = r500_swizzle_is_native, + .Split = r500_swizzle_split +}; static char *toswiz(int swiz_val) { switch(swiz_val) { diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 887d4abbd2..92ac75d5fd 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -34,15 +34,13 @@ #define __R500_FRAGPROG_H_ #include "radeon_compiler.h" -#include "radeon_nqssadce.h" +#include "radeon_swizzle.h" extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c); -extern int r500FPIsNativeSwizzle(rc_opcode opcode, struct rc_src_register reg); - -extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct rc_dst_register dst, struct rc_src_register src); +extern struct rc_swizzle_caps r500_swizzle_caps; extern int r500_transform_TEX( struct radeon_compiler * c, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index babdcffd3a..d0b78ec1c8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -93,6 +93,41 @@ void rc_error(struct radeon_compiler * c, const char * fmt, ...) } } +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) +{ + rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); + return 1; +} + +/** + * Recompute c->Program.InputsRead and c->Program.OutputsWritten + * based on which inputs and outputs are actually referenced + * in program instructions. + */ +void rc_calculate_inputs_outputs(struct radeon_compiler * c) +{ + struct rc_instruction *inst; + + c->Program.InputsRead = 0; + c->Program.OutputsWritten = 0; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) + { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + int i; + + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->I.SrcReg[i].File == RC_FILE_INPUT) + c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; + } + + if (opcode->HasDstReg) { + if (inst->I.DstReg.File == RC_FILE_OUTPUT) + c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; + } + } +} + /** * Rewrite the program such that everything that source the given input * register will source new_input instead. diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index 018f9bba06..87a732cd90 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -27,6 +27,7 @@ #include "radeon_code.h" #include "radeon_program.h" +struct rc_swizzle_caps; struct radeon_compiler { struct memory_pool Pool; @@ -34,6 +35,14 @@ struct radeon_compiler { unsigned Debug:1; unsigned Error:1; char * ErrorMsg; + + /** + * Variables used internally, not be touched by callers + * of the compiler + */ + /*@{*/ + struct rc_swizzle_caps * SwizzleCaps; + /*@}*/ }; void rc_init(struct radeon_compiler * c); @@ -42,6 +51,23 @@ void rc_destroy(struct radeon_compiler * c); void rc_debug(struct radeon_compiler * c, const char * fmt, ...); void rc_error(struct radeon_compiler * c, const char * fmt, ...); +int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion); + +/** + * This macro acts like an if-statement that can be used to implement + * non-aborting assertions in the compiler. + * + * It checks whether \p cond is true. If not, an internal compiler error is + * flagged and the if-clause is run. + * + * A typical use-case would be: + * + * if (rc_assert(c, condition-that-must-be-true)) + * return; + */ +#define rc_assert(c, cond) \ + (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond)) + void rc_calculate_inputs_outputs(struct radeon_compiler * c); void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c new file mode 100644 index 0000000000..af6777a7bd --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +static void add_ref_to_vector(struct rc_dataflow_ref * ref, struct rc_dataflow_vector * vector) +{ + ref->Vector = vector; + ref->Prev = &vector->Refs; + ref->Next = vector->Refs.Next; + ref->Prev->Next = ref; + ref->Next->Prev = ref; +} + +struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c, + struct rc_dataflow_vector * vector, struct rc_instruction * inst) +{ + struct rc_dataflow_ref * ref = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_ref)); + ref->ReadInstruction = inst; + ref->UseMask = 0; + + add_ref_to_vector(ref, vector); + + return ref; +} + +struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c, + rc_register_file file, unsigned int index, struct rc_instruction * inst) +{ + struct rc_dataflow_vector * vec = memory_pool_malloc(&c->Pool, sizeof(struct rc_dataflow_vector)); + + memset(vec, 0, sizeof(struct rc_dataflow_vector)); + vec->File = file; + vec->Index = index; + vec->WriteInstruction = inst; + + vec->Refs.Next = vec->Refs.Prev = &vec->Refs; + + return vec; +} + +void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref) +{ + ref->Prev->Next = ref->Next; + ref->Next->Prev = ref->Prev; +} + +void rc_dataflow_remove_instruction(struct rc_instruction * inst) +{ + for(unsigned int i = 0; i < 3; ++i) { + if (inst->Dataflow.SrcReg[i]) { + rc_dataflow_remove_ref(inst->Dataflow.SrcReg[i]); + inst->Dataflow.SrcReg[i] = 0; + } + if (inst->Dataflow.SrcRegAddress[i]) { + rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[i]); + inst->Dataflow.SrcRegAddress[i] = 0; + } + } + + if (inst->Dataflow.DstReg) { + while(inst->Dataflow.DstReg->Refs.Next != &inst->Dataflow.DstReg->Refs) { + struct rc_dataflow_ref * ref = inst->Dataflow.DstReg->Refs.Next; + rc_dataflow_remove_ref(ref); + if (inst->Dataflow.DstRegPrev) + add_ref_to_vector(ref, inst->Dataflow.DstRegPrev->Vector); + } + + inst->Dataflow.DstReg->WriteInstruction = 0; + inst->Dataflow.DstReg = 0; + } + + if (inst->Dataflow.DstRegPrev) { + rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev); + inst->Dataflow.DstRegPrev = 0; + } + + inst->Dataflow.DstRegAliased = 0; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h new file mode 100644 index 0000000000..c9856affe8 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_DATAFLOW_H +#define RADEON_DATAFLOW_H + +#include "radeon_program_constants.h" + +struct radeon_compiler; +struct rc_instruction; +struct rc_swizzle_caps; + +struct rc_dataflow_vector; + +struct rc_dataflow_ref { + struct rc_dataflow_vector * Vector; + + /** + * Linked list of references to the above-mentioned vector. + * The linked list is \em not sorted. + */ + /*@{*/ + struct rc_dataflow_ref * Prev; + struct rc_dataflow_ref * Next; + /*@}*/ + + unsigned int UseMask:4; + struct rc_instruction * ReadInstruction; +}; + +struct rc_dataflow_vector { + rc_register_file File:3; + unsigned int Index:RC_REGISTER_INDEX_BITS; + + /** For private use in compiler passes. MUST BE RESET TO 0 by the end of each pass. + * The annotate pass uses this bit to track whether a vector is in the + * update stack. + */ + unsigned int PassBit:1; + /** Which of the components have been written with useful values */ + unsigned int ValidMask:4; + /** Which of the components are used downstream */ + unsigned int UseMask:4; + /** The instruction that produced this vector */ + struct rc_instruction * WriteInstruction; + + /** Linked list of references to this vector */ + struct rc_dataflow_ref Refs; +}; + +struct rc_instruction_dataflow { + struct rc_dataflow_ref * SrcReg[3]; + struct rc_dataflow_ref * SrcRegAddress[3]; + + /** Reference the components of the destination register + * that are carried over without being overwritten */ + struct rc_dataflow_ref * DstRegPrev; + /** Indicates whether the destination register was in use + * before this instruction */ + unsigned int DstRegAliased:1; + struct rc_dataflow_vector * DstReg; +}; + +/** + * General functions for manipulating the dataflow structures. + */ +/*@{*/ +struct rc_dataflow_ref * rc_dataflow_create_ref(struct radeon_compiler * c, + struct rc_dataflow_vector * vector, struct rc_instruction * inst); +struct rc_dataflow_vector * rc_dataflow_create_vector(struct radeon_compiler * c, + rc_register_file file, unsigned int index, struct rc_instruction * inst); +void rc_dataflow_remove_ref(struct rc_dataflow_ref * ref); + +void rc_dataflow_remove_instruction(struct rc_instruction * inst); +/*@}*/ + + +/** + * Compiler passes based on dataflow structures. + */ +/*@{*/ +typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data, + void (*mark_fn)(void * data, unsigned int index, unsigned int mask)); +void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata); +void rc_dataflow_dealias(struct radeon_compiler * c); +void rc_dataflow_swizzles(struct radeon_compiler * c); +/*@}*/ + +#endif /* RADEON_DATAFLOW_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c new file mode 100644 index 0000000000..41d175a22f --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_annotate.c @@ -0,0 +1,365 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +struct dataflow_state { + struct radeon_compiler * C; + unsigned int DCE:1; + unsigned int UpdateRunning:1; + + struct rc_dataflow_vector * Input[RC_REGISTER_MAX_INDEX]; + struct rc_dataflow_vector * Output[RC_REGISTER_MAX_INDEX]; + struct rc_dataflow_vector * Temporary[RC_REGISTER_MAX_INDEX]; + struct rc_dataflow_vector * Address; + + struct rc_dataflow_vector ** UpdateStack; + unsigned int UpdateStackSize; + unsigned int UpdateStackReserved; +}; + +static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask); + +static struct rc_dataflow_vector * get_register_contents(struct dataflow_state * s, + rc_register_file file, unsigned int index) +{ + if (file == RC_FILE_INPUT || file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { + if (index >= RC_REGISTER_MAX_INDEX) + return 0; /* cannot happen, but be defensive */ + + if (file == RC_FILE_TEMPORARY) + return s->Temporary[index]; + if (file == RC_FILE_INPUT) + return s->Input[index]; + if (file == RC_FILE_OUTPUT) + return s->Output[index]; + } + + if (file == RC_FILE_ADDRESS) + return s->Address; + + return 0; /* can happen, constant register file */ +} + +static void mark_ref_use(struct dataflow_state * s, struct rc_dataflow_ref * ref, unsigned int mask) +{ + if (!(mask & ~ref->UseMask)) + return; + + ref->UseMask |= mask; + mark_vector_use(s, ref->Vector, ref->UseMask); +} + +static void mark_source_use(struct dataflow_state * s, struct rc_instruction * inst, + unsigned int src, unsigned int srcmask) +{ + unsigned int refmask = 0; + + for(unsigned int i = 0; i < 4; ++i) { + if (GET_BIT(srcmask, i)) + refmask |= 1 << GET_SWZ(inst->I.SrcReg[src].Swizzle, i); + } + + /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ + refmask &= RC_MASK_XYZW; + + if (!refmask) + return; /* can happen if the swizzle contains constant components */ + + if (inst->Dataflow.SrcReg[src]) + mark_ref_use(s, inst->Dataflow.SrcReg[src], refmask); + + if (inst->Dataflow.SrcRegAddress[src]) + mark_ref_use(s, inst->Dataflow.SrcRegAddress[src], RC_MASK_X); +} + +static void compute_sources_for_writemask( + struct rc_instruction * inst, + unsigned int writemask, + unsigned int *srcmasks) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + + srcmasks[0] = 0; + srcmasks[1] = 0; + srcmasks[2] = 0; + + if (inst->I.Opcode == RC_OPCODE_KIL) + srcmasks[0] |= RC_MASK_XYZW; + + if (!writemask) + return; + + if (opcode->IsComponentwise) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= writemask; + } else if (opcode->IsStandardScalar) { + for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) + srcmasks[src] |= RC_MASK_X; + } else { + switch(inst->I.Opcode) { + case RC_OPCODE_ARL: + srcmasks[0] |= RC_MASK_X; + break; + case RC_OPCODE_DP3: + srcmasks[0] |= RC_MASK_XYZ; + srcmasks[1] |= RC_MASK_XYZ; + break; + case RC_OPCODE_DP4: + srcmasks[0] |= RC_MASK_XYZW; + srcmasks[1] |= RC_MASK_XYZW; + break; + case RC_OPCODE_TEX: + case RC_OPCODE_TXB: + case RC_OPCODE_TXP: + srcmasks[0] |= RC_MASK_XYZW; + break; + case RC_OPCODE_DST: + srcmasks[0] |= 0x6; + srcmasks[1] |= 0xa; + break; + case RC_OPCODE_EXP: + case RC_OPCODE_LOG: + srcmasks[0] |= RC_MASK_XY; + break; + case RC_OPCODE_LIT: + srcmasks[0] |= 0xb; + break; + default: + break; + } + } +} + +static void mark_instruction_source_use(struct dataflow_state * s, + struct rc_instruction * inst, unsigned int writemask) +{ + unsigned int srcmasks[3]; + + compute_sources_for_writemask(inst, writemask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) + mark_source_use(s, inst, src, srcmasks[src]); +} + +static void run_update(struct dataflow_state * s) +{ + s->UpdateRunning = 1; + + while(s->UpdateStackSize) { + struct rc_dataflow_vector * vector = s->UpdateStack[--s->UpdateStackSize]; + vector->PassBit = 0; + + if (vector->WriteInstruction) { + struct rc_instruction * inst = vector->WriteInstruction; + + if (inst->Dataflow.DstRegPrev) { + unsigned int carryover = vector->UseMask & ~inst->I.DstReg.WriteMask; + + if (carryover) + mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover); + } + + mark_instruction_source_use( + s, vector->WriteInstruction, + vector->UseMask & inst->I.DstReg.WriteMask); + } + } + + s->UpdateRunning = 0; +} + +static void mark_vector_use(struct dataflow_state * s, struct rc_dataflow_vector * vector, unsigned int mask) +{ + if (!(mask & ~vector->UseMask)) + return; /* no new used bits */ + + vector->UseMask |= mask; + if (vector->PassBit) + return; + + if (s->UpdateStackSize >= s->UpdateStackReserved) { + unsigned int new_reserve = 2 * s->UpdateStackReserved; + struct rc_dataflow_vector ** new_stack; + + if (!new_reserve) + new_reserve = 16; + + new_stack = memory_pool_malloc(&s->C->Pool, new_reserve * sizeof(struct rc_dataflow_vector *)); + memcpy(new_stack, s->UpdateStack, s->UpdateStackSize * sizeof(struct rc_dataflow_vector *)); + + s->UpdateStack = new_stack; + s->UpdateStackReserved = new_reserve; + } + + s->UpdateStack[s->UpdateStackSize++] = vector; + vector->PassBit = 1; + + if (!s->UpdateRunning) + run_update(s); +} + +static void annotate_instruction(struct dataflow_state * s, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + struct rc_dataflow_vector * vector = get_register_contents(s, inst->I.SrcReg[src].File, inst->I.SrcReg[src].Index); + if (vector) { + inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(s->C, vector, inst); + } + if (inst->I.SrcReg[src].RelAddr) { + struct rc_dataflow_vector * addr = get_register_contents(s, RC_FILE_ADDRESS, 0); + if (addr) + inst->Dataflow.SrcRegAddress[src] = rc_dataflow_create_ref(s->C, addr, inst); + } + } + + mark_instruction_source_use(s, inst, 0); /* for KIL */ + + if (opcode->HasDstReg) { + struct rc_dataflow_vector * oldvec = get_register_contents(s, inst->I.DstReg.File, inst->I.DstReg.Index); + struct rc_dataflow_vector * newvec = rc_dataflow_create_vector(s->C, inst->I.DstReg.File, inst->I.DstReg.Index, inst); + + newvec->ValidMask = inst->I.DstReg.WriteMask; + + if (oldvec) { + unsigned int carryover = oldvec->ValidMask & ~inst->I.DstReg.WriteMask; + + if (oldvec->ValidMask) + inst->Dataflow.DstRegAliased = 1; + + if (carryover) { + inst->Dataflow.DstRegPrev = rc_dataflow_create_ref(s->C, oldvec, inst); + newvec->ValidMask |= carryover; + + if (!s->DCE) + mark_ref_use(s, inst->Dataflow.DstRegPrev, carryover); + } + } + + inst->Dataflow.DstReg = newvec; + + if (newvec->File == RC_FILE_TEMPORARY) + s->Temporary[newvec->Index] = newvec; + else if (newvec->File == RC_FILE_OUTPUT) + s->Output[newvec->Index] = newvec; + else + s->Address = newvec; + + if (!s->DCE) + mark_vector_use(s, newvec, inst->I.DstReg.WriteMask); + } +} + +static void init_inputs(struct dataflow_state * s) +{ + unsigned int index; + + for(index = 0; index < 32; ++index) { + if (s->C->Program.InputsRead & (1 << index)) { + s->Input[index] = rc_dataflow_create_vector(s->C, RC_FILE_INPUT, index, 0); + s->Input[index]->ValidMask = RC_MASK_XYZW; + } + } +} + +static void mark_output_use(void * data, unsigned int index, unsigned int mask) +{ + struct dataflow_state * s = data; + struct rc_dataflow_vector * vec = s->Output[index]; + + if (vec) + mark_vector_use(s, vec, mask); +} + +void rc_dataflow_annotate(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata) +{ + struct dataflow_state s; + struct rc_instruction * inst; + + memset(&s, 0, sizeof(s)); + s.C = c; + s.DCE = dce ? 1 : 0; + + init_inputs(&s); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + annotate_instruction(&s, inst); + } + + if (s.DCE) { + dce(userdata, &s, &mark_output_use); + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + + if (opcode->HasDstReg) { + unsigned int redundant_writes = inst->I.DstReg.WriteMask & ~inst->Dataflow.DstReg->UseMask; + + inst->I.DstReg.WriteMask &= ~redundant_writes; + + if (!inst->I.DstReg.WriteMask) { + struct rc_instruction * todelete = inst; + inst = inst->Prev; + rc_remove_instruction(todelete); + continue; + } + } + + unsigned int srcmasks[3]; + compute_sources_for_writemask(inst, inst->I.DstReg.WriteMask, srcmasks); + + for(unsigned int src = 0; src < 3; ++src) { + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(srcmasks[src], chan)) + SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); + } + + if (inst->Dataflow.SrcReg[src]) { + if (!inst->Dataflow.SrcReg[src]->UseMask) { + rc_dataflow_remove_ref(inst->Dataflow.SrcReg[src]); + inst->Dataflow.SrcReg[src] = 0; + } + } + + if (inst->Dataflow.SrcRegAddress[src]) { + if (!inst->Dataflow.SrcRegAddress[src]->UseMask) { + rc_dataflow_remove_ref(inst->Dataflow.SrcRegAddress[src]); + inst->Dataflow.SrcRegAddress[src] = 0; + } + } + } + } + + rc_calculate_inputs_outputs(c); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c new file mode 100644 index 0000000000..4596636970 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_dealias.c @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" + + +#define DEALIAS_LIST_SIZE 128 + +struct dealias_state { + struct radeon_compiler * C; + + unsigned int OldIndex:RC_REGISTER_INDEX_BITS; + unsigned int NewIndex:RC_REGISTER_INDEX_BITS; + unsigned int DealiasFail:1; + + struct rc_dataflow_vector * List[DEALIAS_LIST_SIZE]; + unsigned int Length; +}; + +static void push_dealias_vector(struct dealias_state * s, struct rc_dataflow_vector * vec) +{ + if (s->Length >= DEALIAS_LIST_SIZE) { + rc_debug(s->C, "%s: list size exceeded\n", __FUNCTION__); + s->DealiasFail = 1; + return; + } + + if (rc_assert(s->C, vec->File == RC_FILE_TEMPORARY && vec->Index == s->OldIndex)) + return; + + s->List[s->Length++] = vec; +} + +static void run_dealias(struct dealias_state * s) +{ + unsigned int i; + + for(i = 0; i < s->Length && !s->DealiasFail; ++i) { + struct rc_dataflow_vector * vec = s->List[i]; + struct rc_dataflow_ref * ref; + + for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) { + if (ref->ReadInstruction->Dataflow.DstRegPrev == ref) + push_dealias_vector(s, ref->ReadInstruction->Dataflow.DstReg); + } + } + + if (s->DealiasFail) + return; + + for(i = 0; i < s->Length; ++i) { + struct rc_dataflow_vector * vec = s->List[i]; + struct rc_dataflow_ref * ref; + + vec->Index = s->NewIndex; + vec->WriteInstruction->I.DstReg.Index = s->NewIndex; + + for(ref = vec->Refs.Next; ref != &vec->Refs; ref = ref->Next) { + struct rc_instruction * inst = ref->ReadInstruction; + unsigned int i; + + for(i = 0; i < 3; ++i) { + if (inst->Dataflow.SrcReg[i] == ref) { + if (rc_assert(s->C, inst->I.SrcReg[i].File == RC_FILE_TEMPORARY && + inst->I.SrcReg[i].Index == s->OldIndex)) + return; + + inst->I.SrcReg[i].Index = s->NewIndex; + } + } + } + } +} + +/** + * Breaks register aliasing to reduce multiple assignments to a single register. + * + * This affects sequences like: + * MUL r0, ...; + * MAD r0, r1, r2, r0; + * In this example, a new register will be used for the destination of the + * second MAD. + * + * The purpose of this dealiasing is to make the resulting code more SSA-like + * and therefore make it easier to move instructions around. + * This is of crucial importance for R300 fragment programs, where de-aliasing + * can help to reduce texture indirections, but other targets can benefit from + * it as well. + * + * \note When compiling GLSL, there may be some benefit gained from breaking + * up vectors whose components are unrelated. This is not done yet and should + * be investigated at some point (of course, a matching pass to re-merge + * components would be required). + */ +void rc_dataflow_dealias(struct radeon_compiler * c) +{ + struct dealias_state s; + + memset(&s, 0, sizeof(s)); + s.C = c; + + struct rc_instruction * inst; + for(inst = c->Program.Instructions.Prev; inst != &c->Program.Instructions; inst = inst->Prev) { + if (!inst->Dataflow.DstRegAliased || inst->Dataflow.DstReg->File != RC_FILE_TEMPORARY) + continue; + + if (inst->Dataflow.DstReg->UseMask & ~inst->I.DstReg.WriteMask) + continue; + + s.OldIndex = inst->I.DstReg.Index; + s.NewIndex = rc_find_free_temporary(c); + s.DealiasFail = 0; + s.Length = 0; + + inst->Dataflow.DstRegAliased = 0; + if (inst->Dataflow.DstRegPrev) { + rc_dataflow_remove_ref(inst->Dataflow.DstRegPrev); + inst->Dataflow.DstRegPrev = 0; + } + + push_dealias_vector(&s, inst->Dataflow.DstReg); + run_dealias(&s); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c new file mode 100644 index 0000000000..1aa91eff7c --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_dataflow.h" + +#include "radeon_compiler.h" +#include "radeon_swizzle.h" + + +static void rewrite_source(struct radeon_compiler * c, + struct rc_instruction * inst, unsigned src) +{ + struct rc_swizzle_split split; + unsigned int tempreg = rc_find_free_temporary(c); + unsigned int usemask; + struct rc_dataflow_ref * oldref = inst->Dataflow.SrcReg[src]; + struct rc_dataflow_vector * vector = 0; + + usemask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (GET_SWZ(inst->I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) + usemask |= 1 << chan; + } + + c->SwizzleCaps->Split(inst->I.SrcReg[src], usemask, &split); + + for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { + struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); + unsigned int phase_refmask; + unsigned int masked_negate; + + mov->I.Opcode = RC_OPCODE_MOV; + mov->I.DstReg.File = RC_FILE_TEMPORARY; + mov->I.DstReg.Index = tempreg; + mov->I.DstReg.WriteMask = split.Phase[phase]; + mov->I.SrcReg[0] = inst->I.SrcReg[src]; + + phase_refmask = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + if (!GET_BIT(split.Phase[phase], chan)) + SET_SWZ(mov->I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); + else + phase_refmask |= 1 << GET_SWZ(mov->I.SrcReg[0].Swizzle, chan); + } + + phase_refmask &= RC_MASK_XYZW; + + masked_negate = split.Phase[phase] & mov->I.SrcReg[0].Negate; + if (masked_negate == 0) + mov->I.SrcReg[0].Negate = 0; + else if (masked_negate == split.Phase[phase]) + mov->I.SrcReg[0].Negate = RC_MASK_XYZW; + + if (oldref) { + mov->Dataflow.SrcReg[0] = rc_dataflow_create_ref(c, oldref->Vector, mov); + mov->Dataflow.SrcReg[0]->UseMask = phase_refmask; + } + + mov->Dataflow.DstReg = rc_dataflow_create_vector(c, RC_FILE_TEMPORARY, tempreg, mov); + mov->Dataflow.DstReg->ValidMask = split.Phase[phase]; + + if (vector) { + mov->Dataflow.DstRegPrev = rc_dataflow_create_ref(c, vector, mov); + mov->Dataflow.DstRegPrev->UseMask = vector->ValidMask; + mov->Dataflow.DstReg->ValidMask |= vector->ValidMask; + mov->Dataflow.DstRegAliased = 1; + } + + mov->Dataflow.DstReg->UseMask = mov->Dataflow.DstReg->ValidMask; + vector = mov->Dataflow.DstReg; + } + + if (oldref) + rc_dataflow_remove_ref(oldref); + inst->Dataflow.SrcReg[src] = rc_dataflow_create_ref(c, vector, inst); + inst->Dataflow.SrcReg[src]->UseMask = usemask; + + inst->I.SrcReg[src].File = RC_FILE_TEMPORARY; + inst->I.SrcReg[src].Index = tempreg; + inst->I.SrcReg[src].Swizzle = 0; + inst->I.SrcReg[src].Negate = RC_MASK_NONE; + inst->I.SrcReg[src].Abs = 0; + for(unsigned int chan = 0; chan < 4; ++chan) { + SET_SWZ(inst->I.SrcReg[src].Swizzle, chan, + GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); + } +} + +void rc_dataflow_swizzles(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + + for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + unsigned int src; + + for(src = 0; src < opcode->NumSrcRegs; ++src) { + if (!c->SwizzleCaps->IsNative(inst->I.Opcode, inst->I.SrcReg[src])) + rewrite_source(c, inst, src); + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c deleted file mode 100644 index 3e02ebee81..0000000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * @file - * - * "Not-quite SSA" and Dead-Code Elimination. - */ - -#include "radeon_nqssadce.h" - -#include "radeon_compiler.h" - - -/** - * Return the @ref register_state for the given register (or 0 for untracked - * registers, i.e. constants). - */ -static struct register_state *get_reg_state(struct nqssadce_state* s, rc_register_file file, unsigned int index) -{ - if (index >= RC_REGISTER_MAX_INDEX) - return 0; - - switch(file) { - case RC_FILE_TEMPORARY: return &s->Temps[index]; - case RC_FILE_OUTPUT: return &s->Outputs[index]; - case RC_FILE_ADDRESS: return &s->Address; - default: return 0; - } -} - - -static void track_used_srcreg(struct nqssadce_state* s, - int src, unsigned int sourced) -{ - struct rc_sub_instruction * inst = &s->IP->I; - int i; - unsigned int deswz_source = 0; - - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) { - unsigned int swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); - deswz_source |= 1 << swz; - } else { - inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); - inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i); - } - } - - if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { - struct rc_dst_register dstreg = inst->DstReg; - dstreg.File = RC_FILE_TEMPORARY; - dstreg.Index = rc_find_free_temporary(s->Compiler); - dstreg.WriteMask = sourced; - - s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); - - inst->SrcReg[src].File = RC_FILE_TEMPORARY; - inst->SrcReg[src].Index = dstreg.Index; - inst->SrcReg[src].Swizzle = 0; - inst->SrcReg[src].Negate = RC_MASK_NONE; - inst->SrcReg[src].Abs = 0; - for(i = 0; i < 4; ++i) { - if (GET_BIT(sourced, i)) - inst->SrcReg[src].Swizzle |= i << (3*i); - else - inst->SrcReg[src].Swizzle |= RC_SWIZZLE_UNUSED << (3*i); - } - deswz_source = sourced; - } - - struct register_state *regstate; - - if (inst->SrcReg[src].RelAddr) { - regstate = get_reg_state(s, RC_FILE_ADDRESS, 0); - if (regstate) - regstate->Sourced |= RC_MASK_X; - } else { - regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); - if (regstate) - regstate->Sourced |= deswz_source & 0xf; - } -} - -static void unalias_srcregs(struct rc_instruction *inst, unsigned int oldindex, unsigned int newindex) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - int i; - for(i = 0; i < opcode->NumSrcRegs; ++i) - if (inst->I.SrcReg[i].File == RC_FILE_TEMPORARY && inst->I.SrcReg[i].Index == oldindex) - inst->I.SrcReg[i].Index = newindex; -} - -static void unalias_temporary(struct nqssadce_state* s, unsigned int oldindex) -{ - unsigned int newindex = rc_find_free_temporary(s->Compiler); - struct rc_instruction * inst; - for(inst = s->Compiler->Program.Instructions.Next; inst != s->IP; inst = inst->Next) { - if (inst->I.DstReg.File == RC_FILE_TEMPORARY && inst->I.DstReg.Index == oldindex) - inst->I.DstReg.Index = newindex; - unalias_srcregs(inst, oldindex, newindex); - } - unalias_srcregs(s->IP, oldindex, newindex); -} - - -/** - * Handle one instruction. - */ -static void process_instruction(struct nqssadce_state* s) -{ - struct rc_sub_instruction *inst = &s->IP->I; - unsigned int WriteMask; - - if (inst->Opcode != RC_OPCODE_KIL) { - struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); - if (!regstate) { - rc_error(s->Compiler, "NqssaDce: bad destination register (%i[%i])\n", - inst->DstReg.File, inst->DstReg.Index); - return; - } - - inst->DstReg.WriteMask &= regstate->Sourced; - regstate->Sourced &= ~inst->DstReg.WriteMask; - - if (inst->DstReg.WriteMask == 0) { - struct rc_instruction * inst_remove = s->IP; - s->IP = s->IP->Prev; - rc_remove_instruction(inst_remove); - return; - } - - if (inst->DstReg.File == RC_FILE_TEMPORARY && !regstate->Sourced) - unalias_temporary(s, inst->DstReg.Index); - } - - WriteMask = inst->DstReg.WriteMask; - - switch (inst->Opcode) { - case RC_OPCODE_ARL: - case RC_OPCODE_DDX: - case RC_OPCODE_DDY: - case RC_OPCODE_FRC: - case RC_OPCODE_MOV: - track_used_srcreg(s, 0, WriteMask); - break; - case RC_OPCODE_ADD: - case RC_OPCODE_MAX: - case RC_OPCODE_MIN: - case RC_OPCODE_MUL: - case RC_OPCODE_SGE: - case RC_OPCODE_SLT: - track_used_srcreg(s, 0, WriteMask); - track_used_srcreg(s, 1, WriteMask); - break; - case RC_OPCODE_CMP: - case RC_OPCODE_MAD: - track_used_srcreg(s, 0, WriteMask); - track_used_srcreg(s, 1, WriteMask); - track_used_srcreg(s, 2, WriteMask); - break; - case RC_OPCODE_COS: - case RC_OPCODE_EX2: - case RC_OPCODE_LG2: - case RC_OPCODE_RCP: - case RC_OPCODE_RSQ: - case RC_OPCODE_SIN: - track_used_srcreg(s, 0, 0x1); - break; - case RC_OPCODE_DP3: - track_used_srcreg(s, 0, 0x7); - track_used_srcreg(s, 1, 0x7); - break; - case RC_OPCODE_DP4: - track_used_srcreg(s, 0, 0xf); - track_used_srcreg(s, 1, 0xf); - break; - case RC_OPCODE_KIL: - case RC_OPCODE_TEX: - case RC_OPCODE_TXB: - case RC_OPCODE_TXP: - track_used_srcreg(s, 0, 0xf); - break; - case RC_OPCODE_DST: - track_used_srcreg(s, 0, 0x6); - track_used_srcreg(s, 1, 0xa); - break; - case RC_OPCODE_EXP: - case RC_OPCODE_LOG: - case RC_OPCODE_POW: - track_used_srcreg(s, 0, 0x3); - break; - case RC_OPCODE_LIT: - track_used_srcreg(s, 0, 0xb); - break; - default: - rc_error(s->Compiler, "NqssaDce: Unknown opcode %d\n", inst->Opcode); - return; - } - - s->IP = s->IP->Prev; -} - -void rc_calculate_inputs_outputs(struct radeon_compiler * c) -{ - struct rc_instruction *inst; - - c->Program.InputsRead = 0; - c->Program.OutputsWritten = 0; - - for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) - { - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - int i; - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->I.SrcReg[i].File == RC_FILE_INPUT) - c->Program.InputsRead |= 1 << inst->I.SrcReg[i].Index; - } - - if (opcode->HasDstReg) { - if (inst->I.DstReg.File == RC_FILE_OUTPUT) - c->Program.OutputsWritten |= 1 << inst->I.DstReg.Index; - } - } -} - -void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data) -{ - struct nqssadce_state s; - - memset(&s, 0, sizeof(s)); - s.Compiler = c; - s.Descr = descr; - s.UserData = data; - s.Descr->Init(&s); - s.IP = c->Program.Instructions.Prev; - - while(s.IP != &c->Program.Instructions && !c->Error) - process_instruction(&s); - - rc_calculate_inputs_outputs(c); -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h deleted file mode 100644 index a2aa1eb8ca..0000000000 --- a/src/mesa/drivers/dri/r300/compiler/radeon_nqssadce.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (C) 2008 Nicolai Haehnle. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __RADEON_PROGRAM_NQSSADCE_H_ -#define __RADEON_PROGRAM_NQSSADCE_H_ - -#include "radeon_program.h" - -struct register_state { - /** - * Bitmask indicating which components of the register are sourced - * by later instructions. - */ - unsigned int Sourced : 4; -}; - -/** - * Maintain state such as which registers are used, which registers are - * read from, etc. - */ -struct nqssadce_state { - struct radeon_compiler *Compiler; - struct radeon_nqssadce_descr *Descr; - - /** - * All instructions after this instruction pointer have been dealt with. - */ - struct rc_instruction * IP; - - /** - * Which registers are read by subsequent instructions? - */ - struct register_state Temps[RC_REGISTER_MAX_INDEX]; - struct register_state Outputs[RC_REGISTER_MAX_INDEX]; - struct register_state Address; - - void * UserData; -}; - - -/** - * This structure contains a description of the hardware in-so-far as - * it is required for the NqSSA-DCE pass. - */ -struct radeon_nqssadce_descr { - /** - * Fill in which outputs - */ - void (*Init)(struct nqssadce_state *); - - /** - * Check whether the given swizzle, absolute and negate combination - * can be implemented natively by the hardware for this opcode. - * - * \return 1 if the swizzle is native for the given opcode - */ - int (*IsNativeSwizzle)(rc_opcode opcode, struct rc_src_register reg); - - /** - * Emit (at the current IP) the instruction MOV dst, src; - * The transformation will work recursively on the emitted instruction(s). - */ - void (*BuildSwizzle)(struct nqssadce_state*, struct rc_dst_register dst, struct rc_src_register src); -}; - -void radeonNqssaDce(struct radeon_compiler * c, struct radeon_nqssadce_descr* descr, void * data); - -#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index ffe2de1a87..b7200990c2 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -40,13 +40,15 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .Opcode = RC_OPCODE_ABS, .Name = "ABS", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_ADD, .Name = "ADD", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_ARL, @@ -58,25 +60,29 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .Opcode = RC_OPCODE_CMP, .Name = "CMP", .NumSrcRegs = 3, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_COS, .Name = "COS", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsStandardScalar = 1 }, { .Opcode = RC_OPCODE_DDX, .Name = "DDX", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_DDY, .Name = "DDY", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_DP3, @@ -106,7 +112,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .Opcode = RC_OPCODE_EX2, .Name = "EX2", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsStandardScalar = 1 }, { .Opcode = RC_OPCODE_EXP, @@ -118,13 +125,15 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .Opcode = RC_OPCODE_FLR, .Name = "FLR", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_FRC, .Name = "FRC", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_KIL, @@ -135,7 +144,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .Opcode = RC_OPCODE_LG2, .Name = "LG2", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsStandardScalar = 1 }, { .Opcode = RC_OPCODE_LIT, @@ -153,55 +163,64 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .Opcode = RC_OPCODE_LRP, .Name = "LRP", .NumSrcRegs = 3, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_MAD, .Name = "MAD", .NumSrcRegs = 3, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_MAX, .Name = "MAX", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_MIN, .Name = "MIN", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_MOV, .Name = "MOV", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_MUL, .Name = "MUL", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_POW, .Name = "POW", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsStandardScalar = 1 }, { .Opcode = RC_OPCODE_RCP, .Name = "RCP", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsStandardScalar = 1 }, { .Opcode = RC_OPCODE_RSQ, .Name = "RSQ", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsStandardScalar = 1 }, { .Opcode = RC_OPCODE_SCS, @@ -213,61 +232,71 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .Opcode = RC_OPCODE_SEQ, .Name = "SEQ", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SFL, .Name = "SFL", .NumSrcRegs = 0, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SGE, .Name = "SGE", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SGT, .Name = "SGT", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SIN, .Name = "SIN", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsStandardScalar = 1 }, { .Opcode = RC_OPCODE_SLE, .Name = "SLE", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SLT, .Name = "SLT", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SNE, .Name = "SNE", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SUB, .Name = "SUB", .NumSrcRegs = 2, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_SWZ, .Name = "SWZ", .NumSrcRegs = 1, - .HasDstReg = 1 + .HasDstReg = 1, + .IsComponentwise = 1 }, { .Opcode = RC_OPCODE_XPD, diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 4eb9be3e55..8e30bef1e3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -187,6 +187,14 @@ struct rc_opcode_info { unsigned int NumSrcRegs:2; unsigned int HasDstReg:1; + + /** true if this is a vector instruction that operates on components in parallel + * without any cross-component interaction */ + unsigned int IsComponentwise:1; + + /** true if this instruction sources only its operands X components + * to compute one result which is smeared across all output channels */ + unsigned int IsStandardScalar:1; }; extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE]; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c index 0e0c1f68e6..b97c48084b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c @@ -154,155 +154,7 @@ struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, str void rc_remove_instruction(struct rc_instruction * inst) { + rc_dataflow_remove_instruction(inst); inst->Prev->Next = inst->Next; inst->Next->Prev = inst->Prev; } - -static const char * textarget_to_string(rc_texture_target target) -{ - switch(target) { - case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; - case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; - case RC_TEXTURE_CUBE: return "CUBE"; - case RC_TEXTURE_3D: return "3D"; - case RC_TEXTURE_RECT: return "RECT"; - case RC_TEXTURE_2D: return "2D"; - case RC_TEXTURE_1D: return "1D"; - default: return "BAD_TEXTURE_TARGET"; - } -} - -static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) -{ - if (file == RC_FILE_NONE) { - fprintf(f, "none"); - } else { - const char * filename; - switch(file) { - case RC_FILE_TEMPORARY: filename = "temp"; break; - case RC_FILE_INPUT: filename = "input"; break; - case RC_FILE_OUTPUT: filename = "output"; break; - case RC_FILE_ADDRESS: filename = "addr"; break; - case RC_FILE_CONSTANT: filename = "const"; break; - default: filename = "BAD FILE"; break; - } - fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); - } -} - -static void rc_print_mask(FILE * f, unsigned int mask) -{ - if (mask & RC_MASK_X) fprintf(f, "x"); - if (mask & RC_MASK_Y) fprintf(f, "y"); - if (mask & RC_MASK_Z) fprintf(f, "z"); - if (mask & RC_MASK_W) fprintf(f, "w"); -} - -static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) -{ - rc_print_register(f, dst.File, dst.Index, dst.RelAddr); - if (dst.WriteMask != RC_MASK_XYZW) { - fprintf(f, "."); - rc_print_mask(f, dst.WriteMask); - } -} - -static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) -{ - unsigned int comp; - for(comp = 0; comp < 4; ++comp) { - rc_swizzle swz = GET_SWZ(swizzle, comp); - if (GET_BIT(negate, comp)) - fprintf(f, "-"); - switch(swz) { - case RC_SWIZZLE_X: fprintf(f, "x"); break; - case RC_SWIZZLE_Y: fprintf(f, "y"); break; - case RC_SWIZZLE_Z: fprintf(f, "z"); break; - case RC_SWIZZLE_W: fprintf(f, "w"); break; - case RC_SWIZZLE_ZERO: fprintf(f, "0"); break; - case RC_SWIZZLE_ONE: fprintf(f, "1"); break; - case RC_SWIZZLE_HALF: fprintf(f, "H"); break; - case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break; - } - } -} - -static void rc_print_src_register(FILE * f, struct rc_src_register src) -{ - int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); - - if (src.Negate == RC_MASK_XYZW) - fprintf(f, "-"); - if (src.Abs) - fprintf(f, "|"); - - rc_print_register(f, src.File, src.Index, src.RelAddr); - - if (src.Abs && !trivial_negate) - fprintf(f, "|"); - - if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { - fprintf(f, "."); - rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); - } - - if (src.Abs && trivial_negate) - fprintf(f, "|"); -} - -static void rc_print_instruction(FILE * f, struct rc_instruction * inst) -{ - const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); - unsigned int reg; - - fprintf(f, "%s", opcode->Name); - - switch(inst->I.SaturateMode) { - case RC_SATURATE_NONE: break; - case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; - case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; - default: fprintf(f, "_BAD_SAT"); break; - } - - if (opcode->HasDstReg) { - fprintf(f, " "); - rc_print_dst_register(f, inst->I.DstReg); - if (opcode->NumSrcRegs) - fprintf(f, ","); - } - - for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { - if (reg > 0) - fprintf(f, ","); - fprintf(f, " "); - rc_print_src_register(f, inst->I.SrcReg[reg]); - } - - if (opcode->HasTexture) { - fprintf(f, ", %s%s[%u]", - textarget_to_string(inst->I.TexSrcTarget), - inst->I.TexShadow ? "SHADOW" : "", - inst->I.TexSrcUnit); - } - - fprintf(f, ";\n"); -} - -/** - * Print program to stderr, default options. - */ -void rc_print_program(const struct rc_program *prog) -{ - unsigned int linenum = 0; - struct rc_instruction *inst; - - fprintf(stderr, "# Radeon Compiler Program\n"); - - for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { - fprintf(stderr, "%3d: ", linenum); - - rc_print_instruction(stderr, inst); - - linenum++; - } -} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index a2ab757fec..d38c9a420c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -33,102 +33,11 @@ #include "radeon_opcodes.h" #include "radeon_code.h" +#include "radeon_program_constants.h" +#include "radeon_dataflow.h" struct radeon_compiler; -typedef enum { - RC_SATURATE_NONE = 0, - RC_SATURATE_ZERO_ONE, - RC_SATURATE_MINUS_PLUS_ONE -} rc_saturate_mode; - -typedef enum { - RC_TEXTURE_2D_ARRAY, - RC_TEXTURE_1D_ARRAY, - RC_TEXTURE_CUBE, - RC_TEXTURE_3D, - RC_TEXTURE_RECT, - RC_TEXTURE_2D, - RC_TEXTURE_1D -} rc_texture_target; - -typedef enum { - /** - * Used to indicate unused register descriptions and - * source register that use a constant swizzle. - */ - RC_FILE_NONE = 0, - RC_FILE_TEMPORARY, - - /** - * Input register. - * - * \note The compiler attaches no implicit semantics to input registers. - * Fragment/vertex program specific semantics must be defined explicitly - * using the appropriate compiler interfaces. - */ - RC_FILE_INPUT, - - /** - * Output register. - * - * \note The compiler attaches no implicit semantics to input registers. - * Fragment/vertex program specific semantics must be defined explicitly - * using the appropriate compiler interfaces. - */ - RC_FILE_OUTPUT, - RC_FILE_ADDRESS, - - /** - * Indicates a constant from the \ref rc_constant_list . - */ - RC_FILE_CONSTANT -} rc_register_file; - -#define RC_REGISTER_INDEX_BITS 10 -#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) - -typedef enum { - RC_SWIZZLE_X = 0, - RC_SWIZZLE_Y, - RC_SWIZZLE_Z, - RC_SWIZZLE_W, - RC_SWIZZLE_ZERO, - RC_SWIZZLE_ONE, - RC_SWIZZLE_HALF, - RC_SWIZZLE_UNUSED -} rc_swizzle; - -#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) -#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) -#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) -#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) - -#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) -#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) -#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) -#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) -#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) -#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) -#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) - -/** - * \name Bitmasks for components of vectors. - * - * Used for write masks, negation masks, etc. - */ -/*@{*/ -#define RC_MASK_NONE 0 -#define RC_MASK_X 1 -#define RC_MASK_Y 2 -#define RC_MASK_Z 4 -#define RC_MASK_W 8 -#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) -#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) -#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) -#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) -/*@}*/ - struct rc_src_register { rc_register_file File:3; @@ -198,6 +107,15 @@ struct rc_instruction { struct rc_instruction * Next; struct rc_sub_instruction I; + + /** + * Dataflow annotations. + * + * These are not supplied by the caller of the compiler, + * but filled in during compilation stages that make use of + * dataflow analysis. + */ + struct rc_instruction_dataflow Dataflow; }; struct rc_program { @@ -292,6 +210,10 @@ struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c); struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after); void rc_remove_instruction(struct rc_instruction * inst); -void rc_print_program(const struct rc_program *prog); +enum { + RC_PRINT_DATAFLOW = 0x1 +}; + +void rc_print_program(const struct rc_program *prog, unsigned int flags); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h new file mode 100644 index 0000000000..69994f9880 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_PROGRAM_CONSTANTS_H +#define RADEON_PROGRAM_CONSTANTS_H + +typedef enum { + RC_SATURATE_NONE = 0, + RC_SATURATE_ZERO_ONE, + RC_SATURATE_MINUS_PLUS_ONE +} rc_saturate_mode; + +typedef enum { + RC_TEXTURE_2D_ARRAY, + RC_TEXTURE_1D_ARRAY, + RC_TEXTURE_CUBE, + RC_TEXTURE_3D, + RC_TEXTURE_RECT, + RC_TEXTURE_2D, + RC_TEXTURE_1D +} rc_texture_target; + +typedef enum { + /** + * Used to indicate unused register descriptions and + * source register that use a constant swizzle. + */ + RC_FILE_NONE = 0, + RC_FILE_TEMPORARY, + + /** + * Input register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_INPUT, + + /** + * Output register. + * + * \note The compiler attaches no implicit semantics to input registers. + * Fragment/vertex program specific semantics must be defined explicitly + * using the appropriate compiler interfaces. + */ + RC_FILE_OUTPUT, + RC_FILE_ADDRESS, + + /** + * Indicates a constant from the \ref rc_constant_list . + */ + RC_FILE_CONSTANT +} rc_register_file; + +#define RC_REGISTER_INDEX_BITS 10 +#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS) + +typedef enum { + RC_SWIZZLE_X = 0, + RC_SWIZZLE_Y, + RC_SWIZZLE_Z, + RC_SWIZZLE_W, + RC_SWIZZLE_ZERO, + RC_SWIZZLE_ONE, + RC_SWIZZLE_HALF, + RC_SWIZZLE_UNUSED +} rc_swizzle; + +#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9)) +#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a)) +#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7) +#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1) +#define SET_SWZ(swz, idx, newv) \ + do { \ + (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \ + } while(0) + +#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W) +#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X) +#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y) +#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z) +#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W) +#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) +#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) + +/** + * \name Bitmasks for components of vectors. + * + * Used for write masks, negation masks, etc. + */ +/*@{*/ +#define RC_MASK_NONE 0 +#define RC_MASK_X 1 +#define RC_MASK_Y 2 +#define RC_MASK_Z 4 +#define RC_MASK_W 8 +#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y) +#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z) +#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W) +#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W) +/*@}*/ + +#endif /* RADEON_PROGRAM_CONSTANTS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c new file mode 100644 index 0000000000..38060ea3ad --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -0,0 +1,214 @@ +/* + * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "radeon_program.h" + +#include <stdio.h> + +static void print_comment(FILE * f) +{ + fprintf(f, " # "); +} + +static const char * textarget_to_string(rc_texture_target target) +{ + switch(target) { + case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY"; + case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY"; + case RC_TEXTURE_CUBE: return "CUBE"; + case RC_TEXTURE_3D: return "3D"; + case RC_TEXTURE_RECT: return "RECT"; + case RC_TEXTURE_2D: return "2D"; + case RC_TEXTURE_1D: return "1D"; + default: return "BAD_TEXTURE_TARGET"; + } +} + +static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr) +{ + if (file == RC_FILE_NONE) { + fprintf(f, "none"); + } else { + const char * filename; + switch(file) { + case RC_FILE_TEMPORARY: filename = "temp"; break; + case RC_FILE_INPUT: filename = "input"; break; + case RC_FILE_OUTPUT: filename = "output"; break; + case RC_FILE_ADDRESS: filename = "addr"; break; + case RC_FILE_CONSTANT: filename = "const"; break; + default: filename = "BAD FILE"; break; + } + fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : ""); + } +} + +static void rc_print_mask(FILE * f, unsigned int mask) +{ + if (mask & RC_MASK_X) fprintf(f, "x"); + if (mask & RC_MASK_Y) fprintf(f, "y"); + if (mask & RC_MASK_Z) fprintf(f, "z"); + if (mask & RC_MASK_W) fprintf(f, "w"); +} + +static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) +{ + rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + if (dst.WriteMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, dst.WriteMask); + } +} + +static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate) +{ + unsigned int comp; + for(comp = 0; comp < 4; ++comp) { + rc_swizzle swz = GET_SWZ(swizzle, comp); + if (GET_BIT(negate, comp)) + fprintf(f, "-"); + switch(swz) { + case RC_SWIZZLE_X: fprintf(f, "x"); break; + case RC_SWIZZLE_Y: fprintf(f, "y"); break; + case RC_SWIZZLE_Z: fprintf(f, "z"); break; + case RC_SWIZZLE_W: fprintf(f, "w"); break; + case RC_SWIZZLE_ZERO: fprintf(f, "0"); break; + case RC_SWIZZLE_ONE: fprintf(f, "1"); break; + case RC_SWIZZLE_HALF: fprintf(f, "H"); break; + case RC_SWIZZLE_UNUSED: fprintf(f, "_"); break; + } + } +} + +static void rc_print_src_register(FILE * f, struct rc_src_register src) +{ + int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW); + + if (src.Negate == RC_MASK_XYZW) + fprintf(f, "-"); + if (src.Abs) + fprintf(f, "|"); + + rc_print_register(f, src.File, src.Index, src.RelAddr); + + if (src.Abs && !trivial_negate) + fprintf(f, "|"); + + if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) { + fprintf(f, "."); + rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate); + } + + if (src.Abs && trivial_negate) + fprintf(f, "|"); +} + +static void rc_print_ref(FILE * f, struct rc_dataflow_ref * ref) +{ + fprintf(f, "ref(%p", ref->Vector); + + if (ref->UseMask != RC_MASK_XYZW) { + fprintf(f, "."); + rc_print_mask(f, ref->UseMask); + } + + fprintf(f, ")"); +} + +static void rc_print_instruction(FILE * f, unsigned int flags, struct rc_instruction * inst) +{ + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->I.Opcode); + unsigned int reg; + + fprintf(f, "%s", opcode->Name); + + switch(inst->I.SaturateMode) { + case RC_SATURATE_NONE: break; + case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break; + case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break; + default: fprintf(f, "_BAD_SAT"); break; + } + + if (opcode->HasDstReg) { + fprintf(f, " "); + rc_print_dst_register(f, inst->I.DstReg); + if (opcode->NumSrcRegs) + fprintf(f, ","); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + if (reg > 0) + fprintf(f, ","); + fprintf(f, " "); + rc_print_src_register(f, inst->I.SrcReg[reg]); + } + + if (opcode->HasTexture) { + fprintf(f, ", %s%s[%u]", + textarget_to_string(inst->I.TexSrcTarget), + inst->I.TexShadow ? "SHADOW" : "", + inst->I.TexSrcUnit); + } + + fprintf(f, ";\n"); + + if (flags & RC_PRINT_DATAFLOW) { + print_comment(f); + + fprintf(f, "Dst = %p", inst->Dataflow.DstReg); + if (inst->Dataflow.DstRegAliased) + fprintf(f, " aliased"); + if (inst->Dataflow.DstRegPrev) { + fprintf(f, " from "); + rc_print_ref(f, inst->Dataflow.DstRegPrev); + } + + for(reg = 0; reg < opcode->NumSrcRegs; ++reg) { + fprintf(f, ", "); + if (inst->Dataflow.SrcReg[reg]) + rc_print_ref(f, inst->Dataflow.SrcReg[reg]); + else + fprintf(f, "<no ref>"); + } + + fprintf(f, "\n"); + } +} + +/** + * Print program to stderr, default options. + */ +void rc_print_program(const struct rc_program *prog, unsigned int flags) +{ + unsigned int linenum = 0; + struct rc_instruction *inst; + + fprintf(stderr, "# Radeon Compiler Program%s\n", + flags & RC_PRINT_DATAFLOW ? " (with dataflow annotations)" : ""); + + for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) { + fprintf(stderr, "%3d: ", linenum); + + rc_print_instruction(stderr, flags, inst); + + linenum++; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h new file mode 100644 index 0000000000..c81d5f7a5e --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_SWIZZLE_H +#define RADEON_SWIZZLE_H + +#include "radeon_program.h" + +struct rc_swizzle_split { + unsigned char NumPhases; + unsigned char Phase[4]; +}; + +/** + * Describe the swizzling capability of target hardware. + */ +struct rc_swizzle_caps { + /** + * Check whether the given swizzle, absolute and negate combination + * can be implemented natively by the hardware for this opcode. + * + * \return 1 if the swizzle is native for the given opcode + */ + int (*IsNative)(rc_opcode opcode, struct rc_src_register reg); + + /** + * Determine how to split access to the masked channels of the + * given source register to obtain ALU-native swizzles. + */ + void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split); +}; + +#endif /* RADEON_SWIZZLE_H */ diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index be21268ba5..b7d5429dc5 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -41,7 +41,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "compiler/radeon_compiler.h" -#include "compiler/radeon_nqssadce.h" #include "radeon_mesa_to_rc.h" #include "r300_context.h" #include "r300_state.h" |