/* * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* * Authors: * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> */ #ifndef _R700_ASSEMBLER_H_ #define _R700_ASSEMBLER_H_ #include "main/mtypes.h" #include "shader/prog_instruction.h" #include "r700_chip.h" #include "r700_shaderinst.h" #include "r700_shader.h" typedef enum SHADER_PIPE_TYPE { SPT_VP = 0, SPT_FP = 1 } SHADER_PIPE_TYPE; typedef enum ConstantCycles { NUMBER_OF_CYCLES = 3, NUMBER_OF_COMPONENTS = 4 } ConstantCycles; typedef enum HARDWARE_LIMIT_VALUES { TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE, MAX_TEMPORARY_REGISTERS = SQ_ALU_SRC_GPR_SIZE, MAX_CONSTANT_REGISTERS = SQ_ALU_SRC_CFILE_SIZE, CFILE_REGISTER_OFFSET = SQ_ALU_SRC_CFILE_BASE, NUMBER_OF_INPUT_COLORS = 2, NUMBER_OF_OUTPUT_COLORS = 8, NUMBER_OF_TEXTURE_UNITS = 16, MEGA_FETCH_BYTES = 32 } HARDWARE_LIMIT_VALUES; typedef enum AddressMode { ADDR_ABSOLUTE = 0, ADDR_RELATIVE_A0 = 1, ADDR_RELATIVE_FLI_0 = 2, NUMBER_OF_ADDR_MOD = 3 } AddressMode; typedef enum SrcRegisterType { SRC_REG_TEMPORARY = 0, SRC_REG_INPUT = 1, SRC_REG_CONSTANT = 2, SRC_REG_ALT_TEMPORARY = 3, NUMBER_OF_SRC_REG_TYPE = 4 } SrcRegisterType; typedef enum DstRegisterType { DST_REG_TEMPORARY = 0, DST_REG_A0 = 1, DST_REG_OUT = 2, DST_REG_OUT_X_REPL = 3, DST_REG_ALT_TEMPORARY = 4, DST_REG_INPUT = 5, NUMBER_OF_DST_REG_TYPE = 6 } DstRegisterType; typedef unsigned int BITS; typedef struct PVSDSTtag { BITS opcode:8; //(:6) //@@@ really should be 10 bits for OP2 BITS math:1; BITS predicated:1; //10 //8 BITS pred_inv :1; //11 //8 BITS rtype:3; BITS reg:10; //24 //20 BITS writex:1; BITS writey:1; BITS writez:1; BITS writew:1; //28 BITS op3:1; // 29 Represents *_OP3_* ALU opcode BITS dualop:1; // 30 //26 BITS addrmode0:1; //31 //29 BITS addrmode1:1; //32 } PVSDST; typedef struct PVSSRCtag { BITS rtype:4; BITS addrmode0:1; BITS reg:10; //15 (8) BITS swizzlex:3; BITS swizzley:3; BITS swizzlez:3; BITS swizzlew:3; //27 BITS negx:1; BITS negy:1; BITS negz:1; BITS negw:1; //31 //BITS addrsel:2; BITS addrmode1:1; //32 } PVSSRC; typedef struct PVSMATHtag { BITS rtype:4; BITS spare:1; BITS reg:8; BITS swizzlex:3; BITS swizzley:3; BITS dstoff:2; // 2 bits of dest offset into alt ram BITS opcode:4; BITS negx:1; BITS negy:1; BITS dstcomp:2; // select dest component BITS spare2:3; } PVSMATH; typedef union PVSDWORDtag { BITS bits; PVSDST dst; PVSSRC src; PVSMATH math; float f; } PVSDWORD; typedef struct VAP_OUT_VTX_FMT_0tag { BITS pos:1; // 0 BITS misc:1; BITS clip_dist0:1; BITS clip_dist1:1; BITS pos_param:1; // 4 BITS color0:1; // 5 BITS color1:1; BITS color2:1; BITS color3:1; BITS color4:1; BITS color5:1; BITS color6:1; BITS color7:1; BITS normal:1; BITS depth:1; // 14 BITS point_size:1; // 15 BITS edge_flag:1; BITS rta_index:1; // shares same channel as kill_flag BITS kill_flag:1; BITS viewport_index:1; // 19 BITS resvd1:12; // 20 } VAP_OUT_VTX_FMT_0; typedef struct VAP_OUT_VTX_FMT_1tag { BITS tex0comp:3; BITS tex1comp:3; BITS tex2comp:3; BITS tex3comp:3; BITS tex4comp:3; BITS tex5comp:3; BITS tex6comp:3; BITS tex7comp:3; BITS resvd:8; } VAP_OUT_VTX_FMT_1; typedef struct VAP_OUT_VTX_FMT_2tag { BITS tex8comp :3; BITS tex9comp :3; BITS tex10comp:3; BITS tex11comp:3; BITS tex12comp:3; BITS tex13comp:3; BITS tex14comp:3; BITS tex15comp:3; BITS resvd:8; } VAP_OUT_VTX_FMT_2; typedef struct OUT_FRAGMENT_FMT_0tag { BITS color0:1; BITS color1:1; BITS color2:1; BITS color3:1; BITS color4:1; BITS color5:1; BITS color6:1; BITS color7:1; BITS depth:1; BITS stencil_ref:1; BITS coverage_to_mask:1; BITS mask:1; BITS resvd1:20; } OUT_FRAGMENT_FMT_0; typedef enum CF_CLAUSE_TYPE { CF_EXPORT_CLAUSE, CF_ALU_CLAUSE, CF_TEX_CLAUSE, CF_VTX_CLAUSE, CF_OTHER_CLAUSE, CF_EMPTY_CLAUSE, NUMBER_CF_CLAUSE_TYPES } CF_CLAUSE_TYPE; enum { MAX_BOOL_CONSTANTS = 32, MAX_INT_CONSTANTS = 32, MAX_FLOAT_CONSTANTS = 256, FC_NONE = 0, FC_IF = 1, FC_LOOP = 2, FC_REP = 3, COND_NONE = 0, COND_BOOL = 1, COND_PRED = 2, COND_ALU = 3, SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup SAFEDIST_ALU = 6 ///< the same for alu->fc }; typedef struct FC_LEVEL { unsigned int first; ///< first fc instruction on level (if, rep, loop) unsigned int* mid; ///< middle instructions - else or all breaks on this level unsigned int midLen; unsigned int type; unsigned int cond; unsigned int inv; unsigned int bpush; ///< 1 if first instruction does branch stack push int id; ///< id of bool or int variable } FC_LEVEL; typedef struct VTX_FETCH_METHOD { GLboolean bEnableMini; GLuint mega_fetch_remainder; } VTX_FETCH_METHOD; typedef struct r700_AssemblerBase { R700ControlFlowSXClause* cf_last_export_ptr; R700ControlFlowSXClause* cf_current_export_clause_ptr; R700ControlFlowALUClause* cf_current_alu_clause_ptr; R700ControlFlowGenericClause* cf_current_tex_clause_ptr; R700ControlFlowGenericClause* cf_current_vtx_clause_ptr; R700ControlFlowGenericClause* cf_current_cf_clause_ptr; //Result shader R700_Shader * pR700Shader; // No clause has been created yet CF_CLAUSE_TYPE cf_current_clause_type; GLuint number_of_exports; GLuint number_of_colorandz_exports; GLuint number_of_export_opcodes; PVSDWORD D; PVSDWORD S[3]; unsigned int uLastPosUpdate; OUT_FRAGMENT_FMT_0 fp_stOutFmt0; unsigned int uIIns; unsigned int uOIns; unsigned int number_used_registers; unsigned int uUsedConsts; // Fragment programs unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX]; unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; unsigned int uBoolConsts; unsigned int uIntConsts; unsigned int uInsts; unsigned int uConsts; // Vertex programs unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX]; unsigned char ucVP_OutputMap[VERT_RESULT_MAX]; unsigned char * pucOutMask; //----------------------------------------------------------------------------------- // flow control members //----------------------------------------------------------------------------------- unsigned int FCSP; FC_LEVEL fc_stack[32]; unsigned int branch_depth; unsigned int max_branch_depth; //----------------------------------------------------------------------------------- // ArgSubst used in Assemble_Source() function //----------------------------------------------------------------------------------- int aArgSubst[4]; GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ]; GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ]; GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ]; GLuint uOutputs; GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS]; GLint depth_export_register_number; GLint stencil_export_register_number; GLint coverage_to_mask_export_register_number; GLint mask_export_register_number; GLuint starting_export_register_number; GLuint starting_vfetch_register_number; GLuint starting_temp_register_number; GLuint uHelpReg; GLuint uFirstHelpReg; GLboolean input_position_is_used; GLboolean input_normal_is_used; GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS]; GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS]; R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX]; GLuint number_of_inputs; InstDeps *pInstDeps; SHADER_PIPE_TYPE currentShaderType; struct prog_instruction * pILInst; GLuint uiCurInst; GLboolean bR6xx; /* helper to decide which type of instruction to assemble */ GLboolean is_tex; /* we inserted helper intructions and need barrier on next TEX ins */ GLboolean need_tex_barrier; } r700_AssemblerBase; //Internal use BITS addrmode_PVSDST(PVSDST * pPVSDST); void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode); void nomask_PVSDST(PVSDST * pPVSDST); BITS addrmode_PVSSRC(PVSSRC* pPVSSRC); void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode); void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz); void noswizzle_PVSSRC(PVSSRC* pPVSSRC); void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w); void neg_PVSSRC(PVSSRC* pPVSSRC); void noneg_PVSSRC(PVSSRC* pPVSSRC); void flipneg_PVSSRC(PVSSRC* pPVSSRC); void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c); void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c); BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0); BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; GLboolean is_reduction_opcode(PVSDWORD * dest); GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); int check_current_clause(r700_AssemblerBase* pAsm, CF_CLAUSE_TYPE new_clause_type); GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, R700VertexInstruction* vertex_instruction_ptr); GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, R700TextureInstruction* tex_instruction_ptr); GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, GLuint gl_client_id, GLuint destination_register, GLuint number_of_elements, GLenum dataElementType, VTX_FETCH_METHOD* pFetchMethod); GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, GLuint destination_register, GLenum type, GLint size, GLubyte element, GLuint _signed, GLboolean normalize, VTX_FETCH_METHOD * pFetchMethod); GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); GLboolean mov_temp(r700_AssemblerBase* pAsm, int src); GLboolean checkop1(r700_AssemblerBase* pAsm); GLboolean checkop2(r700_AssemblerBase* pAsm); GLboolean checkop3(r700_AssemblerBase* pAsm); GLboolean assemble_src(r700_AssemblerBase *pAsm, int src, int fld); GLboolean assemble_dst(r700_AssemblerBase *pAsm); GLboolean tex_dst(r700_AssemblerBase *pAsm); GLboolean tex_src(r700_AssemblerBase *pAsm); GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized); void initialize(r700_AssemblerBase *pAsm); GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, int source_index, PVSSRC* pSource, BITS scalar_channel_index); GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr, GLuint contiguous_slots_needed); void get_src_properties(R700ALUInstruction* alu_instruction_ptr, int source_index, BITS* psrc_sel, BITS* psrc_rel, BITS* psrc_chan, BITS* psrc_neg); int is_cfile(BITS sel); int is_const(BITS sel); int is_gpr(BITS sel); GLboolean reserve_cfile(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan); GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle); GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle); GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle); GLboolean check_scalar(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr); GLboolean check_vector(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr); GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); GLboolean assemble_EXP(r700_AssemblerBase *pAsm); GLboolean assemble_FLR(r700_AssemblerBase *pAsm); GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); GLboolean assemble_FRC(r700_AssemblerBase *pAsm); GLboolean assemble_KIL(r700_AssemblerBase *pAsm); GLboolean assemble_LG2(r700_AssemblerBase *pAsm); GLboolean assemble_LRP(r700_AssemblerBase *pAsm); GLboolean assemble_LOG(r700_AssemblerBase *pAsm); GLboolean assemble_MAD(r700_AssemblerBase *pAsm); GLboolean assemble_LIT(r700_AssemblerBase *pAsm); GLboolean assemble_MAX(r700_AssemblerBase *pAsm); GLboolean assemble_MIN(r700_AssemblerBase *pAsm); GLboolean assemble_MOV(r700_AssemblerBase *pAsm); GLboolean assemble_MUL(r700_AssemblerBase *pAsm); GLboolean assemble_POW(r700_AssemblerBase *pAsm); GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); GLboolean assemble_TEX(r700_AssemblerBase *pAsm); GLboolean assemble_XPD(r700_AssemblerBase *pAsm); GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); GLboolean assemble_IF(r700_AssemblerBase *pAsm); GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm); GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, GLuint export_count, GLuint starting_register_number, GLboolean is_depth_export); GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select); //Interface GLboolean AssembleInstr(GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); #endif //_R700_ASSEMBLER_H_