From edefc658e4374fea96404715ce176b24942322d3 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 9 Aug 2006 20:05:26 +0000 Subject: More GLSL code - more support for vec4 operations on x86 back-end. --- src/mesa/shader/slang/slang_assemble.c | 200 ++++++++++++--------- src/mesa/shader/slang/slang_assemble.h | 3 + src/mesa/shader/slang/slang_assemble_assignment.c | 139 ++++++++------ src/mesa/shader/slang/slang_assemble_constructor.c | 46 +++-- src/mesa/shader/slang/slang_execute_x86.c | 197 ++++++++++---------- src/mesa/shader/slang/slang_storage.c | 119 +++++++----- src/mesa/shader/slang/slang_storage.h | 22 ++- src/mesa/x86/rtasm/x86sse.c | 14 ++ src/mesa/x86/rtasm/x86sse.h | 2 + 9 files changed, 428 insertions(+), 314 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/slang/slang_assemble.c b/src/mesa/shader/slang/slang_assemble.c index 9e5851383c..36fb2305f6 100644 --- a/src/mesa/shader/slang/slang_assemble.c +++ b/src/mesa/shader/slang/slang_assemble.c @@ -398,74 +398,95 @@ GLboolean _slang_cleanup_stack (slang_assemble_ctx *A, slang_operation *op) /* _slang_assemble_operation() */ -static GLboolean dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, - GLuint *size, slang_swizzle *swz, GLboolean is_swizzled) +static GLboolean +dereference_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *size, slang_swizzle *swz, + GLboolean is_swizzled) { - GLuint i; - - for (i = agg->count; i > 0; i--) - { - const slang_storage_array *arr = &agg->arrays[i - 1]; - GLuint j; - - for (j = arr->length; j > 0; j--) - { - if (arr->type == slang_stor_aggregate) - { - if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled)) - return GL_FALSE; - } - else - { - GLuint src_offset; - slang_assembly_type ty; + GLuint src_offset; + slang_assembly_type ty; + + *size -= _slang_sizeof_type (type); + + /* If swizzling is taking place, we are forced to use scalar operations, even if we have + * vec4 instructions enabled (this should be actually done with special vec4 shuffle + * instructions). + * Adjust the size and calculate the offset within source variable to read. + */ + if (is_swizzled) + src_offset = swz->swizzle[*size / 4] * 4; + else + src_offset = *size; + + /* dereference data slot of a basic type */ + if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4)) + return GL_FALSE; + if (!PUSH (A->file, slang_asm_addr_deref)) + return GL_FALSE; + if (src_offset != 0) { + if (!PLAB (A->file, slang_asm_addr_push, src_offset)) + return GL_FALSE; + if (!PUSH (A->file, slang_asm_addr_add)) + return GL_FALSE; + } + + switch (type) { + case slang_stor_bool: + ty = slang_asm_bool_deref; + break; + case slang_stor_int: + ty = slang_asm_int_deref; + break; + case slang_stor_float: + ty = slang_asm_float_deref; + break; +#if defined(USE_X86_ASM) || defined(SLANG_X86) + case slang_stor_vec4: + ty = slang_asm_vec4_deref; + break; +#endif + default: + _mesa_problem(NULL, "Unexpected arr->type in dereference_basic"); + ty = slang_asm_none; + } - *size -= 4; + return PUSH (A->file, ty); +} - /* calculate the offset within source variable to read */ - if (is_swizzled) - { - /* swizzle the index to get the actual offset */ - src_offset = swz->swizzle[*size / 4] * 4; - } - else - { - /* no swizzling - read sequentially */ - src_offset = *size; - } +static GLboolean +dereference_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *size, + slang_swizzle *swz, GLboolean is_swizzled) +{ + GLuint i; - /* dereference data slot of a basic type */ - if (!PLAB2 (A->file, slang_asm_local_addr, A->local.addr_tmp, 4)) - return GL_FALSE; - if (!PUSH (A->file, slang_asm_addr_deref)) - return GL_FALSE; - if (!PLAB (A->file, slang_asm_addr_push, src_offset)) - return GL_FALSE; - if (!PUSH (A->file, slang_asm_addr_add)) - return GL_FALSE; + for (i = agg->count; i > 0; i--) { + const slang_storage_array *arr = &agg->arrays[i - 1]; + GLuint j; - switch (arr->type) - { - case slang_stor_bool: - ty = slang_asm_bool_deref; - break; - case slang_stor_int: - ty = slang_asm_int_deref; - break; - case slang_stor_float: - ty = slang_asm_float_deref; - break; - default: - _mesa_problem(NULL, "Unexpected arr->type in dereference_aggregate"); - ty = slang_asm_none; - } - if (!PUSH (A->file, ty)) - return GL_FALSE; - } - } - } + for (j = arr->length; j > 0; j--) { + if (arr->type == slang_stor_aggregate) { + if (!dereference_aggregate (A, arr->aggregate, size, swz, is_swizzled)) + return GL_FALSE; + } + else { + if (is_swizzled && arr->type == slang_stor_vec4) { + if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled)) + return GL_FALSE; + if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled)) + return GL_FALSE; + if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled)) + return GL_FALSE; + if (!dereference_basic (A, slang_stor_float, size, swz, is_swizzled)) + return GL_FALSE; + } + else { + if (!dereference_basic (A, arr->type, size, swz, is_swizzled)) + return GL_FALSE; + } + } + } + } - return GL_TRUE; + return GL_TRUE; } GLboolean _slang_dereference (slang_assemble_ctx *A, slang_operation *op) @@ -694,35 +715,40 @@ static GLboolean call_asm_instruction (slang_assemble_ctx *A, slang_atom a_name) return GL_TRUE; } -static GLboolean equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, - GLuint *index, GLuint size, GLuint z_label) +static GLboolean +equality_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index, + GLuint size, GLuint z_label) { - GLuint i; + GLuint i; - for (i = 0; i < agg->count; i++) - { - const slang_storage_array *arr = &agg->arrays[i]; - GLuint j; + for (i = 0; i < agg->count; i++) { + const slang_storage_array *arr = &agg->arrays[i]; + GLuint j; - for (j = 0; j < arr->length; j++) - { - if (arr->type == slang_stor_aggregate) - { - if (!equality_aggregate (A, arr->aggregate, index, size, z_label)) - return GL_FALSE; - } - else - { - if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index)) - return GL_FALSE; - *index += 4; - if (!PLAB (A->file, slang_asm_jump_if_zero, z_label)) - return GL_FALSE; - } - } - } + for (j = 0; j < arr->length; j++) { + if (arr->type == slang_stor_aggregate) { + if (!equality_aggregate (A, arr->aggregate, index, size, z_label)) + return GL_FALSE; + } + else { +#if defined(USE_X86_ASM) || defined(SLANG_X86) + if (arr->type == slang_stor_vec4) { + if (!PLAB2 (A->file, slang_asm_vec4_equal_int, size + *index, *index)) + return GL_FALSE; + } + else +#endif + if (!PLAB2 (A->file, slang_asm_float_equal_int, size + *index, *index)) + return GL_FALSE; - return GL_TRUE; + *index += _slang_sizeof_type (arr->type); + if (!PLAB (A->file, slang_asm_jump_if_zero, z_label)) + return GL_FALSE; + } + } + } + + return GL_TRUE; } static GLboolean equality (slang_assemble_ctx *A, slang_operation *op, GLboolean equal) diff --git a/src/mesa/shader/slang/slang_assemble.h b/src/mesa/shader/slang/slang_assemble.h index fbf88bd6d1..95e4fa263a 100644 --- a/src/mesa/shader/slang/slang_assemble.h +++ b/src/mesa/shader/slang/slang_assemble.h @@ -105,6 +105,9 @@ typedef enum slang_assembly_type_ slang_asm_vec4_divide, slang_asm_vec4_negate, slang_asm_vec4_dot, + slang_asm_vec4_copy, + slang_asm_vec4_deref, + slang_asm_vec4_equal_int, /* not a real assembly instruction */ slang_asm__last } slang_assembly_type; diff --git a/src/mesa/shader/slang/slang_assemble_assignment.c b/src/mesa/shader/slang/slang_assemble_assignment.c index 2f4cb1423f..d894a8db18 100644 --- a/src/mesa/shader/slang/slang_assemble_assignment.c +++ b/src/mesa/shader/slang/slang_assemble_assignment.c @@ -53,66 +53,89 @@ * +------------------+ */ -static GLboolean assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, - GLuint *index, GLuint size) +static GLboolean +assign_basic (slang_assemble_ctx *A, slang_storage_type type, GLuint *index, GLuint size) { - GLuint i; - - for (i = 0; i < agg->count; i++) - { - const slang_storage_array *arr = &agg->arrays[i]; - GLuint j; - - for (j = 0; j < arr->length; j++) - { - if (arr->type == slang_stor_aggregate) - { - if (!assign_aggregate (A, arr->aggregate, index, size)) - return GL_FALSE; - } - else - { - GLuint dst_addr_loc, dst_offset; - slang_assembly_type ty; - - /* calculate the distance from top of the stack to the destination address */ - dst_addr_loc = size - *index; - - /* calculate the offset within destination variable to write */ - if (A->swz.num_components != 0) - { - /* swizzle the index to get the actual offset */ - dst_offset = A->swz.swizzle[*index / 4] * 4; - } - else - { - /* no swizzling - write sequentially */ - dst_offset = *index; - } - - switch (arr->type) - { - case slang_stor_bool: - ty = slang_asm_bool_copy; - break; - case slang_stor_int: - ty = slang_asm_int_copy; - break; - case slang_stor_float: - ty = slang_asm_float_copy; - break; - default: - break; - } - if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset)) - return GL_FALSE; - - *index += 4; - } - } - } + GLuint dst_offset, dst_addr_loc; + slang_assembly_type ty; + + /* Calculate the offset within destination variable to write. */ + if (A->swz.num_components != 0) + dst_offset = A->swz.swizzle[*index / 4] * 4; + else + dst_offset = *index; + + switch (type) { + case slang_stor_bool: + ty = slang_asm_bool_copy; + break; + case slang_stor_int: + ty = slang_asm_int_copy; + break; + case slang_stor_float: + ty = slang_asm_float_copy; + break; +#if defined(USE_X86_ASM) || defined(SLANG_X86) + case slang_stor_vec4: + ty = slang_asm_vec4_copy; + break; +#endif + default: + _mesa_problem(NULL, "Unexpected arr->type in assign_basic"); + ty = slang_asm_none; + } + + /* Calculate the distance from top of the stack to the destination address. As the + * copy operation progresses, components of the source are being successively popped + * off the stack by the amount of *index increase step. + */ + dst_addr_loc = size - *index; + + if (!slang_assembly_file_push_label2 (A->file, ty, dst_addr_loc, dst_offset)) + return GL_FALSE; + *index += _slang_sizeof_type (type); + + return GL_TRUE; +} - return GL_TRUE; +static GLboolean +assign_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *agg, GLuint *index, + GLuint size) +{ + GLuint i; + + for (i = 0; i < agg->count; i++) { + const slang_storage_array *arr = &agg->arrays[i]; + GLuint j; + + for (j = 0; j < arr->length; j++) { + if (arr->type == slang_stor_aggregate) { + if (!assign_aggregate (A, arr->aggregate, index, size)) + return GL_FALSE; + } + else { + /* When the destination is swizzled, we are forced to do float_copy, even if + * vec4 extension is enabled with vec4_copy operation. + */ + if (A->swz.num_components != 0 && arr->type == slang_stor_vec4) { + if (!assign_basic (A, slang_stor_float, index, size)) + return GL_FALSE; + if (!assign_basic (A, slang_stor_float, index, size)) + return GL_FALSE; + if (!assign_basic (A, slang_stor_float, index, size)) + return GL_FALSE; + if (!assign_basic (A, slang_stor_float, index, size)) + return GL_FALSE; + } + else { + if (!assign_basic (A, arr->type, index, size)) + return GL_FALSE; + } + } + } + } + + return GL_TRUE; } GLboolean _slang_assemble_assignment (slang_assemble_ctx *A, slang_operation *op) diff --git a/src/mesa/shader/slang/slang_assemble_constructor.c b/src/mesa/shader/slang/slang_assemble_constructor.c index 6f02772bcd..9d1aa70718 100644 --- a/src/mesa/shader/slang/slang_assemble_constructor.c +++ b/src/mesa/shader/slang/slang_assemble_constructor.c @@ -143,38 +143,32 @@ GLvoid _slang_multiply_swizzles (slang_swizzle *dst, const slang_swizzle *left, /* _slang_assemble_constructor() */ -static GLboolean sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op) +static GLboolean +sizeof_argument (slang_assemble_ctx *A, GLuint *size, slang_operation *op) { - slang_assembly_typeinfo ti; - GLboolean result = GL_FALSE; - slang_storage_aggregate agg, flat_agg; + slang_assembly_typeinfo ti; + GLboolean result = GL_FALSE; + slang_storage_aggregate agg; - if (!slang_assembly_typeinfo_construct (&ti)) - return GL_FALSE; - if (!_slang_typeof_operation (A, op, &ti)) - goto end1; + if (!slang_assembly_typeinfo_construct (&ti)) + return GL_FALSE; + if (!_slang_typeof_operation (A, op, &ti)) + goto end1; - if (!slang_storage_aggregate_construct (&agg)) - goto end1; - if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs, - A->space.vars, A->mach, A->file, A->atoms)) - goto end2; + if (!slang_storage_aggregate_construct (&agg)) + goto end1; + if (!_slang_aggregate_variable (&agg, &ti.spec, 0, A->space.funcs, A->space.structs, + A->space.vars, A->mach, A->file, A->atoms)) + goto end; - if (!slang_storage_aggregate_construct (&flat_agg)) - goto end2; - if (!_slang_flatten_aggregate (&flat_agg, &agg)) - goto end; - - *size = flat_agg.count * 4; + *size = _slang_sizeof_aggregate (&agg); + result = GL_TRUE; - result = GL_TRUE; end: - slang_storage_aggregate_destruct (&flat_agg); -end2: - slang_storage_aggregate_destruct (&agg); + slang_storage_aggregate_destruct (&agg); end1: - slang_assembly_typeinfo_destruct (&ti); - return result; + slang_assembly_typeinfo_destruct (&ti); + return result; } static GLboolean constructor_aggregate (slang_assemble_ctx *A, const slang_storage_aggregate *flat, @@ -270,7 +264,7 @@ GLboolean _slang_assemble_constructor (slang_assemble_ctx *A, slang_operation *o arg_sums[1] = 0; /* will hold all argument's size sum */ for (i = 0; i < op->num_children; i++) { - GLuint arg_size; + GLuint arg_size = 0; if (!sizeof_argument (A, &arg_size, &op->children[i])) goto end; diff --git a/src/mesa/shader/slang/slang_execute_x86.c b/src/mesa/shader/slang/slang_execute_x86.c index 59c3aadcaa..468984dce6 100644 --- a/src/mesa/shader/slang/slang_execute_x86.c +++ b/src/mesa/shader/slang/slang_execute_x86.c @@ -50,6 +50,7 @@ typedef struct struct x86_reg r_eax; struct x86_reg r_ecx; struct x86_reg r_edx; + struct x86_reg r_ebx; struct x86_reg r_esp; struct x86_reg r_ebp; struct x86_reg r_st0; @@ -183,7 +184,7 @@ static GLvoid do_print_bool (slang_info_log **infolog, GLfloat x) static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log **infolog) { - GLint disp; + GLint disp, i; switch (a->type) { @@ -517,128 +518,133 @@ static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log * break; case slang_asm_vec4_add: /* [vec4] | vec4 */ - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_deref (G->r_eax)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); - x87_faddp (&G->f, G->r_st4); - x87_faddp (&G->f, G->r_st4); - x87_faddp (&G->f, G->r_st4); - x87_faddp (&G->f, G->r_st4); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fstp (&G->f, x86_deref (G->r_eax)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + for (i = 0; i < 4; i++) + x87_faddp (&G->f, G->r_st4); + for (i = 0; i < 4; i++) + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_subtract: /* [vec4] | vec4 */ x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); - x87_fld (&G->f, x86_deref (G->r_eax)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); - x87_fsubp (&G->f, G->r_st4); - x87_fsubp (&G->f, G->r_st4); - x87_fsubp (&G->f, G->r_st4); - x87_fsubp (&G->f, G->r_st4); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fstp (&G->f, x86_deref (G->r_eax)); + for (i = 0; i < 4; i++) + x87_fsubp (&G->f, G->r_st4); + for (i = 0; i < 4; i++) + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_multiply: /* [vec4] | vec4 */ - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_deref (G->r_eax)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fmulp (&G->f, G->r_st4); - x87_fmulp (&G->f, G->r_st4); - x87_fmulp (&G->f, G->r_st4); - x87_fmulp (&G->f, G->r_st4); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fstp (&G->f, x86_deref (G->r_eax)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + for (i = 0; i < 4; i++) + x87_fmulp (&G->f, G->r_st4); + for (i = 0; i < 4; i++) + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_divide: /* [vec4] | vec4 */ x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); - x87_fld (&G->f, x86_deref (G->r_eax)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); - x87_fdivp (&G->f, G->r_st4); - x87_fdivp (&G->f, G->r_st4); - x87_fdivp (&G->f, G->r_st4); - x87_fdivp (&G->f, G->r_st4); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fstp (&G->f, x86_deref (G->r_eax)); + for (i = 0; i < 4; i++) + x87_fdivp (&G->f, G->r_st4); + for (i = 0; i < 4; i++) + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); break; case slang_asm_vec4_negate: /* [vec4] */ x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_deref (G->r_eax)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fchs (&G->f); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fchs (&G->f); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fchs (&G->f); - x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fchs (&G->f); - x87_fstp (&G->f, x86_deref (G->r_eax)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + for (i = 0; i < 4; i++) { + x87_fchs (&G->f); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12 - i * 4)); + } break; case slang_asm_vec4_dot: /* [vec4] | vec4 */ - x87_fld (&G->f, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); - x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_esp, i * 4)); x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); - x87_fld (&G->f, x86_deref (G->r_eax)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); - x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); - x87_fmulp (&G->f, G->r_st4); - x87_fmulp (&G->f, G->r_st4); - x87_fmulp (&G->f, G->r_st4); - x87_fmulp (&G->f, G->r_st4); - x87_faddp (&G->f, G->r_st1); - x87_faddp (&G->f, G->r_st1); - x87_faddp (&G->f, G->r_st1); + for (i = 0; i < 4; i++) + x87_fld (&G->f, x86_make_disp (G->r_eax, i * 4)); + for (i = 0; i < 4; i++) + x87_fmulp (&G->f, G->r_st4); + for (i = 0; i < 3; i++) + x87_faddp (&G->f, G->r_st1); x87_fstp (&G->f, x86_deref (G->r_eax)); break; - default: - assert (0); - } + case slang_asm_vec4_copy: + /* [vec4] | vec4 */ + x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, a->param[0])); + x86_pop (&G->f, G->r_ecx); + x86_pop (&G->f, G->r_edx); + x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1]), G->r_ecx); + x86_pop (&G->f, G->r_ebx); + x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 4), G->r_edx); + x86_pop (&G->f, G->r_ecx); + x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 8), G->r_ebx); + x86_mov (&G->f, x86_make_disp (G->r_eax, a->param[1] + 12), G->r_ecx); + break; + case slang_asm_vec4_deref: + /* [vec4] */ + x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x86_mov (&G->f, G->r_ecx, x86_make_disp (G->r_eax, 12)); + x86_mov (&G->f, G->r_edx, x86_make_disp (G->r_eax, 8)); + x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx); + x86_mov (&G->f, G->r_ebx, x86_make_disp (G->r_eax, 4)); + x86_push (&G->f, G->r_edx); + x86_mov (&G->f, G->r_ecx, x86_deref (G->r_eax)); + x86_push (&G->f, G->r_ebx); + x86_push (&G->f, G->r_ecx); + break; + case slang_asm_vec4_equal_int: + x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, -4)); + x86_mov_reg_imm (&G->f, G->r_edx, 0x4000); + for (i = 0; i < 4; i++) { + x87_fld (&G->f, x86_make_disp (G->r_esp, a->param[0] + 4 + i * 4)); + x87_fcomp (&G->f, x86_make_disp (G->r_esp, a->param[1] + 4 + i * 4)); + x87_fnstsw (&G->f, G->r_eax); + x86_and (&G->f, G->r_edx, G->r_eax); + } + /* TODO: use test r8,imm8 */ + x86_mov_reg_imm (&G->f, G->r_ecx, 0x4000); + x86_test (&G->f, G->r_edx, G->r_ecx); + { + GLubyte *lab0, *lab1; + + /* TODO: use jcc rel8 */ + lab0 = x86_jcc_forward (&G->f, cc_E); + x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ONE); + /* TODO: use jmp rel8 */ + lab1 = x86_jmp_forward (&G->f); + x86_fixup_fwd_jump (&G->f, lab0); + x86_mov_reg_imm (&G->f, G->r_ecx, FLOAT_ZERO); + x86_fixup_fwd_jump (&G->f, lab1); + x86_mov (&G->f, x86_deref (G->r_esp), G->r_ecx); + } + break; + default: + assert (0); + } } GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GLuint start) @@ -664,6 +670,7 @@ GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GL G.r_eax = x86_make_reg (file_REG32, reg_AX); G.r_ecx = x86_make_reg (file_REG32, reg_CX); G.r_edx = x86_make_reg (file_REG32, reg_DX); + G.r_ebx = x86_make_reg (file_REG32, reg_BX); G.r_esp = x86_make_reg (file_REG32, reg_SP); G.r_ebp = x86_make_reg (file_REG32, reg_BP); G.r_st0 = x86_make_reg (file_x87, 0); diff --git a/src/mesa/shader/slang/slang_storage.c b/src/mesa/shader/slang/slang_storage.c index 34c62081f0..6220b7c5bf 100644 --- a/src/mesa/shader/slang/slang_storage.c +++ b/src/mesa/shader/slang/slang_storage.c @@ -211,14 +211,22 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp return aggregate_vector (agg, slang_stor_float, 2); case slang_spec_vec3: return aggregate_vector (agg, slang_stor_float, 3); - case slang_spec_vec4: - return aggregate_vector (agg, slang_stor_float, 4); + case slang_spec_vec4: +#if defined(USE_X86_ASM) || defined(SLANG_X86) + return aggregate_vector (agg, slang_stor_vec4, 1); +#else + return aggregate_vector (agg, slang_stor_float, 4); +#endif case slang_spec_mat2: return aggregate_matrix (agg, slang_stor_float, 2); case slang_spec_mat3: return aggregate_matrix (agg, slang_stor_float, 3); - case slang_spec_mat4: - return aggregate_matrix (agg, slang_stor_float, 4); + case slang_spec_mat4: +#if defined(USE_X86_ASM) || defined(SLANG_X86) + return aggregate_vector (agg, slang_stor_vec4, 4); +#else + return aggregate_matrix (agg, slang_stor_float, 4); +#endif case slang_spec_sampler1D: case slang_spec_sampler2D: case slang_spec_sampler3D: @@ -258,54 +266,77 @@ GLboolean _slang_aggregate_variable (slang_storage_aggregate *agg, slang_type_sp } } -/* _slang_sizeof_aggregate() */ +/* _slang_sizeof_type() */ -GLuint _slang_sizeof_aggregate (const slang_storage_aggregate *agg) +GLuint +_slang_sizeof_type (slang_storage_type type) { - GLuint i, size = 0; + if (type == slang_stor_aggregate) + return 0; + if (type == slang_stor_vec4) + return 4 * sizeof (GLfloat); + return sizeof (GLfloat); +} - for (i = 0; i < agg->count; i++) - { - GLuint element_size; +/* _slang_sizeof_aggregate() */ - if (agg->arrays[i].type == slang_stor_aggregate) - element_size = _slang_sizeof_aggregate (agg->arrays[i].aggregate); - else - element_size = sizeof (GLfloat); - size += element_size * agg->arrays[i].length; - } - return size; +GLuint _slang_sizeof_aggregate (const slang_storage_aggregate *agg) +{ + GLuint i, size = 0; + + for (i = 0; i < agg->count; i++) { + slang_storage_array *arr = &agg->arrays[i]; + GLuint element_size; + + if (arr->type == slang_stor_aggregate) + element_size = _slang_sizeof_aggregate (arr->aggregate); + else + element_size = _slang_sizeof_type (arr->type); + size += element_size * arr->length; + } + return size; } /* _slang_flatten_aggregate () */ -GLboolean _slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg) +GLboolean +_slang_flatten_aggregate (slang_storage_aggregate *flat, const slang_storage_aggregate *agg) { - GLuint i; - - for (i = 0; i < agg->count; i++) - { - GLuint j; - - for (j = 0; j < agg->arrays[i].length; j++) - { - if (agg->arrays[i].type == slang_stor_aggregate) - { - if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate)) - return GL_FALSE; - } - else - { - slang_storage_array *arr; - - arr = slang_storage_aggregate_push_new (flat); - if (arr == NULL) - return GL_FALSE; - arr->type = agg->arrays[i].type; - arr->length = 1; - } - } - } - return GL_TRUE; + GLuint i; + + for (i = 0; i < agg->count; i++) { + GLuint j; + + for (j = 0; j < agg->arrays[i].length; j++) { + if (agg->arrays[i].type == slang_stor_aggregate) { + if (!_slang_flatten_aggregate (flat, agg->arrays[i].aggregate)) + return GL_FALSE; + } + else { + GLuint k, count; + slang_storage_type type; + + if (agg->arrays[i].type == slang_stor_vec4) { + count = 4; + type = slang_stor_float; + } + else { + count = 1; + type = agg->arrays[i].type; + } + + for (k = 0; k < count; k++) { + slang_storage_array *arr; + + arr = slang_storage_aggregate_push_new (flat); + if (arr == NULL) + return GL_FALSE; + arr->type = type; + arr->length = 1; + } + } + } + } + return GL_TRUE; } diff --git a/src/mesa/shader/slang/slang_storage.h b/src/mesa/shader/slang/slang_storage.h index 532ea638ee..209f8674d9 100644 --- a/src/mesa/shader/slang/slang_storage.h +++ b/src/mesa/shader/slang/slang_storage.h @@ -41,13 +41,19 @@ extern "C" { * * For now, only the three basic types are supported, that is bool, int and float. Other built-in * types like vector or matrix can easily be decomposed into a series of basic types. + * + * If the vec4 module is enabled, 4-component vectors of floats are used when possible. 4x4 matrices + * are constructed of 4 vec4 slots. */ typedef enum slang_storage_type_ { - slang_stor_aggregate, - slang_stor_bool, - slang_stor_int, - slang_stor_float + /* core */ + slang_stor_aggregate, + slang_stor_bool, + slang_stor_int, + slang_stor_float, + /* vec4 */ + slang_stor_vec4 } slang_storage_type; /* @@ -105,6 +111,14 @@ _slang_evaluate_int(slang_assembly_file *file, GLuint *pint, slang_atom_pool *atoms); +/* + * Returns the size (in machine units) of the given storage type. + * It is an error to pass-in slang_stor_aggregate. + * Returns 0 on error. + */ +extern GLuint +_slang_sizeof_type (slang_storage_type); + /* * Returns total size (in machine units) of the given aggregate. * Returns 0 on error. diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c index 6137aef8ec..3ea37bb5e7 100644 --- a/src/mesa/x86/rtasm/x86sse.c +++ b/src/mesa/x86/rtasm/x86sse.c @@ -367,6 +367,20 @@ void x86_sub( struct x86_function *p, emit_op_modrm(p, 0x2b, 0x29, dst, src ); } +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + /*********************************************************************** diff --git a/src/mesa/x86/rtasm/x86sse.h b/src/mesa/x86/rtasm/x86sse.h index 5ec5489431..66fb852ac9 100644 --- a/src/mesa/x86/rtasm/x86sse.h +++ b/src/mesa/x86/rtasm/x86sse.h @@ -172,12 +172,14 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf ); void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_dec( struct x86_function *p, struct x86_reg reg ); void x86_inc( struct x86_function *p, struct x86_reg reg ); void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); void x86_ret( struct x86_function *p ); -- cgit v1.2.3