summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrian Paul <brian.paul@tungstengraphics.com>2008-07-01 11:41:21 -0600
committerBrian Paul <brian.paul@tungstengraphics.com>2008-07-01 11:41:21 -0600
commit32a5c4033665d51277c733318ac0461e5f85ad09 (patch)
tree5d788e14e59221d65a7d0a2fc123d0c3616546d9
parentb2247c7d29667047cd34180826a8966675f8be3a (diff)
mesa: better function inlining in the presence of 'return' statements
Before, the presence of a 'return' statement always prevented inlining a function. This was because we didn't want to accidentally return from the _calling_ function. We still need the semantic of 'return' when inlining but we can't always use unconditional branches/jumps (GPUs don't always support arbitrary branching). Now, we allow inlining functions w/ return if the return is the last statement in the function. This fixes the common case of a function that returns a value, such as: vec4 square(const in vec4 x) { return x * x; } which effectively compiles into: vec4 square(const in vec4 x) { __retVal = x * x; return; } The 'return' can be no-op'd now and we can inline the function.
-rw-r--r--src/mesa/shader/slang/slang_codegen.c128
1 files changed, 105 insertions, 23 deletions
diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c
index d19d5a0abb..821ed11901 100644
--- a/src/mesa/shader/slang/slang_codegen.c
+++ b/src/mesa/shader/slang/slang_codegen.c
@@ -707,6 +707,64 @@ _slang_find_node_type(slang_operation *oper, slang_operation_type type)
}
+/**
+ * Count the number of operations of the given time rooted at 'oper'.
+ */
+static GLuint
+_slang_count_node_type(slang_operation *oper, slang_operation_type type)
+{
+ GLuint i, count = 0;
+ if (oper->type == type) {
+ return 1;
+ }
+ for (i = 0; i < oper->num_children; i++) {
+ count += _slang_count_node_type(&oper->children[i], type);
+ }
+ return count;
+}
+
+
+/**
+ * Check if the 'return' statement found under 'oper' is a "tail return"
+ * that can be no-op'd. For example:
+ *
+ * void func(void)
+ * {
+ * .. do something ..
+ * return; // this is a no-op
+ * }
+ *
+ * This is used when determining if a function can be inlined. If the
+ * 'return' is not the last statement, we can't inline the function since
+ * we still need the semantic behaviour of the 'return' but we don't want
+ * to accidentally return from the _calling_ function. We'd need to use an
+ * unconditional branch, but we don't have such a GPU instruction (not
+ * always, at least).
+ */
+static GLboolean
+_slang_is_tail_return(const slang_operation *oper)
+{
+ GLuint k = oper->num_children;
+
+ while (k > 0) {
+ const slang_operation *last = &oper->children[k - 1];
+ if (last->type == SLANG_OPER_RETURN)
+ return GL_TRUE;
+ else if (last->type == SLANG_OPER_IDENTIFIER ||
+ last->type == SLANG_OPER_LABEL)
+ k--; /* try prev child */
+ else if (last->type == SLANG_OPER_BLOCK_NO_NEW_SCOPE ||
+ last->type == SLANG_OPER_BLOCK_NEW_SCOPE)
+ /* try sub-children */
+ return _slang_is_tail_return(last);
+ else
+ break;
+ }
+
+ return GL_FALSE;
+}
+
+
static void
slang_resolve_variable(slang_operation *oper)
{
@@ -1207,38 +1265,62 @@ _slang_gen_function_call(slang_assemble_ctx *A, slang_function *fun,
}
else {
/* non-assembly function */
+ /* We always generate an "inline-able" block of code here.
+ * We may either:
+ * 1. insert the inline code
+ * 2. Generate a call to the "inline" code as a subroutine
+ */
+
+
+ slang_operation *ret = NULL;
+
inlined = slang_inline_function_call(A, fun, oper, dest);
- if (inlined && _slang_find_node_type(inlined, SLANG_OPER_RETURN)) {
- slang_operation *callOper;
- /* The function we're calling has one or more 'return' statements.
- * So, we can't truly inline this function because we need to
- * implement 'return' with RET (and CAL).
- * Nevertheless, we performed "inlining" to make a new instance
- * of the function body to deal with static register allocation.
- *
- * XXX check if there's one 'return' and if it's the very last
- * statement in the function - we can optimize that case.
- */
- assert(inlined->type == SLANG_OPER_BLOCK_NEW_SCOPE ||
- inlined->type == SLANG_OPER_SEQUENCE);
- if (_slang_function_has_return_value(fun) && !dest) {
- assert(inlined->children[0].type == SLANG_OPER_VARIABLE_DECL);
- assert(inlined->children[2].type == SLANG_OPER_IDENTIFIER);
- callOper = &inlined->children[1];
+ if (!inlined)
+ return NULL;
+
+ ret = _slang_find_node_type(inlined, SLANG_OPER_RETURN);
+ if (ret) {
+ /* check if this is a "tail" return */
+ if (_slang_count_node_type(inlined, SLANG_OPER_RETURN) == 1 &&
+ _slang_is_tail_return(inlined)) {
+ /* The only RETURN is the last stmt in the function, no-op it
+ * and inline the function body.
+ */
+ ret->type = SLANG_OPER_NONE;
}
else {
- callOper = inlined;
+ slang_operation *callOper;
+ /* The function we're calling has one or more 'return' statements.
+ * So, we can't truly inline this function because we need to
+ * implement 'return' with RET (and CAL).
+ * Nevertheless, we performed "inlining" to make a new instance
+ * of the function body to deal with static register allocation.
+ *
+ * XXX check if there's one 'return' and if it's the very last
+ * statement in the function - we can optimize that case.
+ */
+ assert(inlined->type == SLANG_OPER_BLOCK_NEW_SCOPE ||
+ inlined->type == SLANG_OPER_SEQUENCE);
+
+ if (_slang_function_has_return_value(fun) && !dest) {
+ assert(inlined->children[0].type == SLANG_OPER_VARIABLE_DECL);
+ assert(inlined->children[2].type == SLANG_OPER_IDENTIFIER);
+ callOper = &inlined->children[1];
+ }
+ else {
+ callOper = inlined;
+ }
+ callOper->type = SLANG_OPER_NON_INLINED_CALL;
+ callOper->fun = fun;
+ callOper->label = _slang_label_new_unique((char*) fun->header.a_name);
}
- callOper->type = SLANG_OPER_NON_INLINED_CALL;
- callOper->fun = fun;
- callOper->label = _slang_label_new_unique((char*) fun->header.a_name);
}
}
if (!inlined)
return NULL;
- /* Replace the function call with the inlined block */
+ /* Replace the function call with the inlined block (or new CALL stmt) */
slang_operation_destruct(oper);
*oper = *inlined;
_slang_free(inlined);
@@ -3024,7 +3106,7 @@ _slang_codegen_function(slang_assemble_ctx * A, slang_function * fun)
if (_mesa_strcmp((char *) fun->header.a_name, "main") != 0) {
/* we only really generate code for main, all other functions get
- * inlined.
+ * inlined or codegen'd upon an actual call.
*/
#if 0
/* do some basic error checking though */