From d9572e3e303929e1b6d7267d43c0904dac3078f5 Mon Sep 17 00:00:00 2001 From: Dale Weiler Date: Sat, 23 Nov 2013 06:40:27 -0500 Subject: [PATCH] Fixed pow/exp using fast approximation until the fractional part is hit. Once fractional is reached a binary search is used to get close to the value. --- intrin.c | 511 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 404 insertions(+), 107 deletions(-) diff --git a/intrin.c b/intrin.c index d8442dc..8f0d818 100644 --- a/intrin.c +++ b/intrin.c @@ -58,147 +58,427 @@ static GMQCC_INLINE void intrin_reg(intrin_t *intrin, ast_value *const value, as vec_push(intrin->parser->globals, (ast_expression*)value); } -#define QC_M_E 2.71828182845905f +#define QC_M_E 2.718281828459045f +#define QC_POW_EPSILON 0.00001f static ast_expression *intrin_pow (intrin_t *intrin) { /* - * float pow(float x, float y) { - * float local = 1.0f; - * while (y > 0) { - * while (!(y & 1)) { - * y >>= 2; - * x *= x; + * + * float pow(float base, float exp) { + * float result; + * float low; + * float high; + * float mid; + * float square; + * float accumulate; + * + * if (exp == 0.0) + * return base; + * if (exp < 0) + * return 1.0 / pow(base, -exp); + * if (exp >= 1) { + * result = pow(base, exp / 2); + * return result * result; + * } + * + * low = 0.0f; + * high = 1.0f; + * square = sqrt(base); + * accumulate = square; + * mid = high / 2.0f; + * + * while (fabs(mid - exp) > QC_POW_EPSILON) { + * square = sqrt(square); + * if (mid < exp) { + * low = mid; + * accumulate *= square; + * } else { + * high = mid; + * accumulate *= (1.0f / square); + * } + * mid = (low + high) / 2; * } - * y--; - * local *= x; - * } - * return local; + * return accumulate; * } */ ast_value *value = NULL; - ast_value *arg1 = ast_value_new(intrin_ctx(intrin), "x", TYPE_FLOAT); - ast_value *arg2 = ast_value_new(intrin_ctx(intrin), "y", TYPE_FLOAT); - ast_value *local = ast_value_new(intrin_ctx(intrin), "local", TYPE_FLOAT); - ast_block *body = 
ast_block_new(intrin_ctx(intrin)); - ast_block *l1b = ast_block_new(intrin_ctx(intrin)); /* loop 1 body */ - ast_block *l2b = ast_block_new(intrin_ctx(intrin)); /* loop 2 body */ - ast_loop *loop1 = NULL; - ast_loop *loop2 = NULL; - ast_function *func = intrin_value(intrin, &value, "pow", TYPE_FLOAT); + ast_function *func = intrin_value(intrin, &value, "pow", TYPE_FLOAT); + + /* prepare some calls for later */ + ast_call *callpow1 = ast_call_new(intrin_ctx(intrin), (ast_expression*)value); /* for pow(base, -exp) */ + ast_call *callpow2 = ast_call_new(intrin_ctx(intrin), (ast_expression*)value); /* for pow(base, exp / 2) */ + ast_call *callsqrt1 = ast_call_new(intrin_ctx(intrin), intrin_func(intrin, "sqrt")); /* for sqrt(base) */ + ast_call *callsqrt2 = ast_call_new(intrin_ctx(intrin), intrin_func(intrin, "sqrt")); /* for sqrt(square) */ + ast_call *callfabs = ast_call_new(intrin_ctx(intrin), intrin_func(intrin, "fabs")); /* for fabs(mid - exp) */ + + /* prepare some blocks for later */ + ast_block *expgt1 = ast_block_new(intrin_ctx(intrin)); + ast_block *midltexp = ast_block_new(intrin_ctx(intrin)); + ast_block *midltexpelse = ast_block_new(intrin_ctx(intrin)); + ast_block *whileblock = ast_block_new(intrin_ctx(intrin)); + + /* float pow(float base, float exp) */ + ast_value *base = ast_value_new(intrin_ctx(intrin), "base", TYPE_FLOAT); + ast_value *exp = ast_value_new(intrin_ctx(intrin), "exp", TYPE_FLOAT); + /* { */ + ast_block *body = ast_block_new(intrin_ctx(intrin)); - /* arguments */ - vec_push(value->expression.params, arg1); - vec_push(value->expression.params, arg2); + /* + * float result; + * float low; + * float high; + * float square; + * float accumulate; + * float mid; + */ + ast_value *result = ast_value_new(intrin_ctx(intrin), "result", TYPE_FLOAT); + ast_value *low = ast_value_new(intrin_ctx(intrin), "low", TYPE_FLOAT); + ast_value *high = ast_value_new(intrin_ctx(intrin), "high", TYPE_FLOAT); + ast_value *square = ast_value_new(intrin_ctx(intrin), 
"square", TYPE_FLOAT); + ast_value *accumulate = ast_value_new(intrin_ctx(intrin), "accumulate", TYPE_FLOAT); + ast_value *mid = ast_value_new(intrin_ctx(intrin), "mid", TYPE_FLOAT); + vec_push(body->locals, result); + vec_push(body->locals, low); + vec_push(body->locals, high); + vec_push(body->locals, square); + vec_push(body->locals, accumulate); + vec_push(body->locals, mid); + + vec_push(value->expression.params, base); + vec_push(value->expression.params, exp); - /* local */ - vec_push(body->locals, local); + /* + * if (exp == 0.0) + * return base; (NOTE(review): pow(x, 0) is conventionally 1.0; returning base looks unintended - confirm) + */ + vec_push(body->exprs, + (ast_expression*)ast_ifthen_new( + intrin_ctx(intrin), + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_EQ_F, + (ast_expression*)exp, + (ast_expression*)intrin->fold->imm_float[0] + ), + (ast_expression*)ast_return_new( + intrin_ctx(intrin), + (ast_expression*)base + ), + NULL + ) + ); + + /* callpow1 = pow(base, -exp) */ + vec_push(callpow1->params, (ast_expression*)base); + vec_push(callpow1->params, + (ast_expression*)ast_unary_new( + intrin_ctx(intrin), + VINSTR_NEG_F, + (ast_expression*)exp + ) + ); - /* assignment to local of value 1.0f */ + /* + * if (exp < 0) + * return 1.0 / callpow1; + */ vec_push(body->exprs, - (ast_expression*)ast_store_new ( + (ast_expression*)ast_ifthen_new( + intrin_ctx(intrin), + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_LT, + (ast_expression*)exp, + (ast_expression*)intrin->fold->imm_float[0] + ), + (ast_expression*)ast_return_new( + intrin_ctx(intrin), + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_DIV_F, + (ast_expression*)intrin->fold->imm_float[1], + (ast_expression*)callpow1 + ) + ), + NULL + ) + ); + + /* callpow2 = pow(base, exp / 2) */ + vec_push(callpow2->params, (ast_expression*)base); + vec_push(callpow2->params, + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_DIV_F, + (ast_expression*)exp, + (ast_expression*)fold_constgen_float(intrin->fold, 2.0f) + ) + ); + + /* + * expgt1 = { + * result = callpow2; + * 
return result * result; + * } + */ + vec_push(expgt1->exprs, + (ast_expression*)ast_store_new( intrin_ctx(intrin), INSTR_STORE_F, - (ast_expression*)local, - (ast_expression*)intrin->fold->imm_float[1] /* 1 == 1.0f */ + (ast_expression*)result, + (ast_expression*)callpow2 + ) + ); + vec_push(expgt1->exprs, + (ast_expression*)ast_return_new( + intrin_ctx(intrin), + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_MUL_F, + (ast_expression*)result, + (ast_expression*)result + ) + ) + ); + + /* + * if (exp >= 1) { + * expgt1 + * } + */ + vec_push(body->exprs, + (ast_expression*)ast_ifthen_new( + intrin_ctx(intrin), + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_GE, + (ast_expression*)exp, + (ast_expression*)intrin->fold->imm_float[1] + ), + (ast_expression*)expgt1, + NULL ) ); - /* y >>= 2 */ - vec_push(l2b->exprs, - (ast_expression*)ast_binstore_new ( + /* + * callsqrt1 = sqrt(base) + */ + vec_push(callsqrt1->params, (ast_expression*)base); + + /* + * low = 0.0f; + * high = 1.0f; + * square = sqrt(base); + * accumulate = square; + * mid = high / 2.0f; + */ + vec_push(body->exprs, + (ast_expression*)ast_store_new(intrin_ctx(intrin), + INSTR_STORE_F, + (ast_expression*)low, + (ast_expression*)intrin->fold->imm_float[0] + ) + ); + vec_push(body->exprs, + (ast_expression*)ast_store_new( intrin_ctx(intrin), INSTR_STORE_F, - INSTR_MUL_F, - (ast_expression*)arg2, - (ast_expression*)fold_constgen_float(intrin->parser->fold, 0.25f) + (ast_expression*)high, + (ast_expression*)intrin->fold->imm_float[1] + ) + ); + vec_push(body->exprs, + (ast_expression*)ast_store_new( + intrin_ctx(intrin), + INSTR_STORE_F, + (ast_expression*)square, + (ast_expression*)callsqrt1 + ) + ); + vec_push(body->exprs, + (ast_expression*)ast_store_new( + intrin_ctx(intrin), + INSTR_STORE_F, + (ast_expression*)accumulate, + (ast_expression*)square + ) + ); + vec_push(body->exprs, + (ast_expression*)ast_store_new( + intrin_ctx(intrin), + INSTR_STORE_F, + (ast_expression*)mid, + 
(ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_DIV_F, + (ast_expression*)high, + (ast_expression*)fold_constgen_float(intrin->fold, 2.0f) + ) ) ); - /* x *= x */ - vec_push(l2b->exprs, - (ast_expression*)ast_binstore_new ( + /* + * midltexp = { + * low = mid; + * accumulate *= square; + * } + */ + vec_push(midltexp->exprs, + (ast_expression*)ast_store_new( + intrin_ctx(intrin), + INSTR_STORE_F, + (ast_expression*)low, + (ast_expression*)mid + ) + ); + vec_push(midltexp->exprs, + (ast_expression*)ast_binstore_new( intrin_ctx(intrin), INSTR_STORE_F, INSTR_MUL_F, - (ast_expression*)arg1, - (ast_expression*)arg1 + (ast_expression*)accumulate, + (ast_expression*)square ) ); - /* while (!(y&1)) */ - loop2 = ast_loop_new ( - intrin_ctx(intrin), - NULL, - (ast_expression*)ast_binary_new ( + /* + * midltexpelse = { + * high = mid; + * accumulate *= (1.0 / square); + * } + */ + vec_push(midltexpelse->exprs, + (ast_expression*)ast_store_new( intrin_ctx(intrin), - INSTR_AND, - (ast_expression*)arg2, - (ast_expression*)intrin->fold->imm_float[1] /* 1 == 1.0f */ - ), - true, /* ! 
not */ - NULL, - false, - NULL, - (ast_expression*)l2b + INSTR_STORE_F, + (ast_expression*)high, + (ast_expression*)mid + ) + ); + vec_push(midltexpelse->exprs, + (ast_expression*)ast_binstore_new( + intrin_ctx(intrin), + INSTR_STORE_F, + INSTR_MUL_F, + (ast_expression*)accumulate, + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_DIV_F, + (ast_expression*)intrin->fold->imm_float[1], + (ast_expression*)square + ) + ) ); - /* push nested loop into loop expressions */ - vec_push(l1b->exprs, (ast_expression*)loop2); + /* + * callsqrt2 = sqrt(square) + */ + vec_push(callsqrt2->params, (ast_expression*)square); - /* y-- */ - vec_push(l1b->exprs, - (ast_expression*)ast_binstore_new ( + /* + * whileblock = { + * square = callsqrt2; + * if (mid < exp) + * midltexp; + * else + * midltexpelse; + * + * mid = (low + high) / 2; + * } + */ + vec_push(whileblock->exprs, + (ast_expression*)ast_store_new( intrin_ctx(intrin), INSTR_STORE_F, - INSTR_SUB_F, - (ast_expression*)arg2, - (ast_expression*)intrin->fold->imm_float[1] /* 1 == 1.0f */ + (ast_expression*)square, + (ast_expression*)callsqrt2 ) ); - /* local *= x */ - vec_push(l1b->exprs, - (ast_expression*)ast_binstore_new ( + vec_push(whileblock->exprs, + (ast_expression*)ast_ifthen_new( + intrin_ctx(intrin), + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_LT, + (ast_expression*)mid, + (ast_expression*)exp + ), + (ast_expression*)midltexp, + (ast_expression*)midltexpelse + ) + ); + vec_push(whileblock->exprs, + (ast_expression*)ast_store_new( intrin_ctx(intrin), INSTR_STORE_F, - INSTR_MUL_F, - (ast_expression*)local, - (ast_expression*)arg1 + (ast_expression*)mid, + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_DIV_F, + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_ADD_F, + (ast_expression*)low, + (ast_expression*)high + ), + (ast_expression*)fold_constgen_float(intrin->fold, 2.0f) + ) ) ); - /* while (y > 0) */ - loop1 = ast_loop_new ( - intrin_ctx(intrin), - NULL, - (ast_expression*)ast_binary_new ( + /* + * callfabs = 
fabs(mid - exp) + */ + vec_push(callfabs->params, + (ast_expression*)ast_binary_new( intrin_ctx(intrin), - INSTR_GT, - (ast_expression*)arg2, - (ast_expression*)intrin->fold->imm_float[0] /* 0 == 0.0f */ - ), - false, - NULL, - false, - NULL, - (ast_expression*)l1b + INSTR_SUB_F, + (ast_expression*)mid, + (ast_expression*)exp + ) ); - /* push the loop1 into the body for the function */ - vec_push(body->exprs, (ast_expression*)loop1); + /* + * while (callfabs > epsilon) + * whileblock + */ + vec_push(body->exprs, + (ast_expression*)ast_loop_new( + intrin_ctx(intrin), + /* init */ + NULL, + /* pre condition */ + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_GT, + (ast_expression*)callfabs, + (ast_expression*)fold_constgen_float(intrin->fold, QC_POW_EPSILON) + ), + /* pre not */ + false, + /* post condition */ + NULL, + /* post not */ + false, + /* increment expression */ + NULL, + /* code block */ + (ast_expression*)whileblock + ) + ); - /* return local; */ + /* return accumulate */ vec_push(body->exprs, - (ast_expression*)ast_return_new ( + (ast_expression*)ast_return_new( intrin_ctx(intrin), - (ast_expression*)local + (ast_expression*)accumulate ) ); - /* push block and register intrin for codegen */ + /* } */ vec_push(func->blocks, body); intrin_reg(intrin, value, func); - return (ast_expression*)value; } @@ -302,33 +582,51 @@ static ast_expression *intrin_mod(intrin_t *intrin) { static ast_expression *intrin_exp(intrin_t *intrin) { /* * float exp(float x) { - * return pow(QC_M_E, x); + * // mul 10 to round increments of 0.1f + * return floor((pow(QC_M_E, x) * 10) + 0.5) / 10; * } */ - ast_value *value = NULL; - ast_call *call = ast_call_new (intrin_ctx(intrin), intrin_func(intrin, "pow")); - ast_value *arg1 = ast_value_new(intrin_ctx(intrin), "x", TYPE_FLOAT); - ast_block *body = ast_block_new(intrin_ctx(intrin)); - ast_function *func = intrin_value(intrin, &value, "exp", TYPE_FLOAT); + ast_value *value = NULL; + ast_call *callpow = ast_call_new (intrin_ctx(intrin), 
intrin_func(intrin, "pow")); + ast_call *callfloor = ast_call_new (intrin_ctx(intrin), intrin_func(intrin, "floor")); + ast_value *arg1 = ast_value_new(intrin_ctx(intrin), "x", TYPE_FLOAT); + ast_block *body = ast_block_new(intrin_ctx(intrin)); + ast_function *func = intrin_value(intrin, &value, "exp", TYPE_FLOAT); - /* push arguments for params to call */ - vec_push(call->params, (ast_expression*)fold_constgen_float(intrin->fold, QC_M_E)); - vec_push(call->params, (ast_expression*)arg1); + vec_push(value->expression.params, arg1); + + vec_push(callpow->params, (ast_expression*)fold_constgen_float(intrin->fold, QC_M_E)); + vec_push(callpow->params, (ast_expression*)arg1); + vec_push(callfloor->params, + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_ADD_F, + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_MUL_F, + (ast_expression*)callpow, + (ast_expression*)fold_constgen_float(intrin->fold, 10.0f) + ), + (ast_expression*)fold_constgen_float(intrin->fold, 0.5f) + ) + ); - /* return pow(QC_M_E, x) */ + /* return callfloor / 10.0f */ vec_push(body->exprs, (ast_expression*)ast_return_new( intrin_ctx(intrin), - (ast_expression*)call + (ast_expression*)ast_binary_new( + intrin_ctx(intrin), + INSTR_DIV_F, + (ast_expression*)callfloor, + (ast_expression*)fold_constgen_float(intrin->fold, 10.0f) + ) ) ); - vec_push(value->expression.params, arg1); /* float x (for param) */ - - vec_push(func->blocks, body); /* {{{ body }}} */ + vec_push(func->blocks, body); /* {{{ body }}} */ intrin_reg(intrin, value, func); - return (ast_expression*)value; } @@ -399,10 +697,9 @@ static ast_expression *intrin_fabs(intrin_t *intrin) { (ast_expression*)arg1, (ast_expression*)intrin->fold->imm_float[0] ), - (ast_expression*)ast_binary_new( + (ast_expression*)ast_unary_new( intrin_ctx(intrin), - INSTR_SUB_F, - (ast_expression*)intrin->fold->imm_float[0], + VINSTR_NEG_F, (ast_expression*)arg1 ), (ast_expression*)arg1 -- 2.39.2