git.rm.cloudns.org Git - xonotic/gmqcc.git/commitdiff
Fixed pow/exp using fast approximation until the fractional part is hit. Once fractio...
author Dale Weiler <killfieldengine@gmail.com>
Sat, 23 Nov 2013 11:40:27 +0000 (06:40 -0500)
committer Dale Weiler <killfieldengine@gmail.com>
Sat, 23 Nov 2013 11:40:27 +0000 (06:40 -0500)
intrin.c

index d8442dc71aec519b7975d95f733f95c9452abe5e..8f0d818d6ec284849688227695af0045e2d13e32 100644 (file)
--- a/intrin.c
+++ b/intrin.c
@@ -58,147 +58,427 @@ static GMQCC_INLINE void intrin_reg(intrin_t *intrin, ast_value *const value, as
     vec_push(intrin->parser->globals,   (ast_expression*)value);
 }
 
-#define QC_M_E 2.71828182845905f
+#define QC_M_E         2.718281828459045f
+#define QC_POW_EPSILON 0.00001f
 
 static ast_expression *intrin_pow (intrin_t *intrin) {
     /*
-     * float pow(float x, float y) {
-     *   float local = 1.0f;
-     *   while (y > 0) {
-     *     while (!(y & 1)) {
-     *       y >>= 2;
-     *       x *=  x;
+     *
+     * float pow(float base, float exp) {
+     *     float result;
+     *     float low;
+     *     float high;
+     *     float mid;
+     *     float square;
+     *     float accumulate;
+     *
+     *     if (exp == 0.0)
+     *         return base;
+     *     if (exp < 0)
+     *         return 1.0 / pow(base, -exp);
+     *     if (exp >= 1) {
+     *         result = pow(base, exp / 2);
+     *         return result * result;
+     *     }
+     *
+     *     low        = 0.0f;
+     *     high       = 1.0f;
+     *     square     = sqrt(base);
+     *     accumulate = square;
+     *     mid        = high / 2.0f;
+     *
+     *     while (fabs(mid - exp) > QC_POW_EPSILON) {
+     *         square = sqrt(square);
+     *         if (mid < exp) {
+     *             low         = mid;
+     *             accumulate *= square;
+     *         } else {
+     *             high        = mid;
+     *             accumulate *= (1.0f / square);
+     *         }
+     *         mid = (low + high) / 2;
      *     }
-     *     y--;
-     *     local *= x;
-     *   }
-     *   return local;
+     *     return accumulate;
      * }
      */
     ast_value    *value = NULL;
-    ast_value    *arg1  = ast_value_new(intrin_ctx(intrin), "x",     TYPE_FLOAT);
-    ast_value    *arg2  = ast_value_new(intrin_ctx(intrin), "y",     TYPE_FLOAT);
-    ast_value    *local = ast_value_new(intrin_ctx(intrin), "local", TYPE_FLOAT);
-    ast_block    *body  = ast_block_new(intrin_ctx(intrin));
-    ast_block    *l1b   = ast_block_new(intrin_ctx(intrin)); /* loop 1 body */
-    ast_block    *l2b   = ast_block_new(intrin_ctx(intrin)); /* loop 2 body */
-    ast_loop     *loop1 = NULL;
-    ast_loop     *loop2 = NULL;
-    ast_function *func  = intrin_value(intrin, &value, "pow", TYPE_FLOAT);
+    ast_function *func = intrin_value(intrin, &value, "pow", TYPE_FLOAT);
+
+    /* prepare some calls for later */
+    ast_call *callpow1  = ast_call_new(intrin_ctx(intrin), (ast_expression*)value);      /* for pow(base, -exp)    */
+    ast_call *callpow2  = ast_call_new(intrin_ctx(intrin), (ast_expression*)value);      /* for pow(base, exp / 2) */
+    ast_call *callsqrt1 = ast_call_new(intrin_ctx(intrin), intrin_func(intrin, "sqrt")); /* for sqrt(base)         */
+    ast_call *callsqrt2 = ast_call_new(intrin_ctx(intrin), intrin_func(intrin, "sqrt")); /* for sqrt(square)       */
+    ast_call *callfabs  = ast_call_new(intrin_ctx(intrin), intrin_func(intrin, "fabs")); /* for fabs(mid - exp)    */
+
+    /* prepare some blocks for later */
+    ast_block *expgt1       = ast_block_new(intrin_ctx(intrin));
+    ast_block *midltexp     = ast_block_new(intrin_ctx(intrin));
+    ast_block *midltexpelse = ast_block_new(intrin_ctx(intrin));
+    ast_block *whileblock   = ast_block_new(intrin_ctx(intrin));
+
+    /* float pow(float base, float exp) */
+    ast_value    *base = ast_value_new(intrin_ctx(intrin), "base", TYPE_FLOAT);
+    ast_value    *exp  = ast_value_new(intrin_ctx(intrin), "exp",  TYPE_FLOAT);
+    /* { */
+    ast_block    *body = ast_block_new(intrin_ctx(intrin));
 
-    /* arguments */
-    vec_push(value->expression.params, arg1);
-    vec_push(value->expression.params, arg2);
+    /*
+     * float result;
+     * float low;
+     * float high;
+     * float square;
+     * float accumulate;
+     * float mid;
+     */
+    ast_value *result     = ast_value_new(intrin_ctx(intrin), "result",     TYPE_FLOAT);
+    ast_value *low        = ast_value_new(intrin_ctx(intrin), "low",        TYPE_FLOAT);
+    ast_value *high       = ast_value_new(intrin_ctx(intrin), "high",       TYPE_FLOAT);
+    ast_value *square     = ast_value_new(intrin_ctx(intrin), "square",     TYPE_FLOAT);
+    ast_value *accumulate = ast_value_new(intrin_ctx(intrin), "accumulate", TYPE_FLOAT);
+    ast_value *mid        = ast_value_new(intrin_ctx(intrin), "mid",        TYPE_FLOAT);
+    vec_push(body->locals, result);
+    vec_push(body->locals, low);
+    vec_push(body->locals, high);
+    vec_push(body->locals, square);
+    vec_push(body->locals, accumulate);
+    vec_push(body->locals, mid);
+
+    vec_push(value->expression.params, base);
+    vec_push(value->expression.params, exp);
 
-    /* local */
-    vec_push(body->locals, local);
+    /*
+     * if (exp == 0.0)
+     *     return base;
+     */
+    vec_push(body->exprs,
+        (ast_expression*)ast_ifthen_new(
+            intrin_ctx(intrin),
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_EQ_F,
+                (ast_expression*)exp,
+                (ast_expression*)intrin->fold->imm_float[0]
+            ),
+            (ast_expression*)ast_return_new(
+                intrin_ctx(intrin),
+                (ast_expression*)base
+            ),
+            NULL
+        )
+    );
+
+    /* <callpow1> = pow(base, -exp) */
+    vec_push(callpow1->params, (ast_expression*)base);
+    vec_push(callpow1->params,
+        (ast_expression*)ast_unary_new(
+            intrin_ctx(intrin),
+            VINSTR_NEG_F,
+            (ast_expression*)exp
+        )
+    );
 
-    /* assignment to local of value 1.0f */
+    /*
+     * if (exp < 0)
+     *     return 1.0 / <callpow1>;
+     */
     vec_push(body->exprs,
-        (ast_expression*)ast_store_new (
+        (ast_expression*)ast_ifthen_new(
+            intrin_ctx(intrin),
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_LT,
+                (ast_expression*)exp,
+                (ast_expression*)intrin->fold->imm_float[0]
+            ),
+            (ast_expression*)ast_return_new(
+                intrin_ctx(intrin),
+                (ast_expression*)ast_binary_new(
+                    intrin_ctx(intrin),
+                    INSTR_DIV_F,
+                    (ast_expression*)intrin->fold->imm_float[1],
+                    (ast_expression*)callpow1
+                )
+            ),
+            NULL
+        )
+    );
+
+    /* <callpow2> = pow(base, exp / 2) */
+    vec_push(callpow2->params, (ast_expression*)base);
+    vec_push(callpow2->params,
+        (ast_expression*)ast_binary_new(
+            intrin_ctx(intrin),
+            INSTR_DIV_F,
+            (ast_expression*)exp,
+            (ast_expression*)fold_constgen_float(intrin->fold, 2.0f)
+        )
+    );
+
+    /*
+     * <expgt1> = {
+     *     result = <callpow2>;
+     *     return result * result;
+     * }
+     */
+    vec_push(expgt1->exprs,
+        (ast_expression*)ast_store_new(
             intrin_ctx(intrin),
             INSTR_STORE_F,
-            (ast_expression*)local,
-            (ast_expression*)intrin->fold->imm_float[1] /* 1 == 1.0f */
+            (ast_expression*)result,
+            (ast_expression*)callpow2
+        )
+    );
+    vec_push(expgt1->exprs,
+        (ast_expression*)ast_return_new(
+            intrin_ctx(intrin),
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_MUL_F,
+                (ast_expression*)result,
+                (ast_expression*)result
+            )
+        )
+    );
+
+    /*
+     * if (exp >= 1) {
+     *     <expgt1>
+     * }
+     */
+    vec_push(body->exprs,
+        (ast_expression*)ast_ifthen_new(
+            intrin_ctx(intrin),
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_GE,
+                (ast_expression*)exp,
+                (ast_expression*)intrin->fold->imm_float[1]
+            ),
+            (ast_expression*)expgt1,
+            NULL
         )
     );
 
-    /* y >>= 2 */
-    vec_push(l2b->exprs,
-        (ast_expression*)ast_binstore_new (
+    /*
+     * <callsqrt1> = sqrt(base)
+     */
+    vec_push(callsqrt1->params, (ast_expression*)base);
+
+    /*
+     * low        = 0.0f;
+     * high       = 1.0f;
+     * square     = sqrt(base);
+     * accumulate = square;
+     * mid        = high / 2.0f;
+     */
+    vec_push(body->exprs,
+        (ast_expression*)ast_store_new(intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)low,
+            (ast_expression*)intrin->fold->imm_float[0]
+        )
+    );
+    vec_push(body->exprs,
+        (ast_expression*)ast_store_new(
             intrin_ctx(intrin),
             INSTR_STORE_F,
-            INSTR_MUL_F,
-            (ast_expression*)arg2,
-            (ast_expression*)fold_constgen_float(intrin->parser->fold, 0.25f)
+            (ast_expression*)high,
+            (ast_expression*)intrin->fold->imm_float[1]
+        )
+    );
+    vec_push(body->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)square,
+            (ast_expression*)callsqrt1
+        )
+    );
+    vec_push(body->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)accumulate,
+            (ast_expression*)square
+        )
+    );
+    vec_push(body->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)mid,
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_DIV_F,
+                (ast_expression*)high,
+                (ast_expression*)fold_constgen_float(intrin->fold, 2.0f)
+            )
         )
     );
 
-    /* x *= x */
-    vec_push(l2b->exprs,
-        (ast_expression*)ast_binstore_new (
+    /*
+     * <midltexp> = {
+     *     low         = mid;
+     *     accumulate *= square;
+     * }
+     */
+    vec_push(midltexp->exprs,
+        (ast_expression*)ast_store_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            (ast_expression*)low,
+            (ast_expression*)mid
+        )
+    );
+    vec_push(midltexp->exprs,
+        (ast_expression*)ast_binstore_new(
             intrin_ctx(intrin),
             INSTR_STORE_F,
             INSTR_MUL_F,
-            (ast_expression*)arg1,
-            (ast_expression*)arg1
+            (ast_expression*)accumulate,
+            (ast_expression*)square
         )
     );
 
-    /* while (!(y&1)) */
-    loop2 = ast_loop_new (
-        intrin_ctx(intrin),
-        NULL,
-        (ast_expression*)ast_binary_new (
+    /*
+     * <midltexpelse> = {
+     *     high        = mid;
+     *     accumulate *= (1.0 / square);
+     * }
+     */
+    vec_push(midltexpelse->exprs,
+        (ast_expression*)ast_store_new(
             intrin_ctx(intrin),
-            INSTR_AND,
-            (ast_expression*)arg2,
-            (ast_expression*)intrin->fold->imm_float[1] /* 1 == 1.0f */
-        ),
-        true, /* ! not */
-        NULL,
-        false,
-        NULL,
-        (ast_expression*)l2b
+            INSTR_STORE_F,
+            (ast_expression*)high,
+            (ast_expression*)mid
+        )
+    );
+    vec_push(midltexpelse->exprs,
+        (ast_expression*)ast_binstore_new(
+            intrin_ctx(intrin),
+            INSTR_STORE_F,
+            INSTR_MUL_F,
+            (ast_expression*)accumulate,
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_DIV_F,
+                (ast_expression*)intrin->fold->imm_float[1],
+                (ast_expression*)square
+            )
+        )
     );
 
-    /* push nested loop into loop expressions */
-    vec_push(l1b->exprs, (ast_expression*)loop2);
+    /*
+     * <callsqrt2> = sqrt(square)
+     */
+    vec_push(callsqrt2->params, (ast_expression*)square);
 
-    /* y-- */
-    vec_push(l1b->exprs,
-        (ast_expression*)ast_binstore_new (
+    /*
+     * <whileblock> = {
+     *     square = <callsqrt2>;
+     *     if (mid < exp)
+     *          <midltexp>;
+     *     else
+     *          <midltexpelse>;
+     *
+     *     mid = (low + high) / 2;
+     * }
+     */
+    vec_push(whileblock->exprs,
+        (ast_expression*)ast_store_new(
             intrin_ctx(intrin),
             INSTR_STORE_F,
-            INSTR_SUB_F,
-            (ast_expression*)arg2,
-            (ast_expression*)intrin->fold->imm_float[1] /* 1 == 1.0f */
+            (ast_expression*)square,
+            (ast_expression*)callsqrt2
         )
     );
-    /* local *= x */
-    vec_push(l1b->exprs,
-        (ast_expression*)ast_binstore_new (
+    vec_push(whileblock->exprs,
+        (ast_expression*)ast_ifthen_new(
+            intrin_ctx(intrin),
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_LT,
+                (ast_expression*)mid,
+                (ast_expression*)exp
+            ),
+            (ast_expression*)midltexp,
+            (ast_expression*)midltexpelse
+        )
+    );
+    vec_push(whileblock->exprs,
+        (ast_expression*)ast_store_new(
             intrin_ctx(intrin),
             INSTR_STORE_F,
-            INSTR_MUL_F,
-            (ast_expression*)local,
-            (ast_expression*)arg1
+            (ast_expression*)mid,
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_DIV_F,
+                (ast_expression*)ast_binary_new(
+                    intrin_ctx(intrin),
+                    INSTR_ADD_F,
+                    (ast_expression*)low,
+                    (ast_expression*)high
+                ),
+                (ast_expression*)fold_constgen_float(intrin->fold, 2.0f)
+            )
         )
     );
 
-    /* while (y > 0) */
-    loop1 = ast_loop_new (
-        intrin_ctx(intrin),
-        NULL,
-        (ast_expression*)ast_binary_new (
+    /*
+     * <callfabs> = fabs(mid - exp)
+     */
+    vec_push(callfabs->params,
+        (ast_expression*)ast_binary_new(
             intrin_ctx(intrin),
-            INSTR_GT,
-            (ast_expression*)arg2,
-            (ast_expression*)intrin->fold->imm_float[0] /* 0 == 0.0f */
-        ),
-        false,
-        NULL,
-        false,
-        NULL,
-        (ast_expression*)l1b
+            INSTR_SUB_F,
+            (ast_expression*)mid,
+            (ast_expression*)exp
+        )
     );
 
-    /* push the loop1 into the body for the function */
-    vec_push(body->exprs, (ast_expression*)loop1);
+    /*
+     * while (<callfabs>  > epsilon)
+     *     <whileblock>
+     */
+    vec_push(body->exprs,
+        (ast_expression*)ast_loop_new(
+            intrin_ctx(intrin),
+            /* init */
+            NULL,
+            /* pre condition */
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_GT,
+                (ast_expression*)callfabs,
+                (ast_expression*)fold_constgen_float(intrin->fold, QC_POW_EPSILON)
+            ),
+            /* pre not */
+            false,
+            /* post condition */
+            NULL,
+            /* post not */
+            false,
+            /* increment expression */
+            NULL,
+            /* code block */
+            (ast_expression*)whileblock
+        )
+    );
 
-    /* return local; */
+    /* return accumulate; */
     vec_push(body->exprs,
-        (ast_expression*)ast_return_new (
+        (ast_expression*)ast_return_new(
             intrin_ctx(intrin),
-            (ast_expression*)local
+            (ast_expression*)accumulate
         )
     );
 
-    /* push block and register intrin for codegen */
+    /* } */
     vec_push(func->blocks, body);
 
     intrin_reg(intrin, value, func);
-
     return (ast_expression*)value;
 }
 
@@ -302,33 +582,51 @@ static ast_expression *intrin_mod(intrin_t *intrin) {
 static ast_expression *intrin_exp(intrin_t *intrin) {
     /*
      * float exp(float x) {
-     *     return pow(QC_M_E, x);
+     *     // scale by 10 so the result is rounded to the nearest 0.1f
+     *     return floor((pow(QC_M_E, x) * 10) + 0.5) / 10;
      * }
      */
-    ast_value    *value = NULL;
-    ast_call     *call  = ast_call_new (intrin_ctx(intrin), intrin_func(intrin, "pow"));
-    ast_value    *arg1  = ast_value_new(intrin_ctx(intrin), "x", TYPE_FLOAT);
-    ast_block    *body  = ast_block_new(intrin_ctx(intrin));
-    ast_function *func  = intrin_value(intrin, &value, "exp", TYPE_FLOAT);
+    ast_value    *value     = NULL;
+    ast_call     *callpow   = ast_call_new (intrin_ctx(intrin), intrin_func(intrin, "pow"));
+    ast_call     *callfloor = ast_call_new (intrin_ctx(intrin), intrin_func(intrin, "floor"));
+    ast_value    *arg1      = ast_value_new(intrin_ctx(intrin), "x", TYPE_FLOAT);
+    ast_block    *body      = ast_block_new(intrin_ctx(intrin));
+    ast_function *func      = intrin_value(intrin, &value, "exp", TYPE_FLOAT);
 
-    /* push arguments for params to call */
-    vec_push(call->params, (ast_expression*)fold_constgen_float(intrin->fold, QC_M_E));
-    vec_push(call->params, (ast_expression*)arg1);
+    vec_push(value->expression.params, arg1);
+
+    vec_push(callpow->params, (ast_expression*)fold_constgen_float(intrin->fold, QC_M_E));
+    vec_push(callpow->params, (ast_expression*)arg1);
+    vec_push(callfloor->params,
+        (ast_expression*)ast_binary_new(
+            intrin_ctx(intrin),
+            INSTR_ADD_F,
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_MUL_F,
+                (ast_expression*)callpow,
+                (ast_expression*)fold_constgen_float(intrin->fold, 10.0f)
+            ),
+            (ast_expression*)fold_constgen_float(intrin->fold, 0.5f)
+        )
+    );
 
-    /* return pow(QC_M_E, x) */
+    /* return <callfloor> / 10.0f */
     vec_push(body->exprs,
         (ast_expression*)ast_return_new(
             intrin_ctx(intrin),
-            (ast_expression*)call
+            (ast_expression*)ast_binary_new(
+                intrin_ctx(intrin),
+                INSTR_DIV_F,
+                (ast_expression*)callfloor,
+                (ast_expression*)fold_constgen_float(intrin->fold, 10.0f)
+            )
         )
     );
 
-    vec_push(value->expression.params, arg1); /* float x (for param) */
-
-    vec_push(func->blocks,             body); /* {{{ body }}} */
+    vec_push(func->blocks, body); /* {{{ body }}} */
 
     intrin_reg(intrin, value, func);
-
     return (ast_expression*)value;
 }
 
@@ -399,10 +697,9 @@ static ast_expression *intrin_fabs(intrin_t *intrin) {
                     (ast_expression*)arg1,
                     (ast_expression*)intrin->fold->imm_float[0]
                 ),
-                (ast_expression*)ast_binary_new(
+                (ast_expression*)ast_unary_new(
                     intrin_ctx(intrin),
-                    INSTR_SUB_F,
-                    (ast_expression*)intrin->fold->imm_float[0],
+                    VINSTR_NEG_F,
                     (ast_expression*)arg1
                 ),
                 (ast_expression*)arg1