git.rm.cloudns.org Git - xonotic/darkplaces.git/commitdiff
Improve glsl shader performance (#211)
author uis246 <uis9936@gmail.com>
Fri, 8 Nov 2024 13:17:30 +0000 (16:17 +0300)
committer GitHub <noreply@github.com>
Fri, 8 Nov 2024 13:17:30 +0000 (08:17 -0500)
Most optimizations are done by exploiting mathematical properties and
replacing min and max, which are slow on some hardware (e.g. Intel), with saturation.
For example, the dot product of two unit vectors is in the range [-1, 1], so
`max(0, dot(...))` = `sat(dot(...))`.
Comments were added in places where this could potentially interfere with HDR.

shader_glsl.h

index db72342bb6c31735bde60a0f29e65335d40a887b..dc56d085033a7d52f35384bd8f7aeffeb794dc5b 100644 (file)
 "# endif\n",
 "#endif\n",
 "\n",
+"#define sat(x) clamp(x, 0, 1)\n",
+"#define possat(x) sat(x)\n",//As replacement of max(x, 0) when x<=1, better for gpus that can't do 0-cycle max(x, 0)
+"#define minonesat(x) sat(x)\n",//As replacement of min(x, 1) when x>=0, better for gpus that can't do 0-cycle min(x, 1)
+"#define possatdot(x, y) possat(dot(x, y))\n",
+"\n",
 "#ifdef USECELSHADING\n",
-"# define SHADEDIFFUSE myhalf diffuse = cast_myhalf(min(max(float(dot(surfacenormal, lightnormal)) * 2.0, 0.0), 1.0));\n",
+"# define SHADEDIFFUSE myhalf diffuse = cast_myhalf(sat(float(dot(surfacenormal, lightnormal)) * 2.0));\n",
 "# ifdef USEEXACTSPECULARMATH\n",
-"#  define SHADESPECULAR(specpow) myhalf specular = pow(cast_myhalf(max(float(dot(reflect(lightnormal, surfacenormal), eyenormal))*-1.0, 0.0)), 1.0 + specpow);specular = max(0.0, specular * 10.0 - 9.0);\n",
+"#  define SHADESPECULAR(specpow) myhalf specular = pow(cast_myhalf(float(possatdot(reflect(lightnormal, surfacenormal), -eyenormal))), 1.0 + specpow);specular = possat(specular * 10.0 - 9.0);\n",
 "# else\n",
-"#  define SHADESPECULAR(specpow) myhalf3 specularnormal = normalize(lightnormal + eyenormal);myhalf specular = pow(cast_myhalf(max(float(dot(surfacenormal, specularnormal)), 0.0)), 1.0 + specpow);specular = max(0.0, specular * 10.0 - 9.0);\n",
+"#  define SHADESPECULAR(specpow) myhalf3 specularnormal = normalize(lightnormal + eyenormal);myhalf specular = pow(cast_myhalf(float(possatdot(surfacenormal, specularnormal))), 1.0 + specpow);specular = possat(specular * 10.0 - 9.0);\n",
 "# endif\n",
 "#else\n",
-"# define SHADEDIFFUSE myhalf diffuse = cast_myhalf(max(float(dot(surfacenormal, lightnormal)), 0.0));\n",
+"# define SHADEDIFFUSE myhalf diffuse = cast_myhalf(float(possatdot(surfacenormal, lightnormal)));\n",
 "# ifdef USEEXACTSPECULARMATH\n",
-"#  define SHADESPECULAR(specpow) myhalf specular = pow(cast_myhalf(max(float(dot(reflect(lightnormal, surfacenormal), eyenormal))*-1.0, 0.0)), 1.0 + specpow);\n",
+"#  define SHADESPECULAR(specpow) myhalf specular = pow(cast_myhalf(float(possatdot(reflect(lightnormal, surfacenormal), -eyenormal))), 1.0 + specpow);\n",
 "# else\n",
-"#  define SHADESPECULAR(specpow) myhalf3 specularnormal = normalize(lightnormal + eyenormal);myhalf specular = pow(cast_myhalf(max(float(dot(surfacenormal, specularnormal)), 0.0)), 1.0 + specpow);\n",
+"#  define SHADESPECULAR(specpow) myhalf3 specularnormal = normalize(lightnormal + eyenormal);myhalf specular = pow(cast_myhalf(float(possatdot(surfacenormal, specularnormal))), 1.0 + specpow);\n",
 "# endif\n",
 "#endif\n",
 "\n",
@@ -90,7 +95,7 @@
 "#ifdef USEDEPTHRGB\n",
 "      // for 565 RGB we'd need to use different multipliers\n",
 "#define decodedepthmacro(d) dot((d).rgb, vec3(1.0, 255.0 / 65536.0, 255.0 / 16777215.0))\n",
-"#define encodedepthmacro(d) (vec4(d, d*256.0, d*65536.0, 0.0) - floor(vec4(d, d*256.0, d*65536.0, 0.0)))\n",
+"#define encodedepthmacro(d) fract(vec4(d, d*256.0, d*65536.0, 0.0))\n",
 "#endif\n",
 "\n",
 "#ifdef VERTEX_SHADER\n",
 "#endif\n",
 "\n",
 "#ifdef MODE_DEPTH_OR_SHADOW\n",
-"dp_varying highp float Depth;\n",
 "#ifdef VERTEX_SHADER\n",
 "void main(void)\n",
 "{\n",
 "#ifdef USETRIPPY\n",
 "      gl_Position = TrippyVertex(gl_Position);\n",
 "#endif\n",
-"      Depth = gl_Position.z;\n",
 "}\n",
 "#endif\n",
 "\n",
 "void main(void)\n",
 "{\n",
 "#ifdef USEDEPTHRGB\n",
-"      dp_FragColor = encodedepthmacro(Depth);\n",
+"      dp_FragColor = encodedepthmacro(gl_FragCoord.z);\n",
 "#else\n",
 "      dp_FragColor = vec4(1.0,1.0,1.0,1.0);\n",
 "#endif\n",
 "\n",
 "\n",
 "#ifdef MODE_POSTPROCESS\n",
+"#ifdef USEBLOOM\n",
+"dp_varying mediump vec4 TexCoord1;\n",
+"#else\n",
 "dp_varying mediump vec2 TexCoord1;\n",
-"dp_varying mediump vec2 TexCoord2;\n",
+"#endif\n",
 "\n",
 "#ifdef VERTEX_SHADER\n",
 "void main(void)\n",
 "{\n",
 "      gl_Position = ModelViewProjectionMatrix * Attrib_Position;\n",
-"      TexCoord1 = Attrib_TexCoord0.xy;\n",
+"      TexCoord1.xy = Attrib_TexCoord0.xy;\n",
 "#ifdef USEBLOOM\n",
-"      TexCoord2 = Attrib_TexCoord4.xy;\n",
+"      TexCoord1.zw = Attrib_TexCoord4.xy;\n",
 "#endif\n",
 "}\n",
 "#endif\n",
 "      float minreduct = (1.0 / 128.0);\n",
 "\n",
 "      // directions\n",
-"      vec3 NW = dp_texture2D(Texture_First, TexCoord1 + (vec2(-1.0, -1.0) * PixelSize)).xyz;\n",
-"      vec3 NE = dp_texture2D(Texture_First, TexCoord1 + (vec2(+1.0, -1.0) * PixelSize)).xyz;\n",
-"      vec3 SW = dp_texture2D(Texture_First, TexCoord1 + (vec2(-1.0, +1.0) * PixelSize)).xyz;\n",
-"      vec3 SE = dp_texture2D(Texture_First, TexCoord1 + (vec2(+1.0, +1.0) * PixelSize)).xyz;\n",
-"      vec3 M = dp_texture2D(Texture_First, TexCoord1).xyz;\n",
+"      vec3 NW = dp_texture2D(Texture_First, TexCoord1.xy + (vec2(-1.0, -1.0) * PixelSize)).xyz;\n",
+"      vec3 NE = dp_texture2D(Texture_First, TexCoord1.xy + (vec2(+1.0, -1.0) * PixelSize)).xyz;\n",
+"      vec3 SW = dp_texture2D(Texture_First, TexCoord1.xy + (vec2(-1.0, +1.0) * PixelSize)).xyz;\n",
+"      vec3 SE = dp_texture2D(Texture_First, TexCoord1.xy + (vec2(+1.0, +1.0) * PixelSize)).xyz;\n",
+"      vec3 M = dp_texture2D(Texture_First, TexCoord1.xy).xyz;\n",
 "\n",
 "      // luminance directions\n",
 "      vec3 luma = vec3(0.299, 0.587, 0.114);\n",
 "      dir = min(vec2(maxspan, maxspan), max(vec2(-maxspan, -maxspan), dir * rcp)) * PixelSize;\n",
 "\n",
 "      vec3 rA = (1.0/2.0) * (\n",
-"              dp_texture2D(Texture_First, TexCoord1 + dir * (1.0/3.0 - 0.5)).xyz +\n",
-"              dp_texture2D(Texture_First, TexCoord1 + dir * (2.0/3.0 - 0.5)).xyz);\n",
+"              dp_texture2D(Texture_First, TexCoord1.xy + dir * (1.0/3.0 - 0.5)).xyz +\n",
+"              dp_texture2D(Texture_First, TexCoord1.xy + dir * (2.0/3.0 - 0.5)).xyz);\n",
 "      vec3 rB = rA * (1.0/2.0) + (1.0/4.0) * (\n",
-"              dp_texture2D(Texture_First, TexCoord1 + dir * (0.0/3.0 - 0.5)).xyz +\n",
-"              dp_texture2D(Texture_First, TexCoord1 + dir * (3.0/3.0 - 0.5)).xyz);\n",
+"              dp_texture2D(Texture_First, TexCoord1.xy + dir * (0.0/3.0 - 0.5)).xyz +\n",
+"              dp_texture2D(Texture_First, TexCoord1.xy + dir * (3.0/3.0 - 0.5)).xyz);\n",
 "      float lB = dot(rB, luma);\n",
 "\n",
 "      ret.xyz = ((lB < lMin) || (lB > lMax)) ? rA : rB;\n",
 "{\n",
 "#ifdef USECOLORFRINGE\n",
 "      float fringe = ColorFringe;//.0033f;\n",
-"      float amount = distance(TexCoord1, vec2(.5f,.5f));\n",
+"      float amount = distance(TexCoord1.xy, vec2(.5f,.5f));\n",
 "      vec2 offset = vec2(amount*fringe,amount*fringe);\n",
-"      dp_FragColor.xy = dp_texture2D(Texture_First, TexCoord1-offset).xy;\n",
-"      dp_FragColor.z = dp_texture2D(Texture_First, TexCoord1+offset).z;\n",
+"      dp_FragColor.xy = dp_texture2D(Texture_First, TexCoord1.xy-offset).xy;\n",
+"      dp_FragColor.z = dp_texture2D(Texture_First, TexCoord1.xy+offset).z;\n",
 "#else\n",
-"      dp_FragColor = dp_texture2D(Texture_First, TexCoord1);\n",
+"      dp_FragColor = dp_texture2D(Texture_First, TexCoord1.xy);\n",
 "#endif\n",
 "\n",
 "#ifdef USEFXAA\n",
 "      // vec2 ts = textureSize(Texture_First, 0);\n",
 "      // vec2 px = vec2(1/ts.x, 1/ts.y);\n",
 "      vec2 px = PixelSize;\n",
-"      vec3 x1 = dp_texture2D(Texture_First, TexCoord1 + vec2(-px.x, px.y)).rgb;\n",
-"      vec3 x2 = dp_texture2D(Texture_First, TexCoord1 + vec2(-px.x,  0.0)).rgb;\n",
-"      vec3 x3 = dp_texture2D(Texture_First, TexCoord1 + vec2(-px.x,-px.y)).rgb;\n",
-"      vec3 x4 = dp_texture2D(Texture_First, TexCoord1 + vec2( px.x, px.y)).rgb;\n",
-"      vec3 x5 = dp_texture2D(Texture_First, TexCoord1 + vec2( px.x,  0.0)).rgb;\n",
-"      vec3 x6 = dp_texture2D(Texture_First, TexCoord1 + vec2( px.x,-px.y)).rgb;\n",
-"      vec3 y1 = dp_texture2D(Texture_First, TexCoord1 + vec2( px.x,-px.y)).rgb;\n",
-"      vec3 y2 = dp_texture2D(Texture_First, TexCoord1 + vec2(  0.0,-px.y)).rgb;\n",
-"      vec3 y3 = dp_texture2D(Texture_First, TexCoord1 + vec2(-px.x,-px.y)).rgb;\n",
-"      vec3 y4 = dp_texture2D(Texture_First, TexCoord1 + vec2( px.x, px.y)).rgb;\n",
-"      vec3 y5 = dp_texture2D(Texture_First, TexCoord1 + vec2(  0.0, px.y)).rgb;\n",
-"      vec3 y6 = dp_texture2D(Texture_First, TexCoord1 + vec2(-px.x, px.y)).rgb;\n",
+"      vec3 x1 = dp_texture2D(Texture_First, TexCoord1.xy + vec2(-px.x, px.y)).rgb;\n",
+"      vec3 x2 = dp_texture2D(Texture_First, TexCoord1.xy + vec2(-px.x,  0.0)).rgb;\n",
+"      vec3 x3 = dp_texture2D(Texture_First, TexCoord1.xy + vec2(-px.x,-px.y)).rgb;\n",
+"      vec3 x4 = dp_texture2D(Texture_First, TexCoord1.xy + vec2( px.x, px.y)).rgb;\n",
+"      vec3 x5 = dp_texture2D(Texture_First, TexCoord1.xy + vec2( px.x,  0.0)).rgb;\n",
+"      vec3 x6 = dp_texture2D(Texture_First, TexCoord1.xy + vec2( px.x,-px.y)).rgb;\n",
+"      vec3 y2 = dp_texture2D(Texture_First, TexCoord1.xy + vec2(  0.0,-px.y)).rgb;\n",
+"      vec3 y5 = dp_texture2D(Texture_First, TexCoord1.xy + vec2(  0.0, px.y)).rgb;\n",
 "      float px1 = -1.0 * dot(vec3(0.3, 0.59, 0.11), x1);\n",
 "      float px2 = -2.0 * dot(vec3(0.3, 0.59, 0.11), x2);\n",
 "      float px3 = -1.0 * dot(vec3(0.3, 0.59, 0.11), x3);\n",
 "      float px4 =  1.0 * dot(vec3(0.3, 0.59, 0.11), x4);\n",
 "      float px5 =  2.0 * dot(vec3(0.3, 0.59, 0.11), x5);\n",
 "      float px6 =  1.0 * dot(vec3(0.3, 0.59, 0.11), x6);\n",
-"      float py1 = -1.0 * dot(vec3(0.3, 0.59, 0.11), y1);\n",
+"      float py1 = -px6;\n",
 "      float py2 = -2.0 * dot(vec3(0.3, 0.59, 0.11), y2);\n",
-"      float py3 = -1.0 * dot(vec3(0.3, 0.59, 0.11), y3);\n",
-"      float py4 =  1.0 * dot(vec3(0.3, 0.59, 0.11), y4);\n",
+"      float py3 =  px3;\n",
+"      float py4 =  px4;\n",
 "      float py5 =  2.0 * dot(vec3(0.3, 0.59, 0.11), y5);\n",
-"      float py6 =  1.0 * dot(vec3(0.3, 0.59, 0.11), y6);\n",
-"      sobel = 0.25 * abs(px1 + px2 + px3 + px4 + px5 + px6) + 0.25 * abs(py1 + py2 + py3 + py4 + py5 + py6);\n",
-"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1 + PixelSize*UserVec1.x*vec2(-0.987688, -0.156434)) * UserVec1.y;\n",
-"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1 + PixelSize*UserVec1.x*vec2(-0.156434, -0.891007)) * UserVec1.y;\n",
-"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1 + PixelSize*UserVec1.x*vec2( 0.891007, -0.453990)) * UserVec1.y;\n",
-"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1 + PixelSize*UserVec1.x*vec2( 0.707107,  0.707107)) * UserVec1.y;\n",
-"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1 + PixelSize*UserVec1.x*vec2(-0.453990,  0.891007)) * UserVec1.y;\n",
+"      float py6 = -px1;\n",
+"      sobel = 0.25 * (abs(px1 + px2 + px3 + px4 + px5 + px6) + abs(py1 + py2 + py3 + py4 + py5 + py6));\n",
+"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1.xy + PixelSize*UserVec1.x*vec2(-0.987688, -0.156434)) * UserVec1.y;\n",
+"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1.xy + PixelSize*UserVec1.x*vec2(-0.156434, -0.891007)) * UserVec1.y;\n",
+"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1.xy + PixelSize*UserVec1.x*vec2( 0.891007, -0.453990)) * UserVec1.y;\n",
+"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1.xy + PixelSize*UserVec1.x*vec2( 0.707107,  0.707107)) * UserVec1.y;\n",
+"      dp_FragColor += dp_texture2D(Texture_First, TexCoord1.xy + PixelSize*UserVec1.x*vec2(-0.453990,  0.891007)) * UserVec1.y;\n",
 "      dp_FragColor /= (1.0 + 5.0 * UserVec1.y);\n",
 "      dp_FragColor.rgb = dp_FragColor.rgb * (1.0 + UserVec2.x) + vec3(max(0.0, sobel - UserVec2.z))*UserVec2.y;\n",
 "#endif\n",
 "#endif\n",
 "\n",
 "#ifdef USEBLOOM\n",
-"      dp_FragColor += max(vec4(0,0,0,0), dp_texture2D(Texture_Second, TexCoord2) - BloomColorSubtract);\n",
+"      //TODO: replacing here possat back to max may be needed for HDR\n",
+"      dp_FragColor += possat(dp_texture2D(Texture_Second, TexCoord1.zw) - BloomColorSubtract);\n",
 "#endif\n",
 "\n",
 "#ifdef USEVIEWTINT\n",
 "      // content.\n",
 "      // Remove this 'ack once we have a better way to stop this thing from\n",
 "      // 'appening.\n",
-"      float f = min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(0.01, 0.01)).rgb) / 0.05);\n",
-"      f      *= min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(0.01, -0.01)).rgb) / 0.05);\n",
-"      f      *= min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(-0.01, 0.01)).rgb) / 0.05);\n",
-"      f      *= min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(-0.01, -0.01)).rgb) / 0.05);\n",
+"      float f = minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(0.01, 0.01)).rgb) / 0.05);\n",
+"      f      *= minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(0.01, -0.01)).rgb) / 0.05);\n",
+"      f      *= minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(-0.01, 0.01)).rgb) / 0.05);\n",
+"      f      *= minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord + vec2(-0.01, -0.01)).rgb) / 0.05);\n",
 "      ScreenTexCoord = mix(SafeScreenTexCoord, ScreenTexCoord, f);\n",
 "      dp_FragColor = vec4(dp_texture2D(Texture_Refraction, ScreenTexCoord).rgb, 1.0) * refractcolor;\n",
 "}\n",
 "      #ifdef USENORMALMAPSCROLLBLEND\n",
 "              vec3 normal = dp_texture2D(Texture_Normal, (TexCoord + vec2(0.08, 0.08)*ClientTime*NormalmapScrollBlend.x*0.5)*NormalmapScrollBlend.y).rgb - vec3(1.0);\n",
 "              normal += dp_texture2D(Texture_Normal, (TexCoord + vec2(-0.06, -0.09)*ClientTime*NormalmapScrollBlend.x)*NormalmapScrollBlend.y*0.75).rgb;\n",
-"              vec4 ScreenTexCoord = SafeScreenTexCoord + vec2(normalize(normal) + vec3(0.15)).xyxy * distort;\n",
+"              vec4 ScreenTexCoord = SafeScreenTexCoord + (normalize(normal) + vec3(0.15)).xyxy * distort;\n",
 "      #else\n",
 "              vec4 ScreenTexCoord = SafeScreenTexCoord + vec2(normalize(vec3(dp_texture2D(Texture_Normal, TexCoord)) - vec3(0.5))).xyxy * distort;\n",
 "      #endif\n",
 "      // content.\n",
 "      // Remove this 'ack once we have a better way to stop this thing from\n",
 "      // 'appening.\n",
-"      float f  = min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(0.005, 0.01)).rgb) / 0.002);\n",
-"      f       *= min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(0.005, -0.01)).rgb) / 0.002);\n",
-"      f       *= min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(-0.005, 0.01)).rgb) / 0.002);\n",
-"      f       *= min(1.0, length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(-0.005, -0.01)).rgb) / 0.002);\n",
+"      float f  = minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(0.005, 0.01)).rgb) / 0.002);\n",
+"      f       *= minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(0.005, -0.01)).rgb) / 0.002);\n",
+"      f       *= minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(-0.005, 0.01)).rgb) / 0.002);\n",
+"      f       *= minonesat(length(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy + vec2(-0.005, -0.01)).rgb) / 0.002);\n",
 "      ScreenTexCoord.xy = mix(SafeScreenTexCoord.xy, ScreenTexCoord.xy, f);\n",
-"      f  = min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(0.005, 0.005)).rgb) / 0.002);\n",
-"      f *= min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(0.005, -0.005)).rgb) / 0.002);\n",
-"      f *= min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(-0.005, 0.005)).rgb) / 0.002);\n",
-"      f *= min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(-0.005, -0.005)).rgb) / 0.002);\n",
+"      f  = minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(0.005, 0.005)).rgb) / 0.002);\n",
+"      f *= minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(0.005, -0.005)).rgb) / 0.002);\n",
+"      f *= minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(-0.005, 0.005)).rgb) / 0.002);\n",
+"      f *= minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw + vec2(-0.005, -0.005)).rgb) / 0.002);\n",
 "      ScreenTexCoord.zw = mix(SafeScreenTexCoord.zw, ScreenTexCoord.zw, f);\n",
 "      float Fresnel = pow(min(1.0, 1.0 - float(normalize(EyeVector).z)), 2.0) * reflectfactor + reflectoffset;\n",
 "      dp_FragColor = mix(vec4(dp_texture2D(Texture_Refraction, ScreenTexCoord.xy).rgb, 1) * refractcolor, vec4(dp_texture2D(Texture_Reflection, ScreenTexCoord.zw).rgb, 1) * ReflectColor, Fresnel);\n",
 "      return mix(fogheightpixel.rgb * fc, surfacecolor.rgb, dp_texture2D(Texture_FogMask, cast_myhalf2(length(EyeVectorModelSpace)*fogfrac*FogRangeRecip, 0.0)).r);\n",
 "#else\n",
 "# ifdef USEFOGOUTSIDE\n",
-"      fogfrac = min(0.0, FogPlaneVertexDist) / (FogPlaneVertexDist - FogPlaneViewDist) * min(1.0, min(0.0, FogPlaneVertexDist) * FogHeightFade);\n",
+"      fogfrac = min(0.0, FogPlaneVertexDist) / (FogPlaneVertexDist - FogPlaneViewDist) * min(1.0, min(0.0, FogPlaneVertexDist) * FogHeightFade);\n",//TODO: can saturate?
 "# else\n",
 "      fogfrac = FogPlaneViewDist / (FogPlaneViewDist - max(0.0, FogPlaneVertexDist)) * min(1.0, (min(0.0, FogPlaneVertexDist) + FogPlaneViewDist) * FogHeightFade);\n",
 "# endif\n",
 "#else\n",
 "      mediump float LODSteps = ceil(GuessLODFactor * OffsetMapping_ScaleSteps.y);\n",
 "#endif\n",
-"      mediump float LODFactor = LODSteps / OffsetMapping_ScaleSteps.y;\n",
-"      mediump vec4 ScaleSteps = vec4(OffsetMapping_ScaleSteps.x, LODSteps, 1.0 / LODSteps, OffsetMapping_ScaleSteps.w * LODFactor);\n",
+"      mediump vec4 ScaleSteps = vec4(OffsetMapping_ScaleSteps.x, LODSteps, vec2(1.0, OffsetMapping_ScaleSteps.w * LODSteps) / vec2(LODSteps, OffsetMapping_ScaleSteps.y));\n",
 "#else\n",
 "      #define ScaleSteps OffsetMapping_ScaleSteps\n",
 "#endif\n",
 "      //vec2 OffsetVector = vec2(normalize(EyeVectorFogDepth.xy) * ScaleSteps.x * vec2(-1, 1));\n",
 "      vec2 OffsetVector = vec2(normalize(EyeVectorFogDepth.xyz).xy * ScaleSteps.x * vec2(-1, 1));\n",
 "      OffsetVector *= ScaleSteps.z;\n",
+"      vec2 OneMinusBias_OffsetVector = (1.0 - OffsetMapping_Bias) * OffsetVector;\n",
 "      for(i = 0.0; i < ScaleSteps.y; ++i)\n",
-"              TexCoord += OffsetVector * ((1.0 - OffsetMapping_Bias) - dp_textureGrad(Texture_Normal, TexCoord, dPdx, dPdy).a);\n",
-"      return TexCoord;\n",
+"              TexCoord += -dp_textureGrad(Texture_Normal, TexCoord, dPdx, dPdy).a * OffsetVector + OneMinusBias_OffsetVector;\n",
+"      return TexCoord;\n"
 "#endif\n",
 "}\n",
 "#endif // USEOFFSETMAPPING\n",
 "\n",
 "#if defined(MODE_LIGHTDIRECTIONMAP_MODELSPACE) || defined(USEREFLECTCUBE) || defined(USEBOUNCEGRIDDIRECTIONAL) || defined(MODE_LIGHTGRID)\n",
 "# ifdef USEFOG\n",
-"      VectorS = vec4(Attrib_TexCoord1.xyz, EyePosition.x - Attrib_Position.x);\n",
-"      VectorT = vec4(Attrib_TexCoord2.xyz, EyePosition.y - Attrib_Position.y);\n",
-"      VectorR = vec4(Attrib_TexCoord3.xyz, EyePosition.z - Attrib_Position.z);\n",
+"      vec3 EyeDir = EyePosition - Attrib_Position.xyz;\n",
+"      VectorS = vec4(Attrib_TexCoord1.xyz, EyeDir.x);\n",
+"      VectorT = vec4(Attrib_TexCoord2.xyz, EyeDir.y);\n",
+"      VectorR = vec4(Attrib_TexCoord3.xyz, EyeDir.z);\n",
 "# else\n",
 "      VectorS = vec4(Attrib_TexCoord1, 0);\n",
 "      VectorT = vec4(Attrib_TexCoord2, 0);\n",
 "#ifdef USEVERTEXTEXTUREBLEND\n",
 "#ifdef USEBOTHALPHAS\n",
 "      myhalf4 color2 = cast_myhalf4(dp_texture2D(Texture_SecondaryColor, TexCoord2));\n",
-"      myhalf terrainblend = clamp(cast_myhalf(VertexColor.a) * color.a, cast_myhalf(1.0 - color2.a), cast_myhalf(1.0));\n",
+"      myhalf terrainblend = max(sat(cast_myhalf(VertexColor.a) * color.a), cast_myhalf(1.0 - color2.a));\n",//move sat outside?
 "      color.rgb = mix(color2.rgb, color.rgb, terrainblend);\n",
 "#else\n",
-"      myhalf terrainblend = clamp(cast_myhalf(VertexColor.a) * color.a * 2.0 - 0.5, cast_myhalf(0.0), cast_myhalf(1.0));\n",
+"      myhalf terrainblend = sat(cast_myhalf(VertexColor.a) * color.a * 2.0 - 0.5);\n",
 "      //myhalf terrainblend = min(cast_myhalf(VertexColor.a) * color.a * 2.0, cast_myhalf(1.0));\n",
 "      //myhalf terrainblend = cast_myhalf(VertexColor.a) * color.a > 0.5;\n",
 "      color.rgb = mix(cast_myhalf3(dp_texture2D(Texture_SecondaryColor, TexCoord2)), color.rgb, terrainblend);\n",
 "      myhalf4 bouncegrid_coeff7 = cast_myhalf4(dp_texture3D(Texture_BounceGrid, BounceGridTexCoord + vec3(0.0, 0.0, 0.750)));\n",
 "      myhalf4 bouncegrid_coeff8 = cast_myhalf4(dp_texture3D(Texture_BounceGrid, BounceGridTexCoord + vec3(0.0, 0.0, 0.875)));\n",
 "      myhalf3 bouncegrid_dir = normalize(mat3(BounceGridMatrix) * (surfacenormal.x * VectorS.xyz + surfacenormal.y * VectorT.xyz + surfacenormal.z * VectorR.xyz));\n",
-"      myhalf3 bouncegrid_dirp = max(cast_myhalf3(0.0, 0.0, 0.0), bouncegrid_dir);\n",
-"      myhalf3 bouncegrid_dirn = max(cast_myhalf3(0.0, 0.0, 0.0), -bouncegrid_dir);\n",
+"      myhalf3 bouncegrid_dirp = possat(bouncegrid_dir);\n",
+"      myhalf3 bouncegrid_dirn = possat(-bouncegrid_dir);\n",
 "//    bouncegrid_dirp  = bouncegrid_dirn = cast_myhalf3(1.0,1.0,1.0);\n",
 "      myhalf3 bouncegrid_light = cast_myhalf3(\n",
 "              dot(bouncegrid_coeff3.xyz, bouncegrid_dirp) + dot(bouncegrid_coeff6.xyz, bouncegrid_dirn),\n",
 "      float DepthScale1 = 4.0 / DepthCenter; // inner ink (shadow on object)\n",
 "//    float DepthScale1 = -4.0 / DepthCenter; // outer ink (shadow around object)\n",
 "//    float DepthScale1 = 0.003;\n",
-"      float DepthScale2 = DepthScale1 / 2.0;\n",
+"      float DepthScale2 = DepthScale1 * 0.5;\n",
 "//    float DepthScale3 = DepthScale1 / 4.0;\n",
 "      float DepthBias1 = -DepthCenter * DepthScale1;\n",
 "      float DepthBias2 = -DepthCenter * DepthScale2;\n",
 "//    float DepthBias3 = -DepthCenter * DepthScale3;\n",
-"      float DepthShadow = max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2(-1.0,  0.0)).b * DepthScale1 + DepthBias1)\n",
-"                        + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 1.0,  0.0)).b * DepthScale1 + DepthBias1)\n",
-"                        + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0, -1.0)).b * DepthScale1 + DepthBias1)\n",
-"                        + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0,  1.0)).b * DepthScale1 + DepthBias1)\n",
-"                        + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2(-2.0,  0.0)).b * DepthScale2 + DepthBias2)\n",
-"                        + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 2.0,  0.0)).b * DepthScale2 + DepthBias2)\n",
-"                        + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0, -2.0)).b * DepthScale2 + DepthBias2)\n",
-"                        + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0,  2.0)).b * DepthScale2 + DepthBias2)\n",
-"//                      + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2(-3.0,  0.0)).b * DepthScale3 + DepthBias3)\n",
-"//                      + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 3.0,  0.0)).b * DepthScale3 + DepthBias3)\n",
-"//                      + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0, -3.0)).b * DepthScale3 + DepthBias3)\n",
-"//                      + max(0.0, dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0,  3.0)).b * DepthScale3 + DepthBias3)\n",
+"      float DepthShadow = possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2(-1.0,  0.0)).b * DepthScale1 + DepthBias1)\n",
+"                        + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 1.0,  0.0)).b * DepthScale1 + DepthBias1)\n",
+"                        + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0, -1.0)).b * DepthScale1 + DepthBias1)\n",
+"                        + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0,  1.0)).b * DepthScale1 + DepthBias1)\n",
+"                        + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2(-2.0,  0.0)).b * DepthScale2 + DepthBias2)\n",
+"                        + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 2.0,  0.0)).b * DepthScale2 + DepthBias2)\n",
+"                        + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0, -2.0)).b * DepthScale2 + DepthBias2)\n",
+"                        + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0,  2.0)).b * DepthScale2 + DepthBias2)\n",
+"//                      + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2(-3.0,  0.0)).b * DepthScale3 + DepthBias3)\n",
+"//                      + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 3.0,  0.0)).b * DepthScale3 + DepthBias3)\n",
+"//                      + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0, -3.0)).b * DepthScale3 + DepthBias3)\n",
+"//                      + possat(dp_texture2D(Texture_ScreenNormalMap, ScreenTexCoord + PixelToScreenTexCoord * vec2( 0.0,  3.0)).b * DepthScale3 + DepthBias3)\n",
 "                        - 0.0;\n",
-"      color.rgb *= 1.0 - max(0.0, min(DepthShadow, 1.0));\n",
+"      color.rgb *= sat(1.0 - DepthShadow);\n",
 "//    color.r = DepthCenter / -1024.0;\n",
 "# endif\n",
 "#endif\n",
 "      // content.\n",
 "      // Remove this 'ack once we have a better way to stop this thing from\n",
 "      // 'appening.\n",
-"      float f = min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(0.01, 0.01)).rgb) / 0.05);\n",
-"      f      *= min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(0.01, -0.01)).rgb) / 0.05);\n",
-"      f      *= min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(-0.01, 0.01)).rgb) / 0.05);\n",
-"      f      *= min(1.0, length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(-0.01, -0.01)).rgb) / 0.05);\n",
+"      float f = minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(0.01, 0.01)).rgb) / 0.05);\n",
+"      f      *= minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(0.01, -0.01)).rgb) / 0.05);\n",
+"      f      *= minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(-0.01, 0.01)).rgb) / 0.05);\n",
+"      f      *= minonesat(length(dp_texture2D(Texture_Reflection, ScreenTexCoord + vec2(-0.01, -0.01)).rgb) / 0.05);\n",
 "      ScreenTexCoord = mix(SafeScreenTexCoord, ScreenTexCoord, f);\n",
 "      color.rgb = mix(color.rgb, cast_myhalf3(dp_texture2D(Texture_Reflection, ScreenTexCoord)) * ReflectColor.rgb, ReflectColor.a);\n",
 "#endif\n",