#include "globals.h" // GLSLES has limited number of vertex shader registers so we have to use less bones #ifdef GLSLES #define MAX_BONE_COUNT 32 #else #define MAX_BONE_COUNT 72 #endif // PowerVR saturate() is compiled to min/max pair // These are cross-platform specialized saturates that are free on PC and only cost 1 cycle on PowerVR #ifdef GLSLES float saturate0(float v) { return max(v, 0); } float saturate1(float v) { return min(v, 1); } #else float saturate0(float v) { return saturate(v); } float saturate1(float v) { return saturate(v); } #endif #define GBUFFER_MAX_DEPTH 500.0f float4 gbufferPack(float depth, float3 diffuse, float3 specular, float fog) { depth = saturate(depth / GBUFFER_MAX_DEPTH); const float3 bitSh = float3(255*255, 255, 1); const float3 lumVec = float3(0.299, 0.587, 0.114); float2 comp; comp = depth*float2(255,255*256); comp = frac(comp); comp = float2(depth,comp.x*256/255) - float2(comp.x, comp.y)/255; float4 result; result.r = lerp(1, dot(specular, lumVec), saturate(3 * fog)); result.g = lerp(0, dot(diffuse, lumVec), saturate(3 * fog)); result.ba = comp.yx; return result; } float3 getPosInLightSpace(float3 posIn) { float3 lightToWorld = posIn - G.BlobShadowData0.xyz; return float3(dot(G.Lamp0Right, lightToWorld), dot(G.Lamp0Up, lightToWorld), dot(G.Lamp0Dir, lightToWorld)); } float getSingleBlobShadowOrigin(float3 lightSpacePos, float4 blobData) { float distSq = dot(lightSpacePos.xy, lightSpacePos.xy); // OH MY GOD! a BRANCH? Why? Because this produces a better assembly over other solution float projDistScaled = lightSpacePos.z * 0.04; if (lightSpacePos.z < 0) projDistScaled = lightSpacePos.z * -0.3; return min(1, distSq * G.OutlineBrightness_ShadowInfo.z + projDistScaled + blobData.a); } float getSingleBlobShadow(float3 lightSpacePos, float4 blobData) { return getSingleBlobShadowOrigin(lightSpacePos - blobData.xyz, blobData); } float getBlobShadow(float3 lightSpacePos) { #ifdef PIN_HQ float shadow = min(getSingleBlobShadowOrigin(lightSpacePos, G.BlobShadowData0), getSingleBlobShadow(lightSpacePos, G.BlobShadowData1)); shadow = min(getSingleBlobShadow(lightSpacePos, G.BlobShadowData2), shadow); shadow = min(getSingleBlobShadow(lightSpacePos, G.BlobShadowData3), shadow); return shadow; #else return getSingleBlobShadowOrigin(lightSpacePos, G.BlobShadowData0); #endif } float3 lgridOffset(float3 v, float3 n) { // cells are 4 studs in size // offset in normal direction to prevent self-occlusion // the offset has to be 1.5 cells in order to fully eliminate the influence of the source cell with trilinear filtering // (i.e. 1 cell is enough for point filtering, but is not enough for trilinear filtering) return v + n * (1.5f * 4.f); } float3 lgridPrepareSample(float3 c) { // yxz swizzle is necessary for GLSLES sampling to work efficiently // (having .y as the first component allows to do the LUT lookup as a non-dependent texture fetch) return c.yxz * G.LightConfig0.xyz + G.LightConfig1.xyz; } #ifdef GLSLES #define LGRID_SAMPLER sampler2D float4 lgridSample(LGRID_SAMPLER t, sampler2D lut, float3 data) { float4 offsets = tex2D(lut, data.xy); // texture is 64 pixels high // let's compute slice lerp coeff float slicef = frac(data.x * 64); // texture has 64 slices with 8x8 atlas setup float2 base = saturate(data.yz) * 0.125; float4 s0 = tex2D(t, base + offsets.xy); float4 s1 = tex2D(t, base + offsets.zw); return lerp(s0, s1, slicef); } #else #define LGRID_SAMPLER sampler3D float4 lgridSample(LGRID_SAMPLER t, sampler2D lut, float3 data) { float3 edge = step(G.LightConfig3.xyz, abs(data - G.LightConfig2.xyz)); float edgef = saturate1(dot(edge, 1)); // replace data with 0 on edges to minimize texture cache misses float4 light = tex3D(t, data.yzx - data.yzx * edgef); return lerp(light, G.LightBorder, edgef); } #endif #ifdef GLSLES float3 nmapUnpack(float4 value) { return value.rgb * 2 - 1; } #else float3 nmapUnpack(float4 value) { float2 xy = value.ag * 2 - 1; return float3(xy, sqrt(saturate(1 + dot(-xy, xy)))); } #endif