Clients/Client2014/shaders/source/ssao.hlsl

448 lines
10 KiB
HLSL

#include "common.h"
// tweakables
// Number of mirrored sample pairs per pixel; aliased as NUM_PAIRS further down and used to
// size the sample-offset table and the sampling loop in ssao_ps.
#define SSAO_NUM_PAIRS 8
#define SSAO_SPHERE_RAD 2.0f // world-space
// Lower / upper bounds applied by ssao_ps when clamping the sampling radius; there they are
// divided by the occlusion-map size (TextureSize.xy / 2), i.e. they are expressed in map pixels.
#define SSAO_MIN_PIXEL_RANGE 10.0f
#define SSAO_MAX_PIXEL_RANGE 100.0f
// Divided by GBUFFER_MAX_DEPTH in ssaoBlur to form the base depth tolerance of the blur filter.
#define BLUR_DEPTH_DELTA 0.4f
// Both are divided by GBUFFER_MAX_DEPTH in ssaoComposit_ps:
// - COMPOSITE_DEPTH_DELTA bounds the centre-pixel mismatch between the two depth sources;
// - COMPOSITE_DEPTH_DELTA2 bounds how far a neighbour's depth may be from the centre depth
//   for that neighbour's occlusion value to be used in the fallback estimate.
#define COMPOSITE_DEPTH_DELTA 0.02f
#define COMPOSITE_DEPTH_DELTA2 0.4f
// Vertex input shared by every vertex shader in this file.
// NOTE(review): presumably a full-screen quad whose positions are already in clip space — confirm against the caller.
struct Appdata
{
// Vertex position; the vertex shaders feed it to convertPosition() and convertUv().
float4 p : POSITION;
// Not read by any vertex shader visible in this file (they derive uv from p instead).
float2 uv : TEXCOORD0;
};
// Vertex-to-pixel interpolants of ssao_vs: output position plus one texture coordinate.
struct VertexOutput
{
// Output position (result of convertPosition).
float4 p : POSITION;
// Texture coordinate of the pixel (result of convertUv).
float2 uv : TEXCOORD0;
};
// .xy = gbuffer width/height, .zw = inverse gbuffer width/height
// Read by the vertex shaders for the position offset and the tap spacing,
// and by ssao_ps to derive the occlusion-map size.
uniform float4 TextureSize;
#ifdef GLSL
// GLSL build: the incoming position is forwarded untouched; `scale` is ignored.
float4 convertPosition(float4 p, float scale)
{
    return p;
}

// GLSL build: remaps p.xy with *0.5 + 0.5 (i.e. [-1, 1] -> [0, 1]).
float2 convertUv(float4 p)
{
    float2 texCoord = p.xy * 0.5 + 0.5;
    return texCoord;
}
#else
// Non-GLSL build: shifts the position by `scale` times (-1/width, +1/height).
//   p     - incoming position
//   scale - multiplier for the offset (callers pass 1 or 2)
float4 convertPosition(float4 p, float scale)
{
    // half-pixel offset
    float4 halfPixel = float4(-TextureSize.z, TextureSize.w, 0, 0);
    return p + halfPixel * scale;
}

// Non-GLSL build: same remap as above, but with the y axis flipped.
float2 convertUv(float4 p)
{
    float2 flipY = float2(0.5, -0.5);
    return p.xy * flipY + 0.5;
}
#endif
// Vertex shader of the SSAO pass: derives the texture coordinate from the vertex
// position and applies the position conversion with scale 1.
VertexOutput ssao_vs(Appdata IN)
{
    VertexOutput OUT;
    OUT.uv = convertUv(IN.p);
    OUT.p = convertPosition(IN.p, 1);
    return OUT;
}
// used for depth downsampling pass (ssaoDepthDown_vs) and for the composite pass (ssaoComposit_vs)
struct VertexOutput_4uv
{
// Output position (result of convertPosition).
float4 p : POSITION;
// Texture coordinate of the pixel centre.
float2 uv : TEXCOORD0;
// Two extra taps packed as .xy and .zw.
float4 uv12 : TEXCOORD1;
// Two more extra taps packed as .xy and .zw.
float4 uv34 : TEXCOORD2;
};
// Vertex shader of the depth downsampling pass.
// Emits the centre uv plus four taps displaced by half a g-buffer texel along both axes
// (one per diagonal direction); the position conversion uses scale 2.
VertexOutput_4uv ssaoDepthDown_vs(Appdata IN)
{
    float2 center = convertUv(IN.p);
    float2 halfTexel = TextureSize.zw * 0.5f;

    VertexOutput_4uv OUT;
    OUT.p = convertPosition(IN.p, 2);
    OUT.uv = center;
    OUT.uv12 = float4(center + halfTexel * float2(-1, -1),
                      center + halfTexel * float2(+1, -1));
    OUT.uv34 = float4(center + halfTexel * float2(-1, +1),
                      center + halfTexel * float2(+1, +1));
    return OUT;
}
// Vertex-to-pixel interpolants of the blur passes: centre uv plus eight taps.
// ssaoBlur_vs fills uv12/uv34 with the taps at +1..+4 steps and uv56/uv78 with the taps at -1..-4 steps.
struct VertexOutput_8uv
{
// Output position (result of convertPosition).
float4 p : POSITION;
// Texture coordinate of the pixel centre.
float2 uv : TEXCOORD0;
// Taps at +1 (.xy) and +2 (.zw) steps.
float4 uv12 : TEXCOORD1;
// Taps at +3 (.xy) and +4 (.zw) steps.
float4 uv34 : TEXCOORD2;
// Taps at -1 (.xy) and -2 (.zw) steps.
float4 uv56 : TEXCOORD3;
// Taps at -3 (.xy) and -4 (.zw) steps.
float4 uv78 : TEXCOORD4;
};
// used for ssao blurring passes
// Builds the centre uv and eight taps spaced `uvOffset` apart: four on the positive
// side (uv12, uv34) and four on the negative side (uv56, uv78).
//   IN       - vertex input
//   uvOffset - uv distance between two consecutive taps
VertexOutput_8uv ssaoBlur_vs(Appdata IN, float2 uvOffset)
{
    float2 center = convertUv(IN.p);

    VertexOutput_8uv OUT;
    OUT.p = convertPosition(IN.p, 2);
    OUT.uv = center;

    // positive direction
    OUT.uv12 = float4(center + uvOffset, center + 2 * uvOffset);
    OUT.uv34 = float4(center + 3 * uvOffset, center + 4 * uvOffset);

    // negative direction
    OUT.uv56 = float4(center - uvOffset, center - 2 * uvOffset);
    OUT.uv78 = float4(center - 3 * uvOffset, center - 4 * uvOffset);
    return OUT;
}
// Horizontal blur vertex shader: taps are spaced two g-buffer texels apart along x.
VertexOutput_8uv ssaoBlurX_vs(Appdata IN)
{
    float2 tapSpacing = float2(TextureSize.z * 2, 0);
    return ssaoBlur_vs(IN, tapSpacing);
}
// Vertical blur vertex shader: taps are spaced two g-buffer texels apart along y.
VertexOutput_8uv ssaoBlurY_vs(Appdata IN)
{
    float2 tapSpacing = float2(0, TextureSize.w * 2);
    return ssaoBlur_vs(IN, tapSpacing);
}
// Reconstructs a depth value stored across two channels of `s`:
// the .w channel plus the .z channel scaled by 1/256.
float unpackDepth( sampler2D s, float2 uv )
{
    float2 enc = tex2D(s, uv).zw;
    return enc.x * (1.0f/256.0f) + enc.y;
}
// Reads the depth stored in the red channel of `s` at `uv`.
float getDepth( sampler2D s, float2 uv )
{
    float4 texel = tex2D(s, uv);
    return (float)texel.r;
}
// Sampling-kernel helper macros.
// Number of mirrored sample pairs (alias of the SSAO_NUM_PAIRS tweakable).
#define NUM_PAIRS SSAO_NUM_PAIRS
// Parenthesized so the quotient stays a single value wherever the macro is expanded
// (e.g. `x / RANGE` must divide by 60/1024, not by 60 and then by 1024 again).
#define RANGE (60.0/1024.0)
#define pi 3.14159265359
// Degrees -> radians.
#define RAD(X) ( (X) * (pi/180) )
// Returns the i-th kernel offset: a point at angle 45deg + i/NUM_PAIRS of a full turn,
// at distance (i+1)/(NUM_PAIRS+2) from the origin (so the distance grows with i and stays below 1).
float2 GetRotatedSample(float i)
{
    float dist = (i+1) / (NUM_PAIRS+2);
    float angle = RAD(45) + i / NUM_PAIRS * 2 * pi;
    return dist * float2(cos(angle), sin(angle));
}
// Two taps per pair plus one; parenthesized so the sum survives expansion inside
// larger expressions (e.g. `2 * NUM_SAMPLES` or `x / NUM_SAMPLES`).
#define NUM_SAMPLES (NUM_PAIRS*2+1)
// SSAO estimation pass.
// For every pixel, NUM_PAIRS pairs of depth taps mirrored around `uv` are compared with the
// centre depth and accumulated into a single term, returned replicated in rgb (alpha = 1).
//   uv          - texture coordinate of the pixel being shaded
//   depthBuffer - depth texture, read through getDepth (red channel)
//   randMap     - noise texture; its .xy, remapped with *2-1, builds the per-pixel kernel rotation
// The term is saturate(2 * average); pixels whose depth exceeds 1 - 1/256 get an extra +1.
float4 ssao_ps(
    float2 uv: TEXCOORD0,
    uniform sampler2D depthBuffer: register(s0),
    uniform sampler2D randMap: register(s1)): COLOR0
{
    // Size of the map this pass works on: half the g-buffer size.
    float2 mapSize = TextureSize.xy / 2;
    float baseDepth = getDepth( depthBuffer, uv );

    // uv*mapSize/4 advances one noise texel per 4 map pixels.
    float4 noiseTex = tex2D(randMap, uv*mapSize/4) * 2 - 1;
    float2x2 rotation =
    {
        { noiseTex.y, noiseTex.x },
        { -noiseTex.x, noiseTex.y }
    };

    // The initializer list provides 6, 8 or 12 entries depending on NUM_PAIRS.
    float2 OFFSETS1[NUM_PAIRS] =
    {
        GetRotatedSample(0),
        GetRotatedSample(1),
        GetRotatedSample(2),
        GetRotatedSample(3),
        GetRotatedSample(4),
        GetRotatedSample(5),
#if NUM_PAIRS > 6
        GetRotatedSample(6),
        GetRotatedSample(7),
#if NUM_PAIRS > 8
        GetRotatedSample(8),
        GetRotatedSample(9),
        GetRotatedSample(10),
        GetRotatedSample(11),
#endif
#endif
    };

    float occ = 1;

    // Sphere radius expressed in depth-buffer units. Driven by the SSAO_SPHERE_RAD tweakable
    // (previously a hard-coded 2.0f that silently ignored the tweakable; same value today).
    float sphereRadiusZB = (float) ( SSAO_SPHERE_RAD / GBUFFER_MAX_DEPTH );

#define MINPIXEL SSAO_MIN_PIXEL_RANGE
#define MAXPIXEL SSAO_MAX_PIXEL_RANGE
    // Sampling radius in uv units: inversely proportional to depth, clamped to the pixel range.
    float radiusTex = (float)clamp( 0.5*sphereRadiusZB / baseDepth, MINPIXEL / mapSize.x, MAXPIXEL / mapSize.y);

    // occ starts at 1 and numSamples at 2, which biases the average before any tap is added.
    float numSamples = 2;
    for(int i = 0; i < NUM_PAIRS; i++)
    {
        // Two taps mirrored around the centre along the rotated kernel direction.
        float2 offset1 = mul(rotation, OFFSETS1[i]);
        float2 offseted1 = uv + offset1 * radiusTex;
        float2 offseted2 = uv - offset1 * radiusTex;

        float2 offsetDepth;
        offsetDepth.x = getDepth( depthBuffer, offseted1 );
        offsetDepth.y = getDepth( depthBuffer, offseted2 );
        float2 diff = offsetDepth - baseDepth.xx;

        // Same distance GetRotatedSample used for this offset; sqrt(1 - len^2) scales the
        // depth extent used to normalise the differences at that distance.
        float normalizedOffsetLen = (float)(i+1)/(NUM_PAIRS+2);
        float segmentDiff = (float) ( 1.5f*sphereRadiusZB*sqrt(1-normalizedOffsetLen*normalizedOffsetLen) );
        float2 normalizedDiff = (diff / segmentDiff) + 0.5;
        float minDiff = min(normalizedDiff.x, normalizedDiff.y);

        // At 0, full sample
        // At -1, zero sample, zero weight
        float sampleadd = (float) saturate(1+minDiff);
        float a = (float)(saturate(normalizedDiff.x) + saturate(normalizedDiff.y))*sampleadd;
        occ += a;
        numSamples += 2 * sampleadd;
    }
    occ = occ / numSamples;

    float finalocc = (float)saturate(occ*2);

    // Pixels with depth beyond 1 - 1/256 get +1 on top of the saturated term.
    if(baseDepth - (1.0f-1/256.0f) > 0)
        finalocc += 1;

    return float4(finalocc, finalocc, finalocc, 1);
}
// this function estimates depth discrepancy tolerance for the blur filter
// The tolerance grows linearly with depth (sphereRadiusZB * baseDepth * ramp) and is
// clamped to [0.1, 40] times sphereRadiusZB.
float depthTolerance( float baseDepth, float sphereRadiusZB )
{
    float ramp = 80; // tweak
    float lowerBound = 0.1f * sphereRadiusZB;
    float upperBound = 40*sphereRadiusZB;
    float scaled = sphereRadiusZB * (baseDepth * ramp);
    return clamp( scaled, lowerBound, upperBound );
}
// Depth-aware 1D blur of the red channel of `map`.
//   uv            - centre tap
//   uv12, uv34    - four taps on one side of the centre (each float4 packs two uvs)
//   uv56, uv78    - four taps on the other side
//   map           - texture being blurred (red channel)
//   depthBuffer   - depth texture read through getDepth
// A tap contributes only when its depth differs from the centre depth by less than
// depthTolerance(); the result is the weighted average of the contributing taps.
float ssaoBlur(
    float2 uv,
    float4 uv12,
    float4 uv34,
    float4 uv56,
    float4 uv78,
    sampler2D map,
    sampler2D depthBuffer
    )
{
    float sphereRadiusZB = BLUR_DEPTH_DELTA / GBUFFER_MAX_DEPTH;

    // Per-tap weights 4 - {1,2,3,4} = {3,2,1,0}.
    // NOTE(review): the outermost tap on each side therefore always has weight 0 — confirm this is intended.
    float4 tapIndex = { 1, 2, 3, 4 };
    float4 tapWeight = 4-tapIndex;

    // The centre value is replicated into all four lanes with weight 1 each.
    float4 weightSum = 1;
    float4 valueSum = tex2D(map, uv).rrrr * weightSum;

    float centerDepth = getDepth( depthBuffer, uv );
    // Same tolerance for both sides, so it is computed once.
    float4 tolerance = depthTolerance( centerDepth, sphereRadiusZB ).xxxx;

    float4 tapDepth, tapValue, coef;

    // ---- first side: uv12 / uv34 ----
    tapDepth.x = getDepth( depthBuffer, uv12.xy );
    tapDepth.y = getDepth( depthBuffer, uv12.zw );
    tapDepth.z = getDepth( depthBuffer, uv34.xy );
    tapDepth.w = getDepth( depthBuffer, uv34.zw );
    coef = tapWeight * ( abs(tapDepth - centerDepth.xxxx) < tolerance );

    tapValue.x = tex2D( map, uv12.xy ).r;
    tapValue.y = tex2D( map, uv12.zw ).r;
    tapValue.z = tex2D( map, uv34.xy ).r;
    tapValue.w = tex2D( map, uv34.zw ).r;
    valueSum += tapValue * coef;
    weightSum += coef;

    // ---- second side: uv56 / uv78 ----
    tapDepth.x = getDepth( depthBuffer, uv56.xy );
    tapDepth.y = getDepth( depthBuffer, uv56.zw );
    tapDepth.z = getDepth( depthBuffer, uv78.xy );
    tapDepth.w = getDepth( depthBuffer, uv78.zw );
    coef = tapWeight * ( abs(tapDepth - centerDepth.xxxx) < tolerance );

    tapValue.x = tex2D( map, uv56.xy ).r;
    tapValue.y = tex2D( map, uv56.zw ).r;
    tapValue.z = tex2D( map, uv78.xy ).r;
    tapValue.w = tex2D( map, uv78.zw ).r;
    valueSum += tapValue * coef;
    weightSum += coef;

    // Collapse the four lanes: total weighted value over total weight.
    float4 ones = float4(1,1,1,1);
    return dot( valueSum, ones ) / dot( weightSum, ones );
}
// Pixel shader of the horizontal blur pass: runs ssaoBlur and writes the result to rgb, alpha = 1.
float4 ssaoBlurX_ps(
    float2 uv : TEXCOORD0,
    float4 uv12 : TEXCOORD1,
    float4 uv34 : TEXCOORD2,
    float4 uv56 : TEXCOORD3,
    float4 uv78 : TEXCOORD4,
    uniform sampler2D map : register(s0), uniform sampler2D depthBuffer : register(s1) ): COLOR0
{
    float blurred = ssaoBlur( uv, uv12, uv34, uv56, uv78, map, depthBuffer);
    return float4(blurred, blurred, blurred, 1);
}
#define SPECULAR_WEIGHT 3
// Pixel shader of the vertical blur pass.
// Blurs the occlusion term and then blends it towards 1 according to the .x ("specular")
// and .y ("diffuse") channels of geomMap; the scalar result is written to every output channel.
float4 ssaoBlurY_ps(
    float2 uv : TEXCOORD0,
    float4 uv12 : TEXCOORD1,
    float4 uv34 : TEXCOORD2,
    float4 uv56 : TEXCOORD3,
    float4 uv78 : TEXCOORD4,
    uniform sampler2D map : register(s0), uniform sampler2D depthBuffer : register(s1), uniform sampler2D geomMap : register(s2) ): COLOR0
{
    float blurred = ssaoBlur(uv, uv12, uv34, uv56, uv78, map, depthBuffer);

    float4 geom = tex2D(geomMap, uv);
    float specular = geom.x;
    float diffuse = geom.y;

    // Making specular kill SSAO faster, so it doesn't get capped by 1
    float numer = SPECULAR_WEIGHT*specular + diffuse * blurred;
    // The 0.001 keeps the divisor non-zero when both channels are 0.
    float denom = SPECULAR_WEIGHT*specular + diffuse + 0.001;
    return numer / denom;
}
// Pixel shader of the depth downsampling pass: unpacks the depth at the four taps
// and outputs the smallest one in every channel.
float4 ssaoDepthDown_ps(
    float2 uv : TEXCOORD0,
    float4 uv12 : TEXCOORD1,
    float4 uv34 : TEXCOORD2,
    uniform sampler2D depthBuffer : register(s0)
    ) : COLOR0
{
    float tap1 = unpackDepth( depthBuffer, uv12.xy );
    float tap2 = unpackDepth( depthBuffer, uv12.zw );
    float tap3 = unpackDepth( depthBuffer, uv34.xy );
    float tap4 = unpackDepth( depthBuffer, uv34.zw );

    // Reduce pairwise: (tap1, tap3) and (tap2, tap4), then the two partial minima.
    float smallest = min( min( tap1, tap3 ), min( tap2, tap4 ) );
    return smallest;
}
// Vertex shader of the composite pass.
// Emits the centre uv plus four taps two g-buffer texels away: +x / -x in uv12, +y / -y in uv34.
VertexOutput_4uv ssaoComposit_vs(Appdata IN)
{
    float2 center = convertUv(IN.p);
    float2 twoTexels = TextureSize.zw * 2;
    float2 dx = float2(twoTexels.x, 0);
    float2 dy = float2(0, twoTexels.y);

    VertexOutput_4uv OUT;
    OUT.p = convertPosition(IN.p, 1);
    OUT.uv = center;
    OUT.uv12 = float4(center + dx, center - dx);
    OUT.uv34 = float4(center + dy, center - dy);
    return OUT;
}
// Pass-through composite: outputs the colour map unchanged (no occlusion applied).
float4 ssaoCompositBlank_ps(float2 uv : TEXCOORD0, uniform sampler2D colorMap: register(s0)): COLOR0
{
    float4 color = tex2D(colorMap, uv);
    return color;
}
#define CMP_LESS(X,Y) ( (X) < (Y) )
// Composite pass: multiplies the colour map by the occlusion term.
//   uv           - centre texture coordinate
//   uv12, uv34   - four neighbour taps (each float4 packs two uvs)
//   colorMap     - colour to be modulated
//   map          - occlusion term (x channel)
//   gbuffer      - packed depth, read through unpackDepth
//   depthBuffer  - depth read through getDepth
// If the depth from depthBuffer disagrees with the g-buffer depth at the centre, the centre
// occlusion value is replaced by the average of the neighbours whose depth is close enough
// to the g-buffer depth; if no neighbour qualifies, the centre value is kept.
float4 ssaoComposit_ps(
    float2 uv : TEXCOORD0,
    float4 uv12 : TEXCOORD1,
    float4 uv34 : TEXCOORD2,
    uniform sampler2D colorMap : register(s0),
    uniform sampler2D map : register(s1),
    uniform sampler2D gbuffer : register(s2),
    uniform sampler2D depthBuffer: register(s3)
    ): COLOR0
{
    float depth_range = COMPOSITE_DEPTH_DELTA / GBUFFER_MAX_DEPTH;
    float depth_range2 = COMPOSITE_DEPTH_DELTA2 / GBUFFER_MAX_DEPTH;

    // Centre pixel: compare the two depth sources.
    float baseDepth = unpackDepth( gbuffer, uv );
    float depth = getDepth( depthBuffer, uv );
    float diff = abs( depth - baseDepth );
    float ssaoTerm = tex2D( map, uv ).x;
    float chk1 = CMP_LESS( depth_range, diff ); // can we trust the base depth? 0 - yes, 1 - no

    // Neighbours: depth and occlusion value at each of the four taps.
    float4 depth2;
    depth2.x = getDepth( depthBuffer, uv12.xy );
    depth2.y = getDepth( depthBuffer, uv12.zw );
    depth2.z = getDepth( depthBuffer, uv34.xy );
    depth2.w = getDepth( depthBuffer, uv34.zw );

    float4 ssaoTermNew;
    ssaoTermNew.x = tex2D( map, uv12.xy ).x;
    ssaoTermNew.y = tex2D( map, uv12.zw ).x;
    ssaoTermNew.z = tex2D( map, uv34.xy ).x;
    ssaoTermNew.w = tex2D( map, uv34.zw ).x;

    // chk2 is 1 for neighbours close enough in depth to the centre, 0 otherwise.
    float4 diff2 = abs( depth2 - baseDepth.xxxx );
    float4 chk2 = CMP_LESS( diff2, depth_range2.xxxx );
    ssaoTermNew *= chk2;

    // den counts the accepted neighbours (0..4). Dividing by max(den, 1) gives the same
    // average whenever den >= 1 and avoids evaluating 0/0 when no neighbour is accepted
    // (in that case the estimate is not selected below anyway).
    float den = dot( chk2, 1 );
    float estimate = dot( ssaoTermNew, 1 ) / max( den, 1.0f );

    // the final decision: pick the base sample, or its estimate if the base depth is not authoritative
    ssaoTerm = ( den * chk1 > 0 ) ? estimate : ssaoTerm;

    return tex2D(colorMap, uv) * ssaoTerm;
}