#include "common.h"

// tweakables
// Number of mirrored sample pairs taken per pixel by ssao_ps (aliased as NUM_PAIRS below).
#define SSAO_NUM_PAIRS         8
#define SSAO_SPHERE_RAD        2.0f   // world-space
// Lower/upper bounds, in pixels, used by ssao_ps to clamp the screen-space sampling radius.
#define SSAO_MIN_PIXEL_RANGE   10.0f
#define SSAO_MAX_PIXEL_RANGE   100.0f
// Divided by GBUFFER_MAX_DEPTH in ssaoBlur to form the base value fed to depthTolerance().
#define BLUR_DEPTH_DELTA       0.4f

// Divided by GBUFFER_MAX_DEPTH in ssaoComposit_ps: threshold on |low-res depth - gbuffer depth|
// above which the pixel's own AO sample is not trusted.
#define COMPOSITE_DEPTH_DELTA    0.02f              
// Divided by GBUFFER_MAX_DEPTH in ssaoComposit_ps: threshold under which a neighbouring
// AO sample is accepted into the replacement estimate.
#define COMPOSITE_DEPTH_DELTA2   0.4f

// Vertex input shared by every vertex shader in this file.
// Note: the vertex shaders here derive their UVs from `p` via convertUv() and never read `uv`.
struct Appdata
{
    float4 p   : POSITION;   // vertex position; passed to convertPosition()/convertUv()
    float2 uv  : TEXCOORD0;  // declared but not read by the shaders in this file
};

// Output of ssao_vs: the converted position plus a single texture coordinate.
struct VertexOutput
{
    float4 p   : POSITION;   // result of convertPosition()
    float2 uv  : TEXCOORD0;  // result of convertUv()
};


// Used below for the half-pixel position offset and for texel-sized UV offsets.
// .xy = gbuffer width/height, .zw = inverse gbuffer width/height
uniform float4 TextureSize;

#ifdef GLSL
// GLSL build: the position is passed through untouched; `scale` is ignored.
float4 convertPosition(float4 p, float scale)
{
	float4 outPos = p;
	return outPos;
}

// GLSL build: remap p.xy to texture coordinates (x * 0.5 + 0.5),
// i.e. [-1, 1] -> [0, 1] assuming p.xy spans [-1, 1].
float2 convertUv(float4 p)
{
	float2 xy = p.xy;
	return xy * 0.5 + 0.5;
}
#else
// Non-GLSL build: apply the half-pixel offset, i.e. shift the position by
// `scale` times (-1/width, +1/height) of the gbuffer.
float4 convertPosition(float4 p, float scale)
{
	float4 pixelShift = float4(-TextureSize.z, TextureSize.w, 0, 0);
	return p + pixelShift * scale;
}

// Non-GLSL build: same remap as the GLSL variant, but with the Y axis flipped.
float2 convertUv(float4 p)
{
	float2 halfFlipY = float2(0.5, -0.5);
	return p.xy * halfFlipY + 0.5;
}
#endif

// Vertex shader for the SSAO pass: converts the position (offset scale 1)
// and derives the texture coordinate from the input position.
VertexOutput ssao_vs(Appdata IN)
{
    VertexOutput OUT;

    OUT.p  = convertPosition(IN.p, 1);
    OUT.uv = convertUv(IN.p);

    return OUT;
}

// used for depth downsampling pass (ssaoDepthDown_vs) and for the composite pass (ssaoComposit_vs)
// Carries the centre UV plus four extra UVs packed two-per-float4 (.xy and .zw).
struct VertexOutput_4uv
{
    float4 p    : POSITION;
    float2 uv   : TEXCOORD0;  // centre texture coordinate
	float4 uv12 : TEXCOORD1;  // extra taps 1 (.xy) and 2 (.zw)
	float4 uv34 : TEXCOORD2;  // extra taps 3 (.xy) and 4 (.zw)
};

// Vertex shader for the depth downsampling pass.
// Emits the centre UV and four UVs offset diagonally by half a gbuffer texel,
// which ssaoDepthDown_ps uses as its four depth taps.
VertexOutput_4uv ssaoDepthDown_vs(Appdata IN)
{
    float2 center    = convertUv(IN.p);
    float2 halfTexel = TextureSize.zw * 0.5f;

    VertexOutput_4uv OUT;
    OUT.p  = convertPosition(IN.p, 2);
    OUT.uv = center;

    OUT.uv12 = float4(center + halfTexel * float2(-1, -1),
                      center + halfTexel * float2(+1, -1));
    OUT.uv34 = float4(center + halfTexel * float2(-1, +1),
                      center + halfTexel * float2(+1, +1));

    return OUT;
}

// Output of the blur vertex shaders: the centre UV plus eight tap UVs
// packed two-per-float4 (.xy and .zw).
struct VertexOutput_8uv
{
    float4 p    : POSITION;
    float2 uv   : TEXCOORD0;  // centre texture coordinate
	float4 uv12 : TEXCOORD1;  // centre + 1 and + 2 steps
	float4 uv34 : TEXCOORD2;  // centre + 3 and + 4 steps
	float4 uv56 : TEXCOORD3;  // centre - 1 and - 2 steps
	float4 uv78 : TEXCOORD4;  // centre - 3 and - 4 steps
};

// Shared vertex shader body for the SSAO blur passes.
// uvOffset is the UV step between neighbouring taps; the eight taps sit at
// +1..+4 and -1..-4 steps from the centre UV.
VertexOutput_8uv ssaoBlur_vs(Appdata IN, float2 uvOffset)
{
    float2 center = convertUv(IN.p);

    VertexOutput_8uv OUT;
    OUT.p  = convertPosition(IN.p, 2);
    OUT.uv = center;

    // taps on the positive side of the centre
    OUT.uv12 = float4(center + 1 * uvOffset, center + 2 * uvOffset);
    OUT.uv34 = float4(center + 3 * uvOffset, center + 4 * uvOffset);

    // mirrored taps on the negative side
    OUT.uv56 = float4(center - 1 * uvOffset, center - 2 * uvOffset);
    OUT.uv78 = float4(center - 3 * uvOffset, center - 4 * uvOffset);

    return OUT;
}

// Horizontal blur vertex shader: taps are spaced two gbuffer texels apart along X.
VertexOutput_8uv ssaoBlurX_vs(Appdata IN)
{
	float2 stepX = float2(TextureSize.z * 2, 0);
	return ssaoBlur_vs(IN, stepX);
}

// Vertical blur vertex shader: taps are spaced two gbuffer texels apart along Y.
VertexOutput_8uv ssaoBlurY_vs(Appdata IN)
{
	float2 stepY = float2(0, TextureSize.w * 2);
	return ssaoBlur_vs(IN, stepY);
}

// Reconstructs depth from a texel that stores it split across two channels:
// .w holds the coarse part and .z the fine part (scaled by 1/256).
float unpackDepth( sampler2D s, float2 uv )
{
	float2 fineCoarse = tex2D(s, uv).zw;
	return fineCoarse.x * (1.0f/256.0f) + fineCoarse.y;
}

// Reads depth from the red channel of a single-channel depth texture.
float getDepth( sampler2D s, float2 uv )
{
	float4 texel = tex2D(s, uv);
	return texel.r;
}

// Short alias for the number of sample pairs used by ssao_ps / GetRotatedSample.
#define NUM_PAIRS   SSAO_NUM_PAIRS
// Parenthesized so the quotient stays a single value wherever the macro is
// expanded (e.g. `x / RANGE` must not become `x / 60.0 / 1024.0`).
#define RANGE (60.0/1024.0)

#define pi 3.14159265359
// Degrees -> radians.
#define RAD(X) ( (X) * (pi/180) )

// Returns the i-th sample offset of a spiral pattern:
// the angle starts at 45 degrees and advances by one NUM_PAIRS-th of a full turn per index,
// while the length grows linearly as (i+1) / (NUM_PAIRS+2).
float2 GetRotatedSample(float i)
{
	float angle  = RAD(45) + i / NUM_PAIRS * 2 * pi;
	float length = (i+1) / (NUM_PAIRS+2);

	return length * float2( cos( angle ), sin( angle ) );
}

// Two taps per pair plus one. Parenthesized so the sum survives expansion inside
// larger expressions (e.g. `x / NUM_SAMPLES` or `NUM_SAMPLES * k`).
#define NUM_SAMPLES (NUM_PAIRS*2+1)

// SSAO pixel shader.
//
// For every pixel, takes NUM_PAIRS pairs of depth samples placed symmetrically
// around the pixel (offset pattern from GetRotatedSample, rotated per pixel by a
// vector read from randMap) and compares them against the pixel's own depth to
// accumulate a visibility term.
//
//   uv          - texture coordinate of the pixel
//   depthBuffer - depth texture read through getDepth() (red channel)
//   randMap     - noise texture supplying the per-pixel rotation
//
// Returns the term replicated into rgb with alpha = 1. Pixels whose depth exceeds
// 1 - 1/256 get +1 added to the term (presumably to keep far-plane/sky pixels
// unoccluded - confirm against the render-target format).
float4 ssao_ps(
    float2 uv: TEXCOORD0,
    uniform sampler2D depthBuffer: register(s0),
    uniform sampler2D randMap: register(s1)): COLOR0
{
	// half the gbuffer resolution
	float2 mapSize = TextureSize.xy / 2;

	float baseDepth = getDepth( depthBuffer, uv );
	
	// noise lookup repeats every 4 half-res pixels (assuming randMap wraps); remapped from [0,1] to [-1,1]
	float4 noiseTex = tex2D(randMap, uv*mapSize/4) * 2 - 1;
	
	// 2D rotation built from the noise vector
	float2x2 rotation = 
	{
		{ noiseTex.y, noiseTex.x },
		{ -noiseTex.x, noiseTex.y }
	};
	
	float2 OFFSETS1[NUM_PAIRS] =
	{
		GetRotatedSample(0),
		GetRotatedSample(1),
		GetRotatedSample(2),
		GetRotatedSample(3),
		GetRotatedSample(4),
		GetRotatedSample(5),
#if NUM_PAIRS > 6
		GetRotatedSample(6),
		GetRotatedSample(7),
#if NUM_PAIRS > 8
		GetRotatedSample(8),
		GetRotatedSample(9),
		GetRotatedSample(10),
		GetRotatedSample(11),
#endif
#endif
	};
	
	float occ = 1;
	
	// sphere radius expressed in depth-buffer units; driven by the SSAO_SPHERE_RAD
	// tweakable instead of a duplicated literal so that editing the tweakable takes effect
	float sphereRadiusZB = (float) ( SSAO_SPHERE_RAD / GBUFFER_MAX_DEPTH );
	
#define MINPIXEL SSAO_MIN_PIXEL_RANGE
#define MAXPIXEL SSAO_MAX_PIXEL_RANGE
	
	// sampling radius in UV units: shrinks with depth, clamped to the configured pixel range
	float radiusTex = (float)clamp( 0.5*sphereRadiusZB / baseDepth, MINPIXEL / mapSize.x, MAXPIXEL / mapSize.y);
	
	float numSamples = 2;
	
	for(int i = 0; i < NUM_PAIRS; i++)
	{
		float2 offset1 = mul(rotation, OFFSETS1[i]);
	
		// the two taps of a pair are mirrored around the pixel
		float2 offseted1 = uv + offset1 * radiusTex;
		float2 offseted2 = uv - offset1 * radiusTex;
		
		float2 offsetDepth;
		offsetDepth.x = getDepth( depthBuffer, offseted1 );
		offsetDepth.y = getDepth( depthBuffer, offseted2 );
		
		float2 diff = offsetDepth - baseDepth.xx;
		
		// same length that GetRotatedSample() used for this index
		float normalizedOffsetLen = (float)(i+1)/(NUM_PAIRS+2);
		
		// depth extent of the sphere at this distance from its centre (scaled by 1.5)
		float segmentDiff = (float) ( 1.5f*sphereRadiusZB*sqrt(1-normalizedOffsetLen*normalizedOffsetLen) );
		
		float2 normalizedDiff = (diff / segmentDiff) + 0.5;
		
		float minDiff = min(normalizedDiff.x, normalizedDiff.y);
		
		// At 0, full sample
		// At -1, zero sample, zero weight
		
		float sampleadd = (float) saturate(1+minDiff);
		
		float a = (float)(saturate(normalizedDiff.x) + saturate(normalizedDiff.y))*sampleadd;
		occ += a;
		numSamples += 2 * sampleadd;
 	}
	
	// weighted average of the accumulated samples
	occ = occ / numSamples;

	float finalocc = (float)saturate(occ*2);
	
	// pixels within 1/256 of maximum depth get their term pushed to >= 1
	if(baseDepth - (1.0f-1/256.0f) > 0)
		finalocc += 1;
	
	return float4(finalocc, finalocc, finalocc, 1);
}

// Estimates the depth discrepancy tolerance for the blur filter.
// The tolerance grows linearly with baseDepth (slope `ramp`) and is clamped to
// the range [0.1, 40] times sphereRadiusZB.
float depthTolerance( float baseDepth, float sphereRadiusZB )
{
	float ramp = 80; // tweak

	float lowest  = 0.1f * sphereRadiusZB;
	float highest = 40*sphereRadiusZB;

	return clamp( sphereRadiusZB * (baseDepth * ramp), lowest, highest );
}

// Depth-aware 1D blur of the SSAO map, shared by the X and Y blur pixel shaders.
//
//   uv              - centre tap
//   uv12, uv34      - taps at +1..+4 steps (two UVs per float4)
//   uv56, uv78      - taps at -1..-4 steps
//   map             - SSAO term to blur (red channel)
//   depthBuffer     - depth texture read through getDepth()
//
// A tap contributes only if its depth is within depthTolerance() of the centre
// depth. Tap weights are 4 - distance, i.e. 3, 2, 1, 0 for distances 1..4 (so the
// outermost taps carry zero weight); the centre carries a total weight of 4
// (1 in each of the four lanes that are summed at the end).
float ssaoBlur(
	float2 uv,
	
	float4 uv12,
	float4 uv34,
	float4 uv56, 
	float4 uv78,

	sampler2D map, 
	sampler2D depthBuffer
	)
{
	float  sphereRadiusZB = BLUR_DEPTH_DELTA / GBUFFER_MAX_DEPTH;
	float4 tapWeight      = 4 - float4( 1, 2, 3, 4 );
	float4 weightSum      = 1;

	float4 aoSum = tex2D(map, uv).rrrr * weightSum;

	float  centerDepth = getDepth( depthBuffer, uv );
	float4 tolerance   = depthTolerance( centerDepth, sphereRadiusZB ).xxxx;

	// ---- taps on the positive side ----
	float4 tapDepth = float4(
		getDepth( depthBuffer, uv12.xy ),
		getDepth( depthBuffer, uv12.zw ),
		getDepth( depthBuffer, uv34.xy ),
		getDepth( depthBuffer, uv34.zw ) );

	float4 tapAo = float4(
		tex2D( map, uv12.xy ).r,
		tex2D( map, uv12.zw ).r,
		tex2D( map, uv34.xy ).r,
		tex2D( map, uv34.zw ).r );

	float4 weight = tapWeight * ( abs(tapDepth - centerDepth.xxxx) < tolerance );

	aoSum     += tapAo * weight;
	weightSum += weight;

	// ---- taps on the negative side ----
	tapDepth = float4(
		getDepth( depthBuffer, uv56.xy ),
		getDepth( depthBuffer, uv56.zw ),
		getDepth( depthBuffer, uv78.xy ),
		getDepth( depthBuffer, uv78.zw ) );

	tapAo = float4(
		tex2D( map, uv56.xy ).r,
		tex2D( map, uv56.zw ).r,
		tex2D( map, uv78.xy ).r,
		tex2D( map, uv78.zw ).r );

	weight = tapWeight * ( abs(tapDepth - centerDepth.xxxx) < tolerance );

	aoSum     += tapAo * weight;
	weightSum += weight;

	// horizontal add of the four lanes, then normalize
	float4 ones = float4(1,1,1,1);
	return dot( aoSum, ones ) / dot( weightSum, ones );
}


// Pixel shader for the first (X) blur pass: outputs the blurred SSAO term in rgb, alpha = 1.
float4 ssaoBlurX_ps(
	float2 uv : TEXCOORD0,
	float4 uv12 : TEXCOORD1,
	float4 uv34 : TEXCOORD2,
	float4 uv56 : TEXCOORD3,
	float4 uv78 : TEXCOORD4,
	
    uniform sampler2D map : register(s0), uniform sampler2D depthBuffer : register(s1) ): COLOR0
{
    float blurred = ssaoBlur( uv, uv12, uv34, uv56, uv78, map, depthBuffer );

    return float4(blurred, blurred, blurred, 1);
}

// Multiplier applied to the specular term when ssaoBlurY_ps blends it with the SSAO term.
#define SPECULAR_WEIGHT 3


// Pixel shader for the second (Y) blur pass.
// Blurs the SSAO term and then attenuates it by the pixel's lighting mix read from
// geomMap (.x = specular, .y = diffuse): only the diffuse part is multiplied by
// the SSAO term. The scalar result is replicated into all four output channels.
float4 ssaoBlurY_ps(
	float2 uv : TEXCOORD0,
	float4 uv12 : TEXCOORD1,
	float4 uv34 : TEXCOORD2,
	float4 uv56 : TEXCOORD3,
	float4 uv78 : TEXCOORD4,
	
    uniform sampler2D map : register(s0), uniform sampler2D depthBuffer : register(s1), uniform sampler2D geomMap : register(s2) ): COLOR0
{
    float blurred = ssaoBlur(uv, uv12, uv34, uv56, uv78, map, depthBuffer);

    float2 lighting = tex2D(geomMap, uv).xy;
    float specular = lighting.x;
    float diffuse  = lighting.y;

    // Making specular kill SSAO faster, so it doesn't get capped by 1
    float occluded   = SPECULAR_WEIGHT*specular + diffuse * blurred;
    float unoccluded = SPECULAR_WEIGHT*specular + diffuse + 0.001; // small bias keeps the divisor non-zero

    return occluded / unoccluded;
}


// Pixel shader for the depth downsampling pass: unpacks depth at the four
// supplied taps and outputs the smallest of them (replicated to all channels).
float4 ssaoDepthDown_ps( 
	float2 uv : TEXCOORD0, 
	float4 uv12 : TEXCOORD1,
	float4 uv34 : TEXCOORD2,
	
	uniform sampler2D depthBuffer : register(s0)  
) : COLOR0
{
	float d1 = unpackDepth( depthBuffer, uv12.xy );
	float d2 = unpackDepth( depthBuffer, uv12.zw );
	float d3 = unpackDepth( depthBuffer, uv34.xy );
	float d4 = unpackDepth( depthBuffer, uv34.zw );

	float smallest = min( min( d1, d3 ), min( d2, d4 ) );
	return smallest;
}

// Vertex shader for the composite pass.
// Emits the centre UV and four axis-aligned neighbours two gbuffer texels away
// (right, left, down, up in UV space).
VertexOutput_4uv ssaoComposit_vs(Appdata IN)
{
    float2 center  = convertUv(IN.p);
    float2 twoTexels = TextureSize.zw * 2;

    float2 stepX = float2(twoTexels.x, 0);
    float2 stepY = float2(0, twoTexels.y);

    VertexOutput_4uv OUT;
    OUT.p  = convertPosition(IN.p, 1);
    OUT.uv = center;

    OUT.uv12 = float4(center + stepX, center - stepX);
    OUT.uv34 = float4(center + stepY, center - stepY);

    return OUT;
}

// Pass-through composite: outputs the colour map unchanged (no SSAO applied).
float4 ssaoCompositBlank_ps(float2 uv : TEXCOORD0, uniform sampler2D colorMap: register(s0)): COLOR0
{
    float4 color = tex2D(colorMap, uv);
    return color;
}
 
// Component-wise X < Y. ssaoComposit_ps stores the result in float/float4 variables
// and uses it as a 0/1 mask.
#define CMP_LESS(X,Y) (  (X) < (Y) )

// Composite pixel shader: multiplies the scene colour by the SSAO term.
//
//   uv            - centre texture coordinate
//   uv12, uv34    - four neighbouring coordinates supplied by ssaoComposit_vs
//   colorMap      - scene colour
//   map           - SSAO term (x channel)
//   gbuffer       - packed depth, read through unpackDepth()
//   depthBuffer   - depth read through getDepth()
//
// If the depth stored in depthBuffer at this pixel differs from the gbuffer depth
// by more than COMPOSITE_DEPTH_DELTA (scaled by GBUFFER_MAX_DEPTH), the pixel's own
// SSAO sample is considered unreliable and is replaced by the average of those
// neighbouring samples whose depth lies within COMPOSITE_DEPTH_DELTA2 of the gbuffer
// depth. If no neighbour qualifies, the pixel's own sample is kept.
float4 ssaoComposit_ps(
	float2 uv : TEXCOORD0,
	float4 uv12 : TEXCOORD1,
	float4 uv34 : TEXCOORD2,
	
	uniform sampler2D colorMap :    register(s0),
	uniform sampler2D map :         register(s1), 
	uniform sampler2D gbuffer :     register(s2),
	uniform sampler2D depthBuffer:  register(s3)
	): COLOR0
{
	float depth_range  = COMPOSITE_DEPTH_DELTA / GBUFFER_MAX_DEPTH;
	float depth_range2 = COMPOSITE_DEPTH_DELTA2 / GBUFFER_MAX_DEPTH;

	// reference depth of this pixel, taken from the gbuffer
	float baseDepth = unpackDepth( gbuffer, uv );

	float depth = getDepth( depthBuffer, uv );
	float diff = abs( depth - baseDepth );
	float ssaoTerm = tex2D( map, uv ).x;
	
	float chk1 = CMP_LESS( depth_range, diff );   // can we trust the base depth? 0 - yes, 1 - no
	float4 ssaoTermNew = 0, chk2, depth2, diff2; 

	depth2.x  = getDepth( depthBuffer, uv12.xy );
	depth2.y  = getDepth( depthBuffer, uv12.zw );
	depth2.z  = getDepth( depthBuffer, uv34.xy );
	depth2.w  = getDepth( depthBuffer, uv34.zw );

	ssaoTermNew.x = tex2D( map, uv12.xy ).x;
	ssaoTermNew.y = tex2D( map, uv12.zw ).x;
	ssaoTermNew.z = tex2D( map, uv34.xy ).x;
	ssaoTermNew.w = tex2D( map, uv34.zw ).x;

	// 1 for every neighbour whose depth is close enough to the reference depth
	diff2 = abs( depth2 - baseDepth.xxxx );
	chk2  = CMP_LESS( diff2, depth_range2.xxxx );
	
	ssaoTermNew *= chk2;

	// number of accepted neighbours (0..4)
	float den = dot( chk2, 1 );

	// Average of the accepted neighbours. den is a whole number, so max(den, 1)
	// leaves every den >= 1 case untouched and only prevents the 0/0 that would
	// otherwise be evaluated (and could leak a NaN through the select below)
	// when no neighbour was accepted.
	float estimate = dot( ssaoTermNew, 1 ) / max( den, 1.0f );

	// the final decision: pick the base sample, or its estimate if the base depth is unauthoritative
	// (the estimate is only used when at least one neighbour was accepted)
	ssaoTerm = saturate(den*chk1) ? estimate : ssaoTerm;

	return tex2D(colorMap, uv) * ssaoTerm;
}