#include "../shaderenv.h"


////////////////////
// Screen Space Ambient Occlusion (SSAO) shader,
// based on a shader by Alexander Kusternig


// Selects how temporalSmoothing() compares the reprojected fragment with the
// value stored in the previous frame:
// 1 = relative difference of the eye space depths,
// 0 = absolute difference between the projected depth (projPos.z) and the stored depth.
#define USE_EYESPACE_DEPTH 1


// Interpolated per-fragment input of the shader functions in this file.
struct fragment
{
	// texture coordinate used for all buffer lookups of the current fragment
	float2 texCoord: TEXCOORD0; 
	// interpolated view vector; main() scales it by the stored depth to
	// reconstruct the position of the fragment
	float3 view: TEXCOORD1;
};


// Output of main(): one render target whose channels are written as
// x = temporally blended ambient occlusion, y = temporal weight,
// z = 1 / w of the projected position, w = eye space depth.
struct pixel
{
	float4 illum_col: COLOR0;
};


/** Returns 2 * pi * (1 - cos(asin(radius / dist))), i.e. the solid angle of the
	cone with half angle asin(radius / dist) (the cone that encloses a sphere
	of the given radius seen from the distance dist).

	radius is expected to be non-negative. If dist does not exceed radius
	(this includes dist == 0) the ratio is clamped to 1, so the result is 
	2 * pi instead of the NaN that asin() yields outside of [-1, 1].
*/
inline float occlusionPower(float radius, float dist)
{
	// clamping also avoids the division by zero for dist == 0
	const float ratio = (dist > radius) ? radius / dist : 1.0f;

	// cos(asin(x)) == sqrt(1 - x * x) for x in [-1, 1]
	return 6.283185307179586476925286766559f * (1.0f - sqrt(1.0f - ratio * ratio));
}


/** Mirrors the 2D point pt at the line through the origin that has the normal n.
	note: n is not normalized here; the result is only a true reflection 
	if the caller passes a unit length normal.
*/
inline float2 myreflect(float2 pt, float2 n)
{
	// twice the signed distance of pt to the mirror line
	const float twiceDist = 2.0f * dot(n, pt);

	// move pt by that amount against the normal
	return pt - twiceDist * n;
}


/** Bilinear interpolation between the four vectors bl, br, tl, tr 
	(presumably bottom left, bottom right, top left, top right corner).
	w.y blends from the b* to the t* vectors, w.x from the *l to the *r vectors.
*/
inline float3 Interpol(float2 w, float3 bl, float3 br, float3 tl, float3 tr)
{
	// first blend along y on the left and on the right side ...
	const float3 leftSide = lerp(bl, tl, w.y);
	const float3 rightSide = lerp(br, tr, w.y);

	// ... then blend the two results along x
	return lerp(leftSide, rightSide, w.x);
}


/** Reconstructs the position of the sample stored at texcoord: the view vector
	interpolated from bl, br, tl, tr is scaled by the depth found in the w channel
	of tex and negated.
	NOTE(review): this is the same construction main() uses for its eye space
	position, so the result is relative to the eye - confirm which space the
	corner vectors are given in.
*/
inline float3 ReconstructSamplePos(uniform sampler2D tex,
								   float2 texcoord, 
								   float3 bl, float3 br, float3 tl, float3 tr)
{
	const float depth = tex2Dlod(tex, float4(texcoord, 0, 0)).w;

	return -Interpol(texcoord, bl, br, tl, tr) * depth;
}


/** Reprojects one sample of the kernel into the previous frame and returns the
	relative difference abs(1 - stored depth / expected depth) between the depth
	stored in oldTex and the depth expected from the reprojection.

	The sample is located at texcoord0 + myreflect(offset, noise) * scaleFactor.
	Returns 0 if the depth of the sample differs by more than 1.0 from 
	eyeSpaceDepth (the depth of the center fragment), i.e. such samples
	are never reported as changed.
*/
inline float ComputeDifference(float2 offset,
							   sampler2D oldTex,
							   float4x4 oldModelViewProj,
							   sampler2D colors,
							   sampler2D noiseTex,
							   float scaleFactor,
							   float3 bl,
							   float3 br,
							   float3 tl,
							   float3 tr, 
							   float2 texcoord0,
							   float3 oldEyePos,
							   float3 oldbl,
							   float3 oldbr,
							   float3 oldtl,
							   float3 oldtr,
							   float eyeSpaceDepth
							   )
{
	//////////
	//-- locate the sample: offset reflected by the noise vector, scaled to the kernel size

	const float2 noiseVec = tex2Dlod(noiseTex, float4(texcoord0, 0, 0)).xy;
	const float2 sampleCoord = texcoord0 + myreflect(offset, noiseVec) * scaleFactor;

	//////////
	//-- position of the sample in the current frame, shifted by the old eye position

	const float sampleDepth = tex2Dlod(colors, float4(sampleCoord, 0, 0)).w;
	const float3 posInOldFrame = -Interpol(sampleCoord, bl, br, tl, tr) * sampleDepth - oldEyePos;

	//////////
	//-- project into the previous frame, map from the unit cube into 0 .. 1

	float4 oldClipPos = mul(oldModelViewProj, float4(posInOldFrame, 1.0f));
	oldClipPos /= oldClipPos.w;

	const float2 oldCoord = oldClipPos.xy * 0.5f + 0.5f;

	// eye linear depth that the previous frame stored at this location
	const float storedDepth = tex2Dlod(oldTex, float4(oldCoord, .0f, .0f)).w;

	// linear depth the sample is expected to have in the previous frame
	const float3 oldViewDir = Interpol(oldCoord, oldbl, oldbr, oldtl, oldtr);
	const float expectedDepth = (1.0f / length(oldViewDir)) * length(posInOldFrame);

	float relativeDif;

	if (abs(eyeSpaceDepth - sampleDepth) > 1.0f)
	{
		// sample is too far away in depth from the center fragment => ignore it
		relativeDif = .0f;
	}
	else
	{
		relativeDif = abs(1.0f - storedDepth / expectedDepth);
	}

	return relativeDif;
}


/** Reprojects the current fragment into the previous frame and decides how much
	of the value accumulated there may be reused.

	Returns float2(x channel of oldTex at the reprojected position, new weight).
	The new weight is 
	- 1 (restart the accumulation) if temporalCoherence is (almost) zero, the 
	  reprojected position lies outside of the old frame, or the depth found
	  there differs by more than MIN_DEPTH_DIFF,
	- 4 if the fragment itself is valid but at least one sample of the kernel
	  changed (only tested for non-moving geometry, i.e. diffVec ~ 0),
	- old weight (clamped to [0, temporalCoherence]) + 1 otherwise.

	worldPos is the reconstructed position of the fragment, diffVec the 
	per-fragment motion vector added for dynamic objects. projPos is only read 
	if USE_EYESPACE_DEPTH is 0, invW is not read at all.
*/
inline float2 temporalSmoothing(float4 worldPos,
								float eyeSpaceDepth,
								float2 texcoord0,
								float3 oldEyePos,
								sampler2D oldTex,
								float4x4 oldModelViewProj,
								float temporalCoherence,
								sampler2D colors,
								float3 bl,
								float3 br,
								float3 tl,
								float3 tr, 
								float3 projPos,
								float invW,
								sampler2D noiseTex,
								float2 samples[NUM_SAMPLES],
								float scaleFactor,
								float3 oldbl,
								float3 oldbr,
								float3 oldtl,
								float3 oldtr,
								float3 diffVec
								)
{
	// position in the old frame: motion of dynamic objects + translation of the eye
	const float3 translatedPos = diffVec - oldEyePos + worldPos.xyz;


	/////////////////
	//-- reproject into the old frame and look up what was stored there

	// note: the old model view matrix only holds the view orientation part
	float4 oldClipPos = mul(oldModelViewProj, float4(translatedPos, 1.0f));
	oldClipPos /= oldClipPos.w;
	
	// from unit cube into 0 .. 1
	const float2 oldTexCoords = oldClipPos.xy * 0.5f + 0.5f;
	const float4 oldPixel = tex2Dlod(oldTex, float4(oldTexCoords, .0f, .0f));


	/////////////////
	//-- compare the stored depth (w channel) with the depth expected from the reprojection

#if USE_EYESPACE_DEPTH

	// expected eye space depth: distance to the reprojected position in units
	// of the length of the old view vector
	const float3 oldViewVec = Interpol(oldTexCoords, oldbl, oldbr, oldtl, oldtr);
	const float expectedDepth = (1.0f / length(oldViewVec)) * length(translatedPos);

	const float depthDif = abs(1.0f - oldPixel.w / expectedDepth);

#else

	// absolute difference to the depth projected into the old frame
	const float depthDif = abs(projPos.z - oldPixel.w);

#endif


	/////////////////
	//-- test if something changed in the neighbourhood of the fragment

	// starts at 0.5 and grows by one per changed sample 
	// => a value above 1 means that at least one sample changed
	float notValid = 0.5f;

#if 1
	const float squaredLen = diffVec.x * diffVec.x + diffVec.y * diffVec.y + diffVec.z * diffVec.z;

	// the samples are only tested for geometry that does not move
	if (squaredLen < 1e-8f)
	{
		for (int i = 0; i < NUM_SAMPLES; ++ i) 
		{
			const float sampleDif = ComputeDifference(samples[i],
													  oldTex,
													  oldModelViewProj,
													  colors,
													  noiseTex,
													  scaleFactor,
													  bl, br, tl, tr, 
													  texcoord0,
													  oldEyePos,
													  oldbl, oldbr, oldtl, oldtr,
													  eyeSpaceDepth
													  );

			if (sampleDif >= MIN_DEPTH_DIFF) notValid += 1.0f;
		}
	}
#endif


	/////////////////
	//-- compute the new weight

	const float oldWeight = clamp(oldPixel.y, .0f, temporalCoherence);

	const bool insideOldFrame = 
		(oldTexCoords.x >= 0.0f) && (oldTexCoords.x < 1.0f) &&
		(oldTexCoords.y >= 0.0f) && (oldTexCoords.y < 1.0f);

	const bool reuseOldValue = 
		(temporalCoherence > 1e-6f) && insideOldFrame && (depthDif <= MIN_DEPTH_DIFF);

	// default: the old value cannot be used, restart the accumulation
	float newWeight = 1.0f;

	if (reuseOldValue)
	{
		// increase the weight for convergence, but use a fixed small weight
		// if the visibility changed in the surrounding area
		newWeight = (notValid > 1.0f) ? 4.0f : oldWeight + 1.0f;
	}

	return float2(oldPixel.x, newWeight);
}


/** The ssao shader returning an intensity value.

	Checks a circular area around the current position: shoots vectors to the 
	positions found there and checks the angle between these vectors and the normal.
	Summing up the weighted angles gives an estimation of the occlusion at 
	the current position.

	Returns float2(intensity, numSamples). intensity is 1 - accumulated occlusion,
	clamped from below to 0 (it does not exceed 1 as long as the contributions of 
	the samples are non-negative). numSamples is currently always 0 because the 
	counting of the samples that fall inside the screen is disabled.

	centerPosition is the reconstructed position of the current fragment, 
	scaleFactor scales the sample offsets to the size of the kernel in texture space.
	noiseOffs and noiseTex1D are currently not used (they belong to a disabled
	jittering experiment) and are only kept for the callers.
*/
float2 ssao(fragment IN,
			sampler2D colors,
			sampler2D noiseTex,
			float2 samples[NUM_SAMPLES],
			float3 normal,
			float3 centerPosition,
			float scaleFactor,
			float3 bl,
			float3 br,
			float3 tl,
			float3 tr, 
			float3 viewDir
			, float2 noiseOffs
			, sampler2D noiseTex1D
			)
{
	float total_ao = .0f;
	float numSamples = .0f;

	// the random vector depends only on the fragment and not on the sample
	// => fetch it once instead of once per sample
	// (only used if the noise is enabled in the loop below)
	const float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord, .0f, .0f)).xy;

	// if surface normal perpendicular to view dir, approx. half of the samples will not count
	// => compensate for this (on the other hand, projected sampling area could be larger!)
	// the factor is the same for all samples, hence computed in front of the loop
	// (only used if the view correction is enabled in the loop below)
	const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);

	for (int i = 0; i < NUM_SAMPLES; ++ i) 
	{
		const float2 offset = samples[i];

#if 1
		// add random noise: reflect around random normal vector
		const float2 offsetTransformed = myreflect(offset, mynoise);
#else
		const float2 offsetTransformed = offset;
#endif
		// weight with projected coordinate to reach similar kernel size for near and far
		const float2 texcoord = IN.texCoord.xy + offsetTransformed * scaleFactor;

		const float3 samplePos = ReconstructSamplePos(colors, texcoord, bl, br, tl, tr);


		////////////////
		//-- compute contribution of sample using the direction and angle

		float3 dirSample = samplePos - centerPosition;
		// lower bound avoids the division by zero for a sample at the center position
		const float lengthToSample = max(length(dirSample), 1e-6f);

		dirSample /= lengthToSample; // normalize

		// angle between current normal and direction to sample controls AO intensity.
		const float cosAngle = max(dot(dirSample, normal), .0f);
	
		// the distance_scale offset is used to avoid the singularity that occurs 
		// when the distance to a sample approaches zero
		const float aoContrib = SAMPLE_INTENSITY / (DISTANCE_SCALE + lengthToSample * lengthToSample);

#if 1
		total_ao += cosAngle * aoContrib * viewCorrection;
#else
		total_ao += cosAngle * aoContrib;
#endif
	}

	return float2(max(0.0f, 1.0f - total_ao), numSamples);
}


/** The mrt shader for screen space ambient occlusion.

	Reconstructs the position of the fragment from the depth stored in the 
	w channel of colors, reprojects it into the previous frame (oldTex) and 
	blends the newly computed ambient occlusion with the value found there:

	x = (new ao + old ao * (weight - 1)) / weight
	y = weight
	z = 1 / w of the projected position
	w = eye space depth

	attribsTex delivers the per-fragment vector that is handed to 
	temporalSmoothing() as diffVec.
*/
pixel main(fragment IN, 
		   uniform sampler2D colors,
		   uniform sampler2D normals,
		   uniform sampler2D noiseTex,
		   uniform float2 samples[NUM_SAMPLES],
		   uniform sampler2D oldTex,
		   uniform float4x4 modelViewProj,
		   uniform float4x4 oldModelViewProj,
		   uniform float temporalCoherence,
		   uniform float3 bl,
		   uniform float3 br,
		   uniform float3 tl,
		   uniform float3 tr,
		   uniform float3 oldEyePos,
		   uniform float3 oldbl,
		   uniform float3 oldbr,
		   uniform float3 oldtl,
		   uniform float3 oldtr,
		   uniform sampler2D attribsTex,
		   uniform sampler2D noiseTex1D
		   )
{
	const float3 surfaceNormal = normalize(tex2Dlod(normals, float4(IN.texCoord, 0 ,0)).xyz);


	////////////////
	//-- reconstruct the position of the fragment from the eye space depth

	const float3 viewVec = IN.view;
	const float depth = tex2Dlod(colors, float4(IN.texCoord, 0, 0)).w;
	const float4 centerPos = float4(-viewVec * depth, 1.0f);

	// only the vector stored for the fragment itself is used
	// (averaging it with the vectors of the four neighbours was tried and is disabled)
	const float3 motionVec = tex2Dlod(attribsTex, float4(IN.texCoord, 0, 0)).xyz;


	////////////////
	//-- calculate the current projected position (also used for next frame)

	float4 clipPos = mul(modelViewProj, centerPos);
	const float invClipW = 1.0f / clipPos.w;
	clipPos *= invClipW;

	// kernel size in texture space, shrinks with 1 / w
	const float kernelScale = SAMPLE_RADIUS * invClipW;


	/////////////////
	//-- compute temporal reprojection: x = old ao value, y = new weight

	const float2 reprojected = temporalSmoothing(centerPos, depth, IN.texCoord, oldEyePos,
												 oldTex, oldModelViewProj, temporalCoherence,
												 colors, 
												 bl, br, tl, tr, 
												 clipPos.xyz, 
												 invClipW, 
												 noiseTex, 
												 samples, 
												 kernelScale, 
												 oldbl, oldbr, oldtl, oldtr,
												 motionVec
												 );

	const float previousAo = reprojected.x;
	const float weight = reprojected.y;

	// the offset into the noise texture is currently always zero
	const float2 zeroNoiseOffs = float2(.0f);


	/////////////////
	//-- compute the ambient occlusion of the current frame

	// fragments at (practically) infinite depth stay unoccluded
	// note: this should be done with the stencil buffer
	float2 currentAo = float2(1.0f, 0);

	if (depth < 1e10f)
	{
		currentAo = ssao(IN, colors, noiseTex, samples, surfaceNormal, 
						 centerPos.xyz, kernelScale, bl, br, tl, tr, 
						 normalize(viewVec), zeroNoiseOffs, noiseTex1D);
	}


	/////////////////
	//-- running average over the frames: the old value counts (weight - 1) times

	pixel OUT;

	OUT.illum_col = float4((currentAo.x + previousAo * (weight - 1.0f)) / weight,
						   weight,
						   invClipW,
						   depth);

	return OUT;
}