#include "../shaderenv.h"
#include "common.h"

////////////////////
// Screen Spaced Ambient Occlusion shader
// based on shader of Alexander Kusternig

#define USE_EYESPACE_DEPTH 1


// interpolated vertex output consumed by the fragment programs
struct fragment
{
	float2 texCoord: TEXCOORD0; // screen space texture coordinate
	float3 view: TEXCOORD1;     // frustum corner ray for eye space reconstruction
};


// render target layout of this pass
struct pixel2
{
	// x: ssao term, y: sample index, z: squared movement, w: eye space depth
	float4 illum_col: COLOR0;
	//float4 col: COLOR1;
};


// this function is inspired from the paper of shamulgaan in order
// to get a physical expression for the occlusion culling
inline float occlusionPower(float radius, float dist)
{
	// 2 * pi * (1 - cos(asin(r / d))): solid angle of a sphere cap
	return 6.283185307179586476925286766559f * (1.0f - cos(asin(radius / dist)));
}


// reconstruct world space position from the linear eye space depth
// by interpolating between the frustum corner rays (bl, br, tl, tr)
inline float3 ReconstructSamplePos(float eyeSpaceDepth,
								   float2 texcoord,
								   float3 bl, float3 br, float3 tl, float3 tr)
{
	float3 viewVec = Interpol(texcoord, bl, br, tl, tr);
	float3 samplePos = -viewVec * eyeSpaceDepth;

	return samplePos;
}


/** Returns the convergence value stored in the y channel of tex
	at the given texture coordinate. res is the texture resolution.
*/
float ComputeConvergence(uniform sampler2D tex, float2 texCoord, float2 res)
{
	// get the minimum convergence by exactly sampling the 4 surrounding
	// texels in the old texture, otherwise flickering because convergence
	// will be interpolated when upsampling and filter size does not match!

	const float2 invRes = float2(1.0f / res.x, 1.0f / res.y);

	// get position exactly between texel centers
	// fix: the snapped texel index must be scaled back by the inverse
	// resolution (it was multiplied by texCoord, yielding a meaningless value)
	float2 center = (floor(texCoord * res) + float2(.5f)) * invRes;
	//center.x = (floor(texCoord.x * res.x - .5f) + 1.0f) / res.x;
	//center.y = (floor(texCoord.y * res.y - .5f) + 1.0f) / res.y;
	//center.y = (floor(texCoord.y * res.y) + .5f) * yOffs;

	/*texelCenterConv.x = tex2Dlod(tex, float4(center + float2( xoffs, yoffs), 0, 0)).y;
	texelCenterConv.y = tex2Dlod(tex, float4(center + float2( xoffs, -yoffs), 0, 0)).y;
	texelCenterConv.z = tex2Dlod(tex, float4(center + float2(-xoffs, -yoffs), 0, 0)).y;
	texelCenterConv.w = tex2Dlod(tex, float4(center + float2(-xoffs, yoffs), 0, 0)).y;

	const float m1 = min(texelCenterConv.x, texelCenterConv.y);
	const float m2 = min(texelCenterConv.z, texelCenterConv.w);

	const float convergence = min(m1, m2);*/

	//const float convergence = tex2Dlod(tex, float4(center, 0, 0)).y;
	const float convergence = tex2Dlod(tex, float4(texCoord, 0, 0)).y;

	return convergence;
}


/** This shader computes the reprojection and stores
	the ssao value of the old pixel as well as the
	weight of the pixel in the new frame.

	Returns float3(old ssao, old weight, old sample index);
	weight and index are zero if the reprojected sample is invalid.
*/
inline float3 Reproject(float4 worldPos,
						float eyeSpaceDepth,
						float2 texcoord0,
						float3 oldEyePos,
						sampler2D oldTex,
						float4x4 oldModelViewProj,
						sampler2D colors,
						float3 projPos,
						float invW,
						float3 oldbl,
						float3 oldbr,
						float3 oldtl,
						float3 oldtr,
						float3 diffVec
						)
{
	// compute position from old frame for dynamic objects + translational portion
	const float3 translatedPos = diffVec - oldEyePos + worldPos.xyz;


	/////////////////
	//-- reproject into old frame and calculate texture position of sample in old frame

	// note: the old model view matrix only holds the view orientation part
	float4 backProjPos = mul(oldModelViewProj, float4(translatedPos, 1.0f));
	backProjPos /= backProjPos.w;

	// fit from unit cube into 0 .. 1
	const float2 oldTexCoords = backProjPos.xy * 0.5f + 0.5f;
	// retrieve the sample from the last frame
	const float4 oldPixel = tex2Dlod(oldTex, float4(oldTexCoords, .0f, .0f));

	// the ssao value in the old frame
	const float ssao = oldPixel.x;

	// calculate eye space position of sample in old frame
	const float oldEyeSpaceDepth = oldPixel.w;

	// vector from eye pos to old sample
	const float3 viewVec = Interpol(oldTexCoords, oldbl, oldbr, oldtl, oldtr);
	const float invLen = 1.0f / length(viewVec);
	const float projectedEyeSpaceDepth = invLen * length(translatedPos);
	//const float projectedEyeSpaceDepth = length(translatedPos);

	// relative depth difference decides whether the old sample is still valid
	const float depthDif = abs(1.0f - oldEyeSpaceDepth / projectedEyeSpaceDepth);

	// the weight of the accumulated samples from the previous frames
	float w;
	float idx;


	//////////////
	//-- reuse old value only if it was still valid in the old frame

	if (1
		&& (oldTexCoords.x > 0) && (oldTexCoords.x < 1.0f)
		&& (oldTexCoords.y > 0) && (oldTexCoords.y < 1.0f)
		&& (depthDif <= MIN_DEPTH_DIFF)
		)
	{
		// pixel valid => retrieve the convergence weight
		/*float w1 = tex2Dlod(oldTex, float4(oldTexCoords + float2(0.5f / 1024.0f, 0), .0f, .0f)).y;
		float w2 = tex2Dlod(oldTex, float4(oldTexCoords - float2(0.5f / 1024.0f, 0), .0f, .0f)).y;
		float w3 = tex2Dlod(oldTex, float4(oldTexCoords + float2(0, 0.5f / 768.0f), .0f, .0f)).y;
		float w4 = tex2Dlod(oldTex, float4(oldTexCoords - float2(0, 0.5f / 768.0f), .0f, .0f)).y;

		w = min(min(w1, w2), min(w3, w4));*/

		//w = ComputeConvergence(oldTex, oldTexCoords, float2(1024.0f, 768.0f));
		//w = floor(oldPixel.y);

		// fix: w was left unassigned on this branch and returned as garbage
		// (the caller reads it as the accumulated index/weight); restore the
		// intended assignment from the stored per-pixel index
		w = oldPixel.y;
		idx = floor(oldPixel.y);
	}
	else
	{
		w = .0f;
		idx = .0f;
	}

	return float3(ssao, w, idx);
}


/** The ssao shader returning an intensity value between 0 and 1.
	This version of the ssao shader uses the dotproduct between
	pixel-to-sample direction and sample normal as weight.

	The algorithm works like the following:
	1) Check in a circular area around the current position.
	2) Shoot vectors to the positions there, and check the angle to these positions.
	3) Summing up these angles gives an estimation of the occlusion at the current position.
*/
// variant with sample-normal weighting; normalTex supplies per-pixel normals,
// idx offsets the lookup into the precomputed sample pattern
float3 ssao2(fragment IN,
			 sampler2D colors,
			 sampler2D noiseTex,
			 sampler2D samples,
			 float3 normal,
			 float3 centerPosition,
			 float radius,
			 float3 bl,
			 float3 br,
			 float3 tl,
			 float3 tr,
			 float3 viewDir,
			 float convergence,
			 float sampleIntensity,
			 bool isMovingObject,
			 sampler2D normalTex,
			 float idx
			 )
{
	float total_ao = .0f;
	float validSamples = .0f;
	float numSamples = .0f;

	for (int i = 0; i < NUM_SAMPLES; ++ i)
	{
		float2 offset;

		// fetch the i-th precomputed 2d sample offset, shifted by idx
		const float2 ssaoOffset =
			tex2Dlod(samples, float4((0.5f + i + idx) / NUM_PRECOMPUTED_SAMPLES, 0.5f, .0f, .0f)).xy;


		////////////////////
		//-- add random noise: reflect around random normal vector
		//-- (affects performance for some reason!)

		if (convergence < SSAO_CONVERGENCE_THRESHOLD)
		{
			float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, 0, 0)).xy;
			//offset = myreflect(samples[i], mynoise);
			//offset = myrotate(samples[i], mynoise.x);
			offset = myrotate(ssaoOffset, mynoise.x);
		}
		else
		{
			offset = ssaoOffset;
		}

		// weight with projected coordinate to reach similar kernel size for near and far
		const float2 texcoord = IN.texCoord.xy + offset * radius;

		const float4 sampleColor = tex2Dlod(colors, float4(texcoord, .0f, .0f));
		// sampleColor.w holds the eye space depth of the sample
		const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);


		////////////////
		//-- compute contribution of sample using the direction and angle

		float3 dirSample = samplePos - centerPosition;

		const float minDist = 1e-6f;
		const float delta = 1e-3f;

		const float lengthToSample = length(dirSample);
		// falloff: closer samples occlude more
		const float sampleWeight = 1.0f / (lengthToSample + delta);

		dirSample /= max(lengthToSample, minDist); // normalize

		// angle between current normal and direction to sample controls AO intensity.
		const float cosAngle = dot(dirSample, normal);

		// the normal of the current sample
		const float3 sampleNormal = normalize(tex2Dlod(normalTex, float4(texcoord, 0, 0)).xyz);

		// angle between current normal and direction to sample controls AO intensity.
		//const float cosAngle2 = dot(-dirSample, sampleNormal);
		const float cosAngle2 = .5f + dot(sampleNormal, -normal) * .5f;

		// NOTE(review): dirSample is not read after this point, so this scale
		// looks like a leftover from an earlier formula — confirm before removing
		dirSample *= minDist;
		const float aoContrib = sampleIntensity * sampleWeight;
		//const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;

		//total_ao += max(cosAngle, .0f) * max(cosAngle2, .0f) * aoContrib;
		total_ao += max(cosAngle, .0f) * cosAngle2 * aoContrib;

		++ numSamples;

		// check if the samples have been valid in the last frame
		// only mark sample as invalid if in the last / current frame
		// they possibly have any influence on the ao
		const float changeFactor = sampleColor.y;
		const float pixelValid = sampleColor.x;

		// hack:
		// we check if the sample could have been near enough
		// to the current pixel or if the angle is small enough
		// to have any influence in the current or last frame
#if 1
		const float tooFarAway = step(0.5f, lengthToSample - changeFactor);
		const float partlyResetThres = 1.0f;

		if (pixelValid <= partlyResetThres)
			validSamples = max(validSamples, pixelValid * (1.0f - tooFarAway) * step(-0.1f, cosAngle));
		else
			validSamples = max(validSamples, pixelValid);
#endif

#ifdef USE_GTX
		// we can bail out early and use a minimal #samples)
		// if some conditions are met as long as the hardware supports it
		if (numSamples >= MIN_SAMPLES)
		{
			//break;
			// if the pixel belongs to a static object and all the samples stay valid in the current frame
			if (!isMovingObject && (validSamples < 1.0f) && (convergence > NUM_SAMPLES))
				break;
			// if the pixel belongs to a dynamic object but the #accumulated samples for this pixel is sufficiently high
			// (=> there was no discontinuity recently)
			//else if (isMovingObject && (convergence > SSAO_CONVERGENCE_THRESHOLD)) break;
			else if (isMovingObject && (convergence > NUM_SAMPLES * 5))
				break;
		}
#endif
	}

	// "normalize" ao contribution
	total_ao /= numSamples;

#if 1
	// if surface normal perpenticular to view dir, approx. half of the samples will not count
	// => compensate for this (on the other hand, projected sampling area could be larger!)
	const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
	total_ao *= viewCorrection;
#endif

	//return float3(total_ao, validSamples, numSamples);
	return float3(min(1.0f, total_ao), validSamples, numSamples);
}


/** The ssao shader returning an intensity value between 0 and 1.
	This version of the ssao shader uses the dotproduct between
	pixel-to-sample direction and sample normal as weight.

	The algorithm works like the following:
	1) Check in a circular area around the current position.
	2) Shoot vectors to the positions there, and check the angle to these positions.
	3) Summing up these angles gives an estimation of the occlusion at the current position.
*/
// main SSAO variant: weights samples by angle to the center normal only;
// returns float3(ao, validity of samples, #samples taken)
float3 ssao(fragment IN,
			sampler2D colors,
			sampler2D noiseTex,
			sampler2D samples,
			float3 normal,
			float3 centerPosition,
			float radius,
			float3 bl,
			float3 br,
			float3 tl,
			float3 tr,
			float3 viewDir,
			float convergence,
			float sampleIntensity,
			bool isMovingObject,
			float oldIdx
			)
{
	float total_ao = .0f;
	float validSamples = .0f;
	float numSamples = .0f;

	for (int i = 0; i < NUM_SAMPLES; ++ i)
	{
		float2 offset;

		// fetch the i-th precomputed 2d sample offset, shifted by oldIdx
		const float2 ssaoOffset =
			tex2Dlod(samples, float4((0.5f + i + oldIdx) / NUM_PRECOMPUTED_SAMPLES, 0.5f, .0f, .0f)).xy;


		////////////////////
		//-- add random noise: reflect around random normal vector
		//-- (affects performance for some reason!)

		if (convergence < SSAO_CONVERGENCE_THRESHOLD)
		{
			float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, 0, 0)).xy;
			//offset = myreflect(samples[i], mynoise);
			//offset = myrotate(samples[i], mynoise.x);
			offset = myrotate(ssaoOffset, mynoise.x);
		}
		else
		{
			offset = ssaoOffset;
		}

		// weight with projected coordinate to reach similar kernel size for near and far
		const float2 texcoord = IN.texCoord.xy + offset * radius;

		const float4 sampleColor = tex2Dlod(colors, float4(texcoord, .0f, .0f));
		// sampleColor.w holds the eye space depth of the sample
		const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);


		////////////////
		//-- compute contribution of sample using the direction and angle

		float3 dirSample = samplePos - centerPosition;

		const float minDist = 1e-6f;
		const float eps = 1e-3f;

		const float lengthToSample = length(dirSample);
		// falloff: closer samples occlude more
		const float sampleWeight = 1.0f / max(lengthToSample, eps);

		dirSample /= max(length(dirSample), minDist); // normalize

		// angle between current normal and direction to sample controls AO intensity.
		const float cosAngle = dot(dirSample, normal);

		//const float aoContrib = sampleIntensity / sqrLen;
		const float aoContrib = sampleIntensity * sampleWeight;
		//const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;

		total_ao += max(cosAngle, .0f) * aoContrib;

		++ numSamples;

#ifdef PERFORMANCE_TEST
		// check if the samples have been valid in the last frame
		// only mark sample as invalid if in the last / current frame
		// they possibly have any influence on the ao
		const float changeFactor = sampleColor.y;
		const float pixelValid = sampleColor.x;

		// hack:
		// we check if the sample could have been near enough to the current pixel
		// or if the angle is small enough
		// to have any influence in the current or last frame
#if 1
		const float partlyResetThres = 1.0f;

		const float tooFarAway = step(0.5f, lengthToSample - changeFactor);

		// NOTE(review): condition deliberately disabled with if (0) — the
		// partial-invalidation path is kept for reference but never taken
		if (0)//pixelValid <= partlyResetThres)
			validSamples = max(validSamples, pixelValid * (1.0f - tooFarAway) * step(-0.1f, cosAngle));
		else
			validSamples = max(validSamples, pixelValid);
#endif

#ifdef USE_GTX
		// we can bail out early and use a minimal #samples)
		// if some conditions are met as long as the hardware supports it
		if (numSamples >= MIN_SAMPLES)
		{
			//break;
			// if the pixel belongs to a static object and all the samples stay valid in the current frame
			if (!isMovingObject && (validSamples < 1.0f) && (convergence > NUM_SAMPLES))
				break;
			// if the pixel belongs to a dynamic object but the #accumulated samples for this pixel is sufficiently high
			// (=> there was no discontinuity recently)
			//else if (isMovingObject && (convergence > SSAO_CONVERGENCE_THRESHOLD)) break;
			else if (isMovingObject && (convergence > NUM_SAMPLES * 5))
				break;
		}
#endif
#endif // PERFORMANCE_TEST
	}

	// "normalize" ao contribution
	total_ao /= numSamples;

#if 1
	// if surface normal perpenticular to view dir, approx. half of the samples will not count
	// => compensate for this (on the other hand, projected sampling area could be larger!)
	const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
	total_ao *= viewCorrection;
#endif

	//return float3(total_ao, validSamples, numSamples);
	return float3(min(1.0f, total_ao), validSamples, numSamples);
}


/** The mrt shader for screen space ambient occlusion.
	Computes the ssao term for the current pixel, reprojects the previous
	frame's result and blends both, weighted by temporal convergence.
*/
pixel2 main(fragment IN,
			uniform sampler2D colors,
			uniform sampler2D normals,
			uniform sampler2D noiseTex,
			uniform sampler2D samples,
			uniform sampler2D oldTex,
			uniform float4x4 modelViewProj,
			uniform float4x4 oldModelViewProj,
			uniform float temporalCoherence,
			uniform float3 bl,
			uniform float3 br,
			uniform float3 tl,
			uniform float3 tr,
			uniform float3 oldEyePos,
			uniform float3 oldbl,
			uniform float3 oldbr,
			uniform float3 oldtl,
			uniform float3 oldtr,
			uniform sampler2D attribsTex,
			uniform float kernelRadius,
			uniform float sampleIntensity
			)
{
	pixel2 OUT;

	//const float3 normal = normalize(tex2Dlod(normals, float4(IN.texCoord, 0 ,0)).xyz);
	const float3 normal = tex2Dlod(normals, float4(IN.texCoord, 0 ,0)).xyz;

	// reconstruct position from the eye space depth
	const float3 viewDir = IN.view;
	const float eyeSpaceDepth = tex2Dlod(colors, float4(IN.texCoord, 0, 0)).w;
	const float4 eyeSpacePos = float4(-viewDir * eyeSpaceDepth, 1.0f);


	////////////////
	//-- calculcate the current projected posiion (also used for next frame)

	float4 projPos = mul(modelViewProj, eyeSpacePos);
	const float invw = 1.0f / projPos.w;
	projPos *= invw;

	//const float radiusMult = kernelRadius;
	//const float radiusMult = 3e-2;
	// scale the kernel with 1/w so it covers a similar world space extent
	const float radiusMult = kernelRadius * invw;

#ifdef PERFORMANCE_TEST
	// per-pixel motion vector of dynamic objects
	float3 diffVec = tex2Dlod(attribsTex, float4(IN.texCoord, .0f, .0f)).xyz;

	const float sqrMoveSpeed = SqrLen(diffVec);
	const bool isMovingObject = (sqrMoveSpeed > DYNAMIC_OBJECTS_THRESHOLD);


	/////////////////
	//-- compute temporal reprojection

	float3 temporalVals = Reproject(eyeSpacePos, eyeSpaceDepth, IN.texCoord, oldEyePos,
									oldTex, oldModelViewProj, colors, projPos.xyz, invw,
									oldbl, oldbr, oldtl, oldtr, diffVec
									);

	const float oldSsao = temporalVals.x;

	//float oldIdx = temporalCoherence > 1 ? temporalVals.y : .0f;
	float oldIdx = temporalVals.y;
	//float oldWeight = temporalVals.y;
	// blend weight of the history, capped by the coherence parameter
	float oldWeight = clamp(oldIdx, 0, temporalCoherence);
#else
	const bool isMovingObject = false;
	const float oldSsao = .0f;
	float oldWeight = .0f;
	float oldIdx = .0f;
#endif

	float3 ao;

	// cull background note: this should be done with the stencil buffer
	if (eyeSpaceDepth < DEPTH_THRESHOLD)
	{
		if (1)
		{
			ao = ssao(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz, radiusMult,
					  bl, br, tl, tr, normalize(viewDir), oldWeight, sampleIntensity, isMovingObject, oldIdx);
		}
		else
		{
			ao = ssao2(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz, radiusMult,
					   bl, br, tl, tr, normalize(viewDir), oldWeight, sampleIntensity, isMovingObject, normals, oldIdx);
		}
	}
	else
	{
		// background: fully unoccluded
		ao = float3(1.0f, 1.0f, 1.0f);
	}

#ifdef PERFORMANCE_TEST
	///////////
	//-- check if we have to reset pixel because one of the sample points was invalid
	//-- only do this if the current pixel does not belong to a moving object

	// the weight equals the number of samples shot in this pass
	const float newWeight = ao.z;

	// completely reset the ao in this pixel
	const float completelyResetThres = 20.0f;
	// don't fully reset the ao in this pixel, but give low weight to old solution
	const float partlyResetThres = 1.0f;

	// don't check for moving objects, otherwise almost no coherence
	if (!isMovingObject)
	{
		if (ao.y > completelyResetThres)
		{
			oldWeight = .0f;
			oldIdx = .0f;
		}
		else if (ao.y > partlyResetThres)
		{
			//oldWeight = min(oldWeight, 4.0f * NUM_SAMPLES);
			// NOTE(review): this assignment is immediately overwritten below —
			// looks like a leftover experiment; confirm intended behavior
			oldIdx = oldWeight;
			//oldWeight = .0f;
			oldIdx = .0f;
		}
	}


	//////////
	//-- blend ao between old and new samples (and avoid division by zero)

	OUT.illum_col.x = (ao.x * newWeight + oldSsao * oldWeight);
	OUT.illum_col.x /= (newWeight + oldWeight);

	// the new weight for the next frame
	const float newIdx = newWeight + oldIdx;
	//const float combinedWeight = clamp(newIdx, .0f, temporalCoherence);
	const float combinedWeight = clamp(newIdx, .0f, min(newWeight + oldWeight, temporalCoherence));
	//OUT.illum_col.y = combinedWeight;
	OUT.illum_col.y = newIdx; // the new index
	OUT.illum_col.w = eyeSpaceDepth;

	//if (OUT.illum_col.z > 1000) OUT.illum_col.z = 0;
	// this value can be used to check if this pixel belongs to a moving object
	//OUT.col.x = SqrLen(diffVec);
	OUT.illum_col.z = SqrLen(diffVec);
#else
	OUT.illum_col.x = ao.x;
	OUT.illum_col.w = eyeSpaceDepth;
#endif

	return OUT;
}