Context Navigation

ssao.cg @ 3342

Revision 3342, 19.0 KB checked in by mattausch, 15 years ago (diff)
found error

Line
1	#include "../shaderenv.h"
2	#include "common.h"
3
4	////////////////////
5	// Screen Space Ambient Occlusion shader
6	// based on shader of Alexander Kusternig
7
8
9	#define USE_EYESPACE_DEPTH 1
10
11
12	struct fragment // per-fragment input of the shaders in this file
13	{
14	float2 texCoord: TEXCOORD0; // lookup coordinate used for all texture fetches at the current pixel
15	float3 view: TEXCOORD1; // view vector; main() scales its negation by the eye space depth
16	};
17
18
19	struct pixel2 // the two color outputs written by main()
20	{
21	float4 illum_col: COLOR0; // x = ao, w = eye space depth; with PERFORMANCE_TEST also y = weight, z = sample index
22	float4 col: COLOR1; // main() only writes x (SqrLen(diffVec)), and only with PERFORMANCE_TEST defined
23	};
24
25
26	// Solid angle 2 * pi * (1 - cos(asin(radius / dist))) of the cone under which a sphere of the given
27	// radius is seen from distance dist (presumably after Shanmugam & Arikan - TODO confirm the source).
28	inline float occlusionPower(float radius, float dist)
29	{
30	return 6.283185307179586476925286766559f * (1.0f - sqrt(saturate(1.0f - (radius * radius) / (dist * dist)))); // cos(asin(x)) == sqrt(1 - x * x); saturate avoids NaN for radius > dist
31	}
32
33
34
35	// sample position = negated ray returned by Interpol() for texcoord, scaled by the eye space depth
36	inline float3 ReconstructSamplePos(float eyeSpaceDepth,
37	float2 texcoord,
38	float3 bl, float3 br, float3 tl, float3 tr)
39	{
40	// ray through the given texture coordinate, derived from the four corner vectors
41	const float3 cornerRay = Interpol(texcoord, bl, br, tl, tr);
42
43	return -cornerRay * eyeSpaceDepth;
44	}
45
46
47	float ComputeConvergence(uniform sampler2D tex, float2 texCoord, float2 res)
48	{
49	// Returns the convergence stored in the y channel of tex at texCoord (mip level 0).
50	// The disabled variant below took the minimum over the 4 surrounding texels instead, because
51	// interpolated convergence values flicker when upsampling and the filter size does not match.
52
53	const float2 invRes = float2(1.0f / res.x, 1.0f / res.y);
54
55	// snap to the texel grid and map back to texture coordinates (only read by the disabled lookups)
56	float2 center = (floor(texCoord * res) + float2(.5f)) * invRes;
57	//center.x = (floor(texCoord.x * res.x - .5f) + 1.0f) / res.x;
58	//center.y = (floor(texCoord.y * res.y - .5f) + 1.0f) / res.y;
59	//center.y = (floor(texCoord.y * res.y) + .5f) * yOffs;
60
61	/*texelCenterConv.x = tex2Dlod(tex, float4(center + float2( xoffs, yoffs), 0, 0)).y;
62	texelCenterConv.y = tex2Dlod(tex, float4(center + float2( xoffs, -yoffs), 0, 0)).y;
63	texelCenterConv.z = tex2Dlod(tex, float4(center + float2(-xoffs, -yoffs), 0, 0)).y;
64	texelCenterConv.w = tex2Dlod(tex, float4(center + float2(-xoffs, yoffs), 0, 0)).y;
65
66	const float m1 = min(texelCenterConv.x, texelCenterConv.y);
67	const float m2 = min(texelCenterConv.z, texelCenterConv.w);
68
69	const float convergence = min(m1, m2);*/
70
71	//const float convergence = tex2Dlod(tex, float4(center, 0, 0)).y;
72	const float convergence = tex2Dlod(tex, float4(texCoord, 0, 0)).y;
73
74	return convergence;
75	}
76
77	/** Reprojects the pixel into the previous frame and returns float3(ssao, w, idx):
78	the old ssao value (oldTex.x), the old convergence weight (oldTex.y) and the floored old
79	sample index (oldTex.z). w and idx are 0 if the old pixel is off screen or fails the depth test.
80	*/
81	inline float3 Reproject(float4 worldPos,
82	float eyeSpaceDepth, // not read by this function
83	float2 texcoord0, // not read by this function
84	float3 oldEyePos,
85	sampler2D oldTex,
86	float4x4 oldModelViewProj,
87	sampler2D colors, // not read by this function
88	float3 projPos, // not read by this function
89	float invW, // not read by this function
90	float3 oldbl,
91	float3 oldbr,
92	float3 oldtl,
93	float3 oldtr,
94	float3 diffVec
95	)
96	{
97	// compute position from old frame for dynamic objects + translational portion
98	const float3 translatedPos = diffVec - oldEyePos + worldPos.xyz;
99
100
101	/////////////////
102	//-- reproject into old frame and calculate texture position of sample in old frame
103
104	// note: the old model view matrix only holds the view orientation part
105	float4 backProjPos = mul(oldModelViewProj, float4(translatedPos, 1.0f));
106	backProjPos /= backProjPos.w;
107
108	// fit from unit cube into 0 .. 1
109	const float2 oldTexCoords = backProjPos.xy * 0.5f + 0.5f;
110	// retrieve the sample from the last frame
111	const float4 oldPixel = tex2Dlod(oldTex, float4(oldTexCoords, .0f, .0f));
112
113	// the ssao value in the old frame
114	const float ssao = oldPixel.x;
115
116	// the eye space depth that was stored for the sample in the old frame
117	const float oldEyeSpaceDepth = oldPixel.w;
118
119	// vector from eye pos to old sample
120	const float3 viewVec = Interpol(oldTexCoords, oldbl, oldbr, oldtl, oldtr);
121	const float invLen = 1.0f / length(viewVec);
122	const float projectedEyeSpaceDepth = invLen * length(translatedPos); // distance to the reprojected position in units of |viewVec|
123	//const float projectedEyeSpaceDepth = length(translatedPos);
124
125	const float depthDif = abs(1.0f - oldEyeSpaceDepth / projectedEyeSpaceDepth); // relative deviation of the stored depth from the reprojected depth
126
127	// the weight of the accumulated samples from the previous frames
128	float w;
129	float idx;
130
131
132	//////////////
133	//-- reuse old value only if it was still valid in the old frame
134
135	if (1
136	&& (oldTexCoords.x > 0) && (oldTexCoords.x < 1.0f)
137	&& (oldTexCoords.y > 0) && (oldTexCoords.y < 1.0f)
138	&& (depthDif <= MIN_DEPTH_DIFF)
139	)
140	{
141	// pixel valid => retrieve the convergence weight
142	/*float w1 = tex2Dlod(oldTex, float4(oldTexCoords + float2(0.5f / 1024.0f, 0), .0f, .0f)).y;
143	float w2 = tex2Dlod(oldTex, float4(oldTexCoords - float2(0.5f / 1024.0f, 0), .0f, .0f)).y;
144	float w3 = tex2Dlod(oldTex, float4(oldTexCoords + float2(0, 0.5f / 768.0f), .0f, .0f)).y;
145	float w4 = tex2Dlod(oldTex, float4(oldTexCoords - float2(0, 0.5f / 768.0f), .0f, .0f)).y;
146
147	w = min(min(w1, w2), min(w3, w4));*/
148
149	//w = ComputeConvergence(oldTex, oldTexCoords, float2(1024.0f, 768.0f));
150	w = oldPixel.y;
151	idx = floor(oldPixel.z);
152
153	}
154	else
155	{
156	w = 0.0f; // old pixel unusable => no accumulated weight, sample index restarts at 0
157	idx = .0f;
158	}
159
160	return float3(ssao, w, idx);
161	}
162
163
164	/** Variant of the ssao shader that additionally reads the normal of each sample.
165	Returns float3(ao, validSamples, numSamples) with ao clamped to at most 1.
166
167	For each of up to NUM_SAMPLES samples:
168	1) Fetch a precomputed 2D offset (rotated by noise while convergence is low) and scale it by radius.
169	2) Reconstruct the sample position; weight it by 1 / (distance + delta) and by the cosine
170	between the center normal and the direction to the sample.
171	3) Also weight it by .5 + .5 * dot(sampleNormal, -normal), then average over the samples taken.
172	*/
173	float3 ssao2(fragment IN,
174	sampler2D colors,
175	sampler2D noiseTex,
176	sampler2D samples,
177	float3 normal,
178	float3 centerPosition,
179	float radius,
180	float3 bl,
181	float3 br,
182	float3 tl,
183	float3 tr,
184	float3 viewDir,
185	float convergence,
186	float sampleIntensity,
187	bool isMovingObject,
188	sampler2D normalTex,
189	float idx
190	)
191	{
192	float total_ao = .0f;
193	float validSamples = .0f;
194	float numSamples = .0f;
195
196	for (int i = 0; i < NUM_SAMPLES; ++ i)
197	{
198	float2 offset;
199
200	const float2 ssaoOffset =
201	tex2Dlod(samples, float4((0.5f + i + idx) / NUM_PRECOMPUTED_SAMPLES, 0.5f, .0f, .0f)).xy;
202
203	////////////////////
204	//-- add random noise: rotate the offset (myrotate) while the pixel has not converged yet
205	//-- (affects performance for some reason!)
206
207	if (convergence < SSAO_CONVERGENCE_THRESHOLD)
208	{
209	float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, 0, 0)).xy;
210	//offset = myreflect(samples[i], mynoise);
211	//offset = myrotate(samples[i], mynoise.x);
212	offset = myrotate(ssaoOffset, mynoise.x);
213	}
214	else
215	{
216	offset = ssaoOffset;
217	}
218
219	// weight with projected coordinate to reach similar kernel size for near and far
220	const float2 texcoord = IN.texCoord.xy + offset * radius;
221
222	const float4 sampleColor = tex2Dlod(colors, float4(texcoord, .0f, .0f));
223	const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);
224
225
226	////////////////
227	//-- compute contribution of sample using the direction and angle
228
229	float3 dirSample = samplePos - centerPosition;
230
231	const float minDist = 1e-6f;
232	const float delta = 1e-3f;
233
234	const float lengthToSample = length(dirSample);
235	const float sampleWeight = 1.0f / (lengthToSample + delta);
236
237	dirSample /= max(lengthToSample, minDist); // normalize
238
239
240	// angle between current normal and direction to sample controls AO intensity.
241	const float cosAngle = dot(dirSample, normal);
242
243	// the normal of the current sample
244	const float3 sampleNormal = normalize(tex2Dlod(normalTex, float4(texcoord, 0, 0)).xyz);
245
246	// second factor: 1 if the sample normal opposes the current normal, 0 if both are equal
247	//const float cosAngle2 = dot(-dirSample, sampleNormal);
248	const float cosAngle2 = .5f + dot(sampleNormal, -normal) * .5f;
249
250	// inverse distance falloff scaled by the intensity passed in by the caller
251	const float aoContrib = sampleIntensity * sampleWeight;
252
253	//const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;
254	//total_ao += max(cosAngle, .0f) * max(cosAngle2, .0f) * aoContrib;
255	total_ao += max(cosAngle, .0f) * cosAngle2 * aoContrib;
256
257	++ numSamples;
258
259	// check if the samples have been valid in the last frame
260	// only mark sample as invalid if in the last / current frame
261	// they possibly have any influence on the ao
262
263	const float changeFactor = sampleColor.y;
264	const float pixelValid = sampleColor.x;
265
266	// hack:
267	// we check if the sample could have been near enough
268	// to the current pixel or if the angle is small enough
269	// to have any influence in the current or last frame
270	#if 1
271	const float tooFarAway = step(0.5f, lengthToSample - changeFactor);
272	const float partlyResetThres = 1.0f;
273
274	if (pixelValid <= partlyResetThres)
275	validSamples = max(validSamples, pixelValid * (1.0f - tooFarAway) * step(-0.1f, cosAngle));
276	else
277	validSamples = max(validSamples, pixelValid);
278	#endif
279
280	#ifdef USE_GTX
281	// we can bail out early and use a minimal #samples)
282	// if some conditions are met as long as the hardware supports it
283	if (numSamples >= MIN_SAMPLES)
284	{
285	//break;
286	// if the pixel belongs to a static object and all the samples stay valid in the current frame
287	if (!isMovingObject && (validSamples < 1.0f) && (convergence > NUM_SAMPLES)) break;
288	// if the pixel belongs to a dynamic object but the #accumulated samples for this pixel is sufficiently high
289	// (=> there was no discontinuity recently)
290	//else if (isMovingObject && (convergence > SSAO_CONVERGENCE_THRESHOLD)) break;
291	else if (isMovingObject && (convergence > NUM_SAMPLES * 5)) break;
292	}
293	#endif
294	}
295
296	// "normalize" ao contribution
297	total_ao /= numSamples;
298
299	#if 1
300	// if surface normal perpendicular to view dir, approx. half of the samples will not count
301	// => compensate for this (on the other hand, projected sampling area could be larger!)
302	const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
303	total_ao *= viewCorrection;
304	#endif
305
306	//return float3(total_ao, validSamples, numSamples);
307	return float3(min(1.0f, total_ao), validSamples, numSamples);
308	}
309
310
311	/** The ssao shader. Returns float3(ao, validSamples, numSamples) with ao clamped to at most 1.
312	Each sample is weighted by the cosine between the center normal and the direction
313	to the sample, and by 1 / (distance to the sample + delta).
314
315	The algorithm works like the following:
316	1) Look up precomputed 2D offsets around the current texture coordinate, scaled by radius.
317	2) Shoot vectors to the positions there, and check the angle to these positions.
318	3) Summing up the weighted cosines gives an estimation of the occlusion at the current position.
319	*/
320	float3 ssao(fragment IN,
321	sampler2D colors,
322	sampler2D noiseTex,
323	sampler2D samples,
324	float3 normal,
325	float3 centerPosition,
326	float radius,
327	float3 bl,
328	float3 br,
329	float3 tl,
330	float3 tr,
331	float3 viewDir,
332	float convergence,
333	float sampleIntensity,
334	bool isMovingObject,
335	float oldIdx
336	)
337	{
338	float total_ao = .0f;
339	float validSamples = .0f;
340	float numSamples = .0f;
341
342	for (int i = 0; i < NUM_SAMPLES; ++ i)
343	{
344	float2 offset;
345
346	const float2 ssaoOffset =
347	tex2Dlod(samples, float4((0.5f + i + oldIdx) / NUM_PRECOMPUTED_SAMPLES, 0.5f, .0f, .0f)).xy;
348
349	////////////////////
350	//-- add random noise: rotate the offset (myrotate) while the pixel has not converged yet
351	//-- (affects performance for some reason!)
352
353	if (convergence < SSAO_CONVERGENCE_THRESHOLD)
354	{
355	float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, 0, 0)).xy;
356	//offset = myreflect(samples[i], mynoise);
357	//offset = myrotate(samples[i], mynoise.x);
358	offset = myrotate(ssaoOffset, mynoise.x);
359	}
360	else
361	{
362	offset = ssaoOffset;
363	}
364
365
366	// weight with projected coordinate to reach similar kernel size for near and far
367	const float2 texcoord = IN.texCoord.xy + offset * radius;
368
369	const float4 sampleColor = tex2Dlod(colors, float4(texcoord, .0f, .0f));
370	const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);
371
372
373	////////////////
374	//-- compute contribution of sample using the direction and angle
375
376	float3 dirSample = samplePos - centerPosition;
377
378	const float minDist = 1e-6f;
379	const float delta = 1e-3f;
380
381	const float lengthToSample = length(dirSample);
382	const float sampleWeight = 1.0f / (lengthToSample + delta);
383
384	dirSample /= max(lengthToSample, minDist); // normalize
385
386	// angle between current normal and direction to sample controls AO intensity.
387	const float cosAngle = dot(dirSample, normal);
388
389	//const float aoContrib = sampleIntensity / sqrLen;
390	const float aoContrib = sampleIntensity * sampleWeight;
391	//const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;
392
393	total_ao += max(cosAngle, .0f) * aoContrib;
394
395	++ numSamples;
396
397	#ifdef PERFORMANCE_TEST
398	// check if the samples have been valid in the last frame
399	// only mark sample as invalid if in the last / current frame
400	// they possibly have any influence on the ao
401	// (without PERFORMANCE_TEST validSamples stays 0 and the loop never exits early)
402	const float changeFactor = sampleColor.y;
403	const float pixelValid = sampleColor.x;
404
405	// hack:
406	// we check if the sample could have been near enough to the current pixel
407	// or if the angle is small enough
408	// to have any influence in the current or last frame
409
410	#if 1
411	const float partlyResetThres = 1.0f;
412
413	const float tooFarAway = step(0.5f, lengthToSample - changeFactor);
414	if (0)//pixelValid <= partlyResetThres)
415	validSamples = max(validSamples, pixelValid * (1.0f - tooFarAway) * step(-0.1f, cosAngle));
416	else
417	validSamples = max(validSamples, pixelValid);
418	#endif
419
420	#ifdef USE_GTX
421	// we can bail out early and use a minimal #samples)
422	// if some conditions are met as long as the hardware supports it
423	if (numSamples >= MIN_SAMPLES)
424	{
425	//break;
426	// if the pixel belongs to a static object and all the samples stay valid in the current frame
427	if (!isMovingObject && (validSamples < 1.0f) && (convergence > NUM_SAMPLES)) break;
428	// if the pixel belongs to a dynamic object but the #accumulated samples for this pixel is sufficiently high
429	// (=> there was no discontinuity recently)
430	//else if (isMovingObject && (convergence > SSAO_CONVERGENCE_THRESHOLD)) break;
431	else if (isMovingObject && (convergence > NUM_SAMPLES * 5)) break;
432	}
433	#endif
434
435	#endif // PERFORMANCE_TEST
436	}
437
438	// "normalize" ao contribution
439	total_ao /= numSamples;
440
441	#if 1
442	// if surface normal perpendicular to view dir, approx. half of the samples will not count
443	// => compensate for this (on the other hand, projected sampling area could be larger!)
444	const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
445	total_ao *= viewCorrection;
446	#endif
447
448	//return float3(total_ao, validSamples, numSamples);
449	return float3(min(1.0f, total_ao), validSamples, numSamples);
450	}
451
452
453
454	/** The mrt shader for screen space ambient occlusion: computes the ao of the current pixel and,
455	with PERFORMANCE_TEST defined, blends it with the reprojected result of the previous frame. */
456	pixel2 main(fragment IN,
457	uniform sampler2D colors,
458	uniform sampler2D normals,
459	uniform sampler2D noiseTex,
460	uniform sampler2D samples,
461	uniform sampler2D oldTex,
462	uniform float4x4 modelViewProj,
463	uniform float4x4 oldModelViewProj,
464	uniform float temporalCoherence,
465	uniform float3 bl,
466	uniform float3 br,
467	uniform float3 tl,
468	uniform float3 tr,
469	uniform float3 oldEyePos,
470	uniform float3 oldbl,
471	uniform float3 oldbr,
472	uniform float3 oldtl,
473	uniform float3 oldtr,
474	uniform sampler2D attribsTex,
475	uniform float kernelRadius,
476	uniform float sampleIntensity
477	)
478	{
479	pixel2 OUT;
480
481	//const float3 normal = normalize(tex2Dlod(normals, float4(IN.texCoord, 0 ,0)).xyz);
482	const float3 normal = tex2Dlod(normals, float4(IN.texCoord, 0 ,0)).xyz;
483
484	// reconstruct position from the eye space depth
485	const float3 viewDir = IN.view;
486	const float eyeSpaceDepth = tex2Dlod(colors, float4(IN.texCoord, 0, 0)).w;
487	const float4 eyeSpacePos = float4(-viewDir * eyeSpaceDepth, 1.0f);
488
489
490	////////////////
491	//-- calculate the current projected position (also used for next frame)
492
493	float4 projPos = mul(modelViewProj, eyeSpacePos);
494	const float invw = 1.0f / projPos.w;
495	projPos *= invw;
496
497	//const float radiusMult = kernelRadius;
498	//const float radiusMult = 3e-2;
499	const float radiusMult = kernelRadius * invw; // kernel radius shrinks with 1 / w of the projected position
500
501	#ifdef PERFORMANCE_TEST
502
503	float3 diffVec = tex2Dlod(attribsTex, float4(IN.texCoord, 0, 0)).xyz;
504
505	const float sqrMoveSpeed = SqrLen(diffVec);
506	const bool isMovingObject = (sqrMoveSpeed > DYNAMIC_OBJECTS_THRESHOLD);
507
508
509	/////////////////
510	//-- compute temporal reprojection
511
512	float3 temporalVals = Reproject(eyeSpacePos, eyeSpaceDepth, IN.texCoord, oldEyePos,
513	oldTex, oldModelViewProj,
514	colors,
515	projPos.xyz,
516	invw,
517	oldbl, oldbr, oldtl, oldtr,
518	diffVec
519	);
520
521	const float oldSsao = temporalVals.x;
522
523	float oldWeight = temporalVals.y;
524	float oldIdx = temporalVals.z; // disabled alternative: temporalCoherence > 1 ? temporalVals.z : 0
525
526	#else
527
528	const bool isMovingObject = false;
529	const float oldSsao = 0;
530
531	float oldWeight = 0;
532	float oldIdx = 0;
533
534	#endif
535
536	float3 ao;
537
538	// cull background note: this should be done with the stencil buffer
539	if (eyeSpaceDepth < DEPTH_THRESHOLD)
540	{
541	if (1)
542	{
543	ao = ssao(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz,
544	radiusMult, bl, br, tl, tr, normalize(viewDir),
545	oldWeight, sampleIntensity, isMovingObject, oldIdx);
546	}
547	else
548	{
549	ao = ssao2(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz, radiusMult,
550	bl, br, tl, tr, normalize(viewDir), oldWeight, sampleIntensity,
551	isMovingObject, normals, oldIdx);
552	}
553	}
554	else
555	{
556	ao = float3(1.0f, 1.0f, 1.0f);
557	}
558
559
560	#ifdef PERFORMANCE_TEST
561
562	///////////
563	//-- check if we have to reset pixel because one of the sample points was invalid
564	//-- only do this if the current pixel does not belong to a moving object
565
566	// the weight equals the number of samples shot in this pass
567	const float newWeight = ao.z;
568
569	// completely reset the ao in this pixel
570	const float completelyResetThres = 20.0f;
571	// don't fully reset the ao in this pixel, but give low weight to old solution
572	const float partlyResetThres = 1.0f;
573
574	// don't check for moving objects, otherwise almost no coherence
575	if (!isMovingObject)
576	{
577	if (ao.y > completelyResetThres)
578	{
579	oldWeight = .0f;
580	oldIdx = .0f;
581	}
582	else if (ao.y > partlyResetThres)
583	{
584	oldWeight = min(oldWeight, 4.0f * newWeight);
585	//oldWeight = .0f;
586	//oldIdx = .0f;
587	}
588	}
589
590
591	//////////
592	//-- blend ao between old and new samples, weighted by the respective sample counts
593	// NOTE(review): the sum is not guarded; it is nonzero as long as newWeight (ao.z) is at least 1
594	OUT.illum_col.x = (ao.x * newWeight + oldSsao * oldWeight);
595	OUT.illum_col.x /= (newWeight + oldWeight);
596
597	// the new weight for the next frame
598	const float combinedWeight = clamp(newWeight + oldWeight, .0f, temporalCoherence);
599
600	OUT.illum_col.y = combinedWeight;
601	OUT.illum_col.z = oldIdx + 8;//newWeight; // the new index
602	OUT.illum_col.w = eyeSpaceDepth;
603
604	//if (OUT.illum_col.z > 1000) OUT.illum_col.z = 0;
605
606	// this value can be used to check if this pixel belongs to a moving object
607	OUT.col.x = SqrLen(diffVec); // NOTE(review): col.yzw are never written
608	//OUT.illum_col.z = SqrLen(diffVec);
609
610	#else
611	// NOTE(review): illum_col.y / .z and col are not written in this branch
612	OUT.illum_col.x = ao.x;
613	OUT.illum_col.w = eyeSpaceDepth;
614
615	#endif
616
617	return OUT;
618	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format