source: GTP/trunk/App/Demos/Vis/FriendlyCulling/src/shaders/ssao.cg @ 3348

Revision 3348, 19.3 KB, checked in by mattausch, 15 years ago

saving then changing back to 2 targets

#include "../shaderenv.h"
#include "common.h"

////////////////////
// Screen Space Ambient Occlusion shader
// based on a shader by Alexander Kusternig


#define USE_EYESPACE_DEPTH 1


struct fragment
{
    float2 texCoord: TEXCOORD0;
    float3 view: TEXCOORD1;
};


struct pixel2
{
    float4 illum_col: COLOR0;
    //float4 col: COLOR1;
};


// this function is inspired by the paper of shamulgaan in order
// to get a physical expression for the occlusion term
inline float occlusionPower(float radius, float dist)
{
    return 6.283185307179586476925286766559f * (1.0f - cos(asin(radius / dist)));
}
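
// Illustrative note: the constant above is 2 * pi, so occlusionPower() returns the solid angle
// of the spherical cap subtended by a sphere of the given radius seen from the given distance,
// i.e. 2 * pi * (1 - cos(asin(radius / dist))). For example, a sphere of radius 1 at distance 2
// subtends a half-angle of asin(0.5) = 30 degrees, giving 2 * pi * (1 - cos(30 deg)) ~= 0.84 sr.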



// reconstruct world space position
inline float3 ReconstructSamplePos(float eyeSpaceDepth,
                                   float2 texcoord,
                                   float3 bl, float3 br, float3 tl, float3 tr)
{
    float3 viewVec = Interpol(texcoord, bl, br, tl, tr);
    float3 samplePos = -viewVec * eyeSpaceDepth;

    return samplePos;
}
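
// Illustrative sketch (not called by this shader): Interpol() is defined in common.h and is
// assumed here to bilinearly interpolate the four frustum corner vectors by the texture
// coordinate. Under that assumption, a minimal equivalent would look like this:
/*
inline float3 InterpolSketch(float2 w, float3 bl, float3 br, float3 tl, float3 tr)
{
    const float3 bottom = lerp(bl, br, w.x); // blend along the bottom edge
    const float3 top    = lerp(tl, tr, w.x); // blend along the top edge
    return lerp(bottom, top, w.y);           // blend between bottom and top
}
*/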


float ComputeConvergence(uniform sampler2D tex, float2 texCoord, float2 res)
{
    // get the minimum convergence by exactly sampling the 4 surrounding
    // texels in the old texture, otherwise there is flickering because the convergence
    // will be interpolated when upsampling and the filter size does not match!

    const float2 invRes = float2(1.0f / res.x, 1.0f / res.y);

    // get position exactly between texel centers
    float2 center = (floor(texCoord * res) + float2(.5f)) * invRes;
    //center.x = (floor(texCoord.x * res.x - .5f) + 1.0f) / res.x;
    //center.y = (floor(texCoord.y * res.y - .5f) + 1.0f) / res.y;
    //center.y = (floor(texCoord.y * res.y) + .5f) * yOffs;

    /*texelCenterConv.x = tex2Dlod(tex, float4(center + float2( xoffs,  yoffs), 0, 0)).y;
    texelCenterConv.y = tex2Dlod(tex, float4(center + float2( xoffs, -yoffs), 0, 0)).y;
    texelCenterConv.z = tex2Dlod(tex, float4(center + float2(-xoffs, -yoffs), 0, 0)).y;
    texelCenterConv.w = tex2Dlod(tex, float4(center + float2(-xoffs,  yoffs), 0, 0)).y;

    const float m1 = min(texelCenterConv.x, texelCenterConv.y);
    const float m2 = min(texelCenterConv.z, texelCenterConv.w);

    const float convergence = min(m1, m2);*/

    //const float convergence = tex2Dlod(tex, float4(center, 0, 0)).y;
    const float convergence = tex2Dlod(tex, float4(texCoord, 0, 0)).y;

    return convergence;
}
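
// Illustrative sketch (not called by this shader): the exact four-texel minimum described in the
// comment above could look like the following, assuming the convergence is stored in the texture's
// y channel (as elsewhere in this shader) and that the lookup position should be the texel corner
// nearest to texCoord, so that half-texel offsets reach the four surrounding texel centers:
/*
float ComputeMinConvergence(uniform sampler2D tex, float2 texCoord, float2 res)
{
    const float2 halfTexel = 0.5f / res;                       // half-texel offsets
    const float2 corner = floor(texCoord * res + 0.5f) / res;  // corner between the 4 texel centers

    const float c0 = tex2Dlod(tex, float4(corner + float2( halfTexel.x,  halfTexel.y), 0, 0)).y;
    const float c1 = tex2Dlod(tex, float4(corner + float2( halfTexel.x, -halfTexel.y), 0, 0)).y;
    const float c2 = tex2Dlod(tex, float4(corner + float2(-halfTexel.x, -halfTexel.y), 0, 0)).y;
    const float c3 = tex2Dlod(tex, float4(corner + float2(-halfTexel.x,  halfTexel.y), 0, 0)).y;

    return min(min(c0, c1), min(c2, c3));
}
*/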

/** This function computes the reprojection and returns the ssao value
    of the old pixel as well as the accumulated sample index and the
    weight of the pixel in the new frame.
*/
inline float3 Reproject(float4 worldPos,
                        float eyeSpaceDepth,
                        float2 texcoord0,
                        float3 oldEyePos,
                        sampler2D oldTex,
                        float4x4 oldModelViewProj,
                        sampler2D colors,
                        float3 projPos,
                        float invW,
                        float3 oldbl,
                        float3 oldbr,
                        float3 oldtl,
                        float3 oldtr,
                        float3 diffVec
                        )
{
    // compute position from old frame for dynamic objects + translational portion
    const float3 translatedPos = diffVec - oldEyePos + worldPos.xyz;


    /////////////////
    //-- reproject into old frame and calculate texture position of sample in old frame

    // note: the old model view matrix only holds the view orientation part
    float4 backProjPos = mul(oldModelViewProj, float4(translatedPos, 1.0f));
    backProjPos /= backProjPos.w;

    // map from normalized device coordinates [-1, 1] into the [0, 1] texture range
    const float2 oldTexCoords = backProjPos.xy * 0.5f + 0.5f;
    // retrieve the sample from the last frame
    const float4 oldPixel = tex2Dlod(oldTex, float4(oldTexCoords, .0f, .0f));

    // the ssao value in the old frame
    const float ssao = oldPixel.x;

    // the eye space depth of the sample in the old frame
    const float oldEyeSpaceDepth = oldPixel.w;

    // vector from eye pos to old sample
    const float3 viewVec = Interpol(oldTexCoords, oldbl, oldbr, oldtl, oldtr);
    const float invLen = 1.0f / length(viewVec);
    const float projectedEyeSpaceDepth = invLen * length(translatedPos);
    //const float projectedEyeSpaceDepth = length(translatedPos);

    const float depthDif = abs(1.0f - oldEyeSpaceDepth / projectedEyeSpaceDepth);

    // the weight of the accumulated samples from the previous frames and the accumulated
    // sample index (initialized here so that an invalid reprojection simply returns zero;
    // note that the caller currently only uses the x and y components of the return value)
    float w   = .0f;
    float idx = .0f;


    //////////////
    //-- reuse the old value only if it was still valid in the old frame

    if (1
        && (oldTexCoords.x > 0) && (oldTexCoords.x < 1.0f)
        && (oldTexCoords.y > 0) && (oldTexCoords.y < 1.0f)
        && (depthDif <= MIN_DEPTH_DIFF)
        )
    {
        // pixel valid => retrieve the convergence weight
        /*float w1 = tex2Dlod(oldTex, float4(oldTexCoords + float2(0.5f / 1024.0f, 0), .0f, .0f)).y;
        float w2 = tex2Dlod(oldTex, float4(oldTexCoords - float2(0.5f / 1024.0f, 0), .0f, .0f)).y;
        float w3 = tex2Dlod(oldTex, float4(oldTexCoords + float2(0, 0.5f / 768.0f), .0f, .0f)).y;
        float w4 = tex2Dlod(oldTex, float4(oldTexCoords - float2(0, 0.5f / 768.0f), .0f, .0f)).y;

        w = min(min(w1, w2), min(w3, w4));*/

        //w = ComputeConvergence(oldTex, oldTexCoords, float2(1024.0f, 768.0f));
        //w = floor(oldPixel.y);
        //w = oldPixel.y;
        idx = floor(oldPixel.y);
    }

    return float3(ssao, idx, w);
}
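
// Illustrative note: the depth test above is relative, i.e. a reprojected sample is rejected when
// |1 - oldEyeSpaceDepth / projectedEyeSpaceDepth| exceeds MIN_DEPTH_DIFF (presumably defined in
// shaderenv.h). For example, with an assumed MIN_DEPTH_DIFF of 0.01, a sample reprojected to a
// depth of 10.0 is only reused while the stored old depth lies roughly within [9.9, 10.1].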


/** The ssao shader, returning an intensity value between 0 and 1.
    This version of the ssao shader uses the dot product between the
    pixel-to-sample direction and the sample normal as a weight.

    The algorithm works as follows:
    1) Check a circular area around the current position.
    2) Shoot vectors to the positions there, and check the angle to these positions.
    3) Summing up these angles gives an estimation of the occlusion at the current position.
*/
float3 ssao2(fragment IN,
             sampler2D colors,
             sampler2D noiseTex,
             sampler2D samples,
             float3 normal,
             float3 centerPosition,
             float radius,
             float3 bl,
             float3 br,
             float3 tl,
             float3 tr,
             float3 viewDir,
             float convergence,
             float sampleIntensity,
             bool isMovingObject,
             sampler2D normalTex,
             float idx
             )
{
    float total_ao = .0f;
    float validSamples = .0f;
    float numSamples = .0f;

    for (int i = 0; i < NUM_SAMPLES; ++ i)
    {
        float2 offset;

        const float2 ssaoOffset =
            tex2Dlod(samples, float4((0.5f + i + idx) / NUM_PRECOMPUTED_SAMPLES, 0.5f, .0f, .0f)).xy;


        ////////////////////
        //-- add random noise: reflect around random normal vector
        //-- (affects performance for some reason!)

        if (convergence < SSAO_CONVERGENCE_THRESHOLD)
        {
            float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, .0f, .0f)).xy;
            //offset = myreflect(samples[i], mynoise);
            //offset = myrotate(samples[i], mynoise.x);
            offset = myrotate(ssaoOffset, mynoise.x);
        }
        else
        {
            offset = ssaoOffset;
        }

        // weight with projected coordinate to reach similar kernel size for near and far
        const float2 texcoord = IN.texCoord.xy + offset * radius;

        const float4 sampleColor = tex2Dlod(colors, float4(texcoord, .0f, .0f));
        const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);


        ////////////////
        //-- compute contribution of sample using the direction and angle

        float3 dirSample = samplePos - centerPosition;

        const float minDist = 1e-6f;
        const float delta = 1e-3f;

        const float lengthToSample = length(dirSample);
        const float sampleWeight = 1.0f / (lengthToSample + delta);

        dirSample /= max(lengthToSample, minDist); // normalize


        // angle between current normal and direction to sample controls AO intensity.
        const float cosAngle = dot(dirSample, normal);

        // the normal of the current sample
        const float3 sampleNormal = normalize(tex2Dlod(normalTex, float4(texcoord, 0, 0)).xyz);

        // how much the sample surface faces back towards the center surface also modulates
        // the contribution (1 for opposing normals, 0 for parallel normals)
        //const float cosAngle2 = dot(-dirSample, sampleNormal);
        const float cosAngle2 = .5f + dot(sampleNormal, -normal) * .5f;

        dirSample *= minDist;
        const float aoContrib = sampleIntensity * sampleWeight;

        //const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;
        //total_ao += max(cosAngle, .0f) * max(cosAngle2, .0f) * aoContrib;
        total_ao += max(cosAngle, .0f) * cosAngle2 * aoContrib;

        ++ numSamples;

        // check if the samples were valid in the last frame and only mark a sample as
        // invalid if it possibly had any influence on the ao in the last or current frame

        const float changeFactor = sampleColor.y;
        const float pixelValid = sampleColor.x;

        // hack:
        // we check if the sample could have been near enough to the current pixel,
        // or if the angle is small enough,
        // to have had any influence in the current or last frame
#if 1
        const float tooFarAway = step(0.5f, lengthToSample - changeFactor);
        const float partlyResetThres = 1.0f;

        if (pixelValid <= partlyResetThres)
            validSamples = max(validSamples, pixelValid * (1.0f - tooFarAway) * step(-0.1f, cosAngle));
        else
            validSamples = max(validSamples, pixelValid);
#endif

#ifdef USE_GTX
        // we can bail out early and use a minimal number of samples
        // if some conditions are met, as long as the hardware supports dynamic branching
        if (numSamples >= MIN_SAMPLES)
        {
            //break;
            // if the pixel belongs to a static object and all the samples stayed valid in the current frame
            if (!isMovingObject && (validSamples < 1.0f) && (convergence > NUM_SAMPLES)) break;
            // if the pixel belongs to a dynamic object but the #accumulated samples for this pixel is sufficiently high
            // (=> there was no discontinuity recently)
            //else if (isMovingObject && (convergence > SSAO_CONVERGENCE_THRESHOLD)) break;
            else if (isMovingObject && (convergence > NUM_SAMPLES * 5)) break;
        }
#endif
    }

    // "normalize" the ao contribution
    total_ao /= numSamples;

#if 1
    // if the surface normal is perpendicular to the view dir, approx. half of the samples will not count
    // => compensate for this (on the other hand, the projected sampling area could be larger!)
    const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
    total_ao *= viewCorrection;
#endif

    //return float3(total_ao, validSamples, numSamples);
    return float3(min(1.0f, total_ao), validSamples, numSamples);
}


/** The ssao shader, returning an intensity value between 0 and 1.
    This version of the ssao shader uses the dot product between the
    pixel-to-sample direction and the pixel normal as a weight.

    The algorithm works as follows:
    1) Check a circular area around the current position.
    2) Shoot vectors to the positions there, and check the angle to these positions.
    3) Summing up these angles gives an estimation of the occlusion at the current position.
*/
float3 ssao(fragment IN,
            sampler2D colors,
            sampler2D noiseTex,
            sampler2D samples,
            float3 normal,
            float3 centerPosition,
            float radius,
            float3 bl,
            float3 br,
            float3 tl,
            float3 tr,
            float3 viewDir,
            float convergence,
            float sampleIntensity,
            bool isMovingObject,
            float oldIdx
            )
{
    float total_ao = .0f;
    float validSamples = .0f;
    float numSamples = .0f;

    for (int i = 0; i < NUM_SAMPLES; ++ i)
    {
        float2 offset;

        const float2 ssaoOffset =
            tex2Dlod(samples, float4((0.5f + i + oldIdx) / NUM_PRECOMPUTED_SAMPLES, 0.5f, .0f, .0f)).xy;

        ////////////////////
        //-- add random noise: reflect around random normal vector
        //-- (affects performance for some reason!)

        if (convergence < SSAO_CONVERGENCE_THRESHOLD)
        {
            float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, 0, 0)).xy;
            //offset = myreflect(samples[i], mynoise);
            //offset = myrotate(samples[i], mynoise.x);
            offset = myrotate(ssaoOffset, mynoise.x);
        }
        else
        {
            offset = ssaoOffset;
        }


        // weight with projected coordinate to reach similar kernel size for near and far
        const float2 texcoord = IN.texCoord.xy + offset * radius;

        const float4 sampleColor = tex2Dlod(colors, float4(texcoord, .0f, .0f));
        const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);


        ////////////////
        //-- compute contribution of sample using the direction and angle

        float3 dirSample = samplePos - centerPosition;

        const float minDist = 1e-6f;
        const float eps = 1e-3f;

        const float lengthToSample = length(dirSample);
        const float sampleWeight = 1.0f / max(lengthToSample, eps);

        dirSample /= max(length(dirSample), minDist); // normalize

        // angle between current normal and direction to sample controls AO intensity.
        const float cosAngle = dot(dirSample, normal);

        //const float aoContrib = sampleIntensity / sqrLen;
        const float aoContrib = sampleIntensity * sampleWeight;
        //const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;

        total_ao += max(cosAngle, .0f) * aoContrib;

        ++ numSamples;

#ifdef PERFORMANCE_TEST
        // check if the samples were valid in the last frame and only mark a sample as
        // invalid if it possibly had any influence on the ao in the last or current frame

        const float changeFactor = sampleColor.y;
        const float pixelValid = sampleColor.x;

        // hack:
        // we check if the sample could have been near enough to the current pixel,
        // or if the angle is small enough,
        // to have had any influence in the current or last frame

#if 1
        const float partlyResetThres = 1.0f;

        const float tooFarAway = step(0.5f, lengthToSample - changeFactor);
        if (0)//pixelValid <= partlyResetThres)
            validSamples = max(validSamples, pixelValid * (1.0f - tooFarAway) * step(-0.1f, cosAngle));
        else
            validSamples = max(validSamples, pixelValid);
#endif

#ifdef USE_GTX
        // we can bail out early and use a minimal number of samples
        // if some conditions are met, as long as the hardware supports dynamic branching
        if (numSamples >= MIN_SAMPLES)
        {
            //break;
            // if the pixel belongs to a static object and all the samples stayed valid in the current frame
            if (!isMovingObject && (validSamples < 1.0f) && (convergence > NUM_SAMPLES)) break;
            // if the pixel belongs to a dynamic object but the #accumulated samples for this pixel is sufficiently high
            // (=> there was no discontinuity recently)
            //else if (isMovingObject && (convergence > SSAO_CONVERGENCE_THRESHOLD)) break;
            else if (isMovingObject && (convergence > NUM_SAMPLES * 5)) break;
        }
#endif

#endif // PERFORMANCE_TEST
    }

    // "normalize" ao contribution
    total_ao /= numSamples;

#if 1
    // if the surface normal is perpendicular to the view dir, approx. half of the samples will not count
    // => compensate for this (on the other hand, the projected sampling area could be larger!)
    const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
    total_ao *= viewCorrection;
#endif

    //return float3(total_ao, validSamples, numSamples);
    return float3(min(1.0f, total_ao), validSamples, numSamples);
}
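
// Illustrative distillation (not called by this shader): stripped of the noise rotation, the
// temporal bookkeeping, the early-out logic and the view correction, the core estimator of
// ssao() above accumulates max(dot(dirToSample, normal), 0), weighted by the reciprocal
// distance to each sample, and averages the result:
/*
float ssaoCoreSketch(fragment IN, sampler2D colors, sampler2D samples,
                     float3 normal, float3 centerPosition, float radius,
                     float3 bl, float3 br, float3 tl, float3 tr,
                     float sampleIntensity)
{
    float total_ao = .0f;

    for (int i = 0; i < NUM_SAMPLES; ++ i)
    {
        // fetch a precomputed 2d sample offset and scale it by the kernel radius
        const float2 offset =
            tex2Dlod(samples, float4((0.5f + i) / NUM_PRECOMPUTED_SAMPLES, 0.5f, .0f, .0f)).xy;
        const float2 texcoord = IN.texCoord.xy + offset * radius;

        // reconstruct the sample position from the depth stored in the w channel
        const float sampleDepth = tex2Dlod(colors, float4(texcoord, .0f, .0f)).w;
        float3 dirSample = ReconstructSamplePos(sampleDepth, texcoord, bl, br, tl, tr) - centerPosition;

        const float lengthToSample = length(dirSample);
        dirSample /= max(lengthToSample, 1e-6f); // normalize

        // samples in front of the surface occlude; closer samples occlude more
        total_ao += max(dot(dirSample, normal), .0f) * sampleIntensity / max(lengthToSample, 1e-3f);
    }

    return min(1.0f, total_ao / NUM_SAMPLES);
}
*/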



/** The mrt shader for screen space ambient occlusion
*/
pixel2 main(fragment IN,
            uniform sampler2D colors,
            uniform sampler2D normals,
            uniform sampler2D noiseTex,
            uniform sampler2D samples,
            uniform sampler2D oldTex,
            uniform float4x4 modelViewProj,
            uniform float4x4 oldModelViewProj,
            uniform float temporalCoherence,
            uniform float3 bl,
            uniform float3 br,
            uniform float3 tl,
            uniform float3 tr,
            uniform float3 oldEyePos,
            uniform float3 oldbl,
            uniform float3 oldbr,
            uniform float3 oldtl,
            uniform float3 oldtr,
            uniform sampler2D attribsTex,
            uniform float kernelRadius,
            uniform float sampleIntensity
            )
{
    pixel2 OUT;

    //const float3 normal = normalize(tex2Dlod(normals, float4(IN.texCoord, 0, 0)).xyz);
    const float3 normal = tex2Dlod(normals, float4(IN.texCoord, 0, 0)).xyz;

    // reconstruct position from the eye space depth
    const float3 viewDir = IN.view;
    const float eyeSpaceDepth = tex2Dlod(colors, float4(IN.texCoord, 0, 0)).w;
    const float4 eyeSpacePos = float4(-viewDir * eyeSpaceDepth, 1.0f);


    ////////////////
    //-- calculate the current projected position (also used for the next frame)

    float4 projPos = mul(modelViewProj, eyeSpacePos);
    const float invw = 1.0f / projPos.w;
    projPos *= invw;

    //const float radiusMult = kernelRadius;
    //const float radiusMult = 3e-2;
    const float radiusMult = kernelRadius * invw;

#ifdef PERFORMANCE_TEST

    float3 diffVec = tex2Dlod(attribsTex, float4(IN.texCoord, .0f, .0f)).xyz;

    const float sqrMoveSpeed = SqrLen(diffVec);
    const bool isMovingObject = (sqrMoveSpeed > DYNAMIC_OBJECTS_THRESHOLD);


    /////////////////
    //-- compute temporal reprojection

    float3 temporalVals = Reproject(eyeSpacePos, eyeSpaceDepth, IN.texCoord, oldEyePos,
                                    oldTex, oldModelViewProj,
                                    colors,
                                    projPos.xyz,
                                    invw,
                                    oldbl, oldbr, oldtl, oldtr,
                                    diffVec
                                    );

    const float oldSsao = temporalVals.x;

    //float oldIdx = temporalCoherence > 1 ? temporalVals.y : .0f;
    float oldIdx = temporalVals.y;
    //float oldWeight = temporalVals.y;
    float oldWeight = clamp(oldIdx, 0, temporalCoherence);

#else

    const bool isMovingObject = false;
    const float oldSsao = .0f;

    float oldWeight = .0f;
    float oldIdx = .0f;

#endif

    float3 ao;

    // cull the background; note: this should be done with the stencil buffer
    if (eyeSpaceDepth < DEPTH_THRESHOLD)
    {
        if (1)
        {
            ao = ssao(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz,
                      radiusMult, bl, br, tl, tr, normalize(viewDir),
                      oldWeight, sampleIntensity, isMovingObject, oldIdx);
        }
        else
        {
            ao = ssao2(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz, radiusMult,
                       bl, br, tl, tr, normalize(viewDir), oldWeight, sampleIntensity,
                       isMovingObject, normals, oldIdx);
        }
    }
    else
    {
        ao = float3(1.0f, 1.0f, 1.0f);
    }


#ifdef PERFORMANCE_TEST

    ///////////
    //-- check if we have to reset the pixel because one of the sample points was invalid
    //-- only do this if the current pixel does not belong to a moving object

    // the weight equals the number of samples shot in this pass
    const float newWeight = ao.z;

    // completely reset the ao in this pixel
    const float completelyResetThres = 20.0f;
    // don't fully reset the ao in this pixel, but give low weight to the old solution
    const float partlyResetThres = 1.0f;

    // don't do this check for moving objects, otherwise there is almost no coherence
    if (!isMovingObject)
    {
        if (ao.y > completelyResetThres)
        {
            oldWeight = .0f;
            oldIdx = .0f;
        }
        else if (ao.y > partlyResetThres)
        {
            //if (oldIdx > 4.0f * NUM_SAMPLES) oldIdx = 0;
            oldWeight = min(oldWeight, 4.0f * NUM_SAMPLES); oldIdx = oldWeight;
            //oldWeight = oldIdx;//.0f; oldIdx = .0f;
        }
    }


    //////////
    //-- blend the ao between the old and new samples (and avoid division by zero)

    OUT.illum_col.x = (ao.x * newWeight + oldSsao * oldWeight);
    OUT.illum_col.x /= (newWeight + oldWeight);
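
    // Illustrative note: this blend is a running average of the previously accumulated and the
    // freshly shot samples. For example, with oldWeight = 8 accumulated samples at an ssao of 0.5
    // and newWeight = 8 new samples at an ao of 0.7, the result is (0.7 * 8 + 0.5 * 8) / 16 = 0.6.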

    // the new accumulated sample index for the next frame
    const float newIdx = newWeight + oldIdx;
    //const float combinedWeight = clamp(newIdx, .0f, temporalCoherence);
    const float combinedWeight = clamp(newIdx, .0f, min(newWeight + oldWeight, temporalCoherence));

    //OUT.illum_col.y = combinedWeight;
    OUT.illum_col.y = newIdx; // the new index
    OUT.illum_col.w = eyeSpaceDepth;

    //if (OUT.illum_col.z > 1000) OUT.illum_col.z = 0;

    // this value can be used to check if this pixel belongs to a moving object
    //OUT.col.x = SqrLen(diffVec);
    OUT.illum_col.z = SqrLen(diffVec);

#else

    OUT.illum_col.x = ao.x;
    OUT.illum_col.w = eyeSpaceDepth;

#endif

    return OUT;
}
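
// Illustrative note: channel layout of the render target written above, as it is read back by
// Reproject() in the next frame (with PERFORMANCE_TEST defined):
//   x = blended ssao value, y = accumulated sample index,
//   z = squared length of the pixel's motion vector, w = eye space depth.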