source: GTP/trunk/App/Demos/Vis/FriendlyCulling/src/shaders/ssao.cg @ 3221

Revision 3221, 13.8 KB checked in by mattausch, 16 years ago (diff)

Added some statistics functionality, but SSAO is slower than before (probably because the SSAO width and intensity were made variable instead of hardcoded): from >180 frames to <130 frames!

#include "../shaderenv.h"


////////////////////
// Screen Space Ambient Occlusion shader
// based on the shader by Alexander Kusternig
7
[3144]8
[3106]9#define USE_EYESPACE_DEPTH 1
[3105]10
11
[2881]12struct fragment
13{
[2889]14        float2 texCoord: TEXCOORD0;
15        float3 view: TEXCOORD1;
[2881]16};
17
18
19struct pixel
20{
21        float4 illum_col: COLOR0;
22};
23
24
// solid angle (in steradians) subtended by a sphere of the given radius at distance dist:
// 2 * pi * (1 - cos(asin(radius / dist)))
inline float occlusionPower(float radius, float dist)
{
    return 6.283185307179586476925286766559f * (1.0f - cos(asin(radius / dist)));
}


inline float SqrLen(float3 v)
{
    return v.x * v.x + v.y * v.y + v.z * v.z;
}


inline float2 myreflect(float2 pt, float2 n)
{
    // distance to plane
    float d = dot(n, pt);
    // reflect around plane
    float2 rpt = pt - d * 2.0f * n;

    return rpt;
}


inline float3 Interpol(float2 w, float3 bl, float3 br, float3 tl, float3 tr)
{
    float3 x1 = lerp(bl, tl, w.y);
    float3 x2 = lerp(br, tr, w.y);
    float3 v = lerp(x1, x2, w.x);

    return v;
}


// reconstruct world space position
inline float3 ReconstructSamplePos(float eyeSpaceDepth,
                                   float2 texcoord,
                                   float3 bl, float3 br, float3 tl, float3 tr)
{
    float3 viewVec = Interpol(texcoord, bl, br, tl, tr);
    float3 samplePos = -viewVec * eyeSpaceDepth;

    return samplePos;
}



/** This function computes the reprojection into the old frame and returns
    the ssao value of the old pixel as well as the weight (convergence)
    of that pixel for reuse in the new frame.
*/
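// Reprojection sketch (mirrors the code below):
//   translatedPos = worldPos + diffVec - oldEyePos             (object motion + camera translation)
//   backProjPos   = oldModelViewProj * (translatedPos, 1)      (old view-projection, rotation part only)
//   oldTexCoords  = backProjPos.xy / backProjPos.w * 0.5 + 0.5  (NDC -> [0, 1] texture space)
// The old value is reused only if oldTexCoords lies on screen and the reprojected
// depth roughly matches the depth stored in the old frame.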
inline float2 temporalSmoothing(float4 worldPos,
                                float eyeSpaceDepth,
                                float2 texcoord0,
                                float3 oldEyePos,
                                sampler2D oldTex,
                                float4x4 oldModelViewProj,
                                sampler2D colors,
                                float3 projPos,
                                float invW,
                                float3 oldbl,
                                float3 oldbr,
                                float3 oldtl,
                                float3 oldtr,
                                float3 diffVec
                                )
{
    // compute position from old frame for dynamic objects + translational portion
    const float3 translatedPos = diffVec - oldEyePos + worldPos.xyz;


    /////////////////
    //-- reproject into old frame and calculate texture position of sample in old frame

    // note: the old model view matrix only holds the view orientation part
    float4 backProjPos = mul(oldModelViewProj, float4(translatedPos, 1.0f));
    backProjPos /= backProjPos.w;

    // fit from unit cube into 0 .. 1
    const float2 oldTexCoords = backProjPos.xy * 0.5f + 0.5f;
    // retrieve the sample from the last frame
    const float4 oldPixel = tex2Dlod(oldTex, float4(oldTexCoords, .0f, .0f));

    // the ssao value in the old frame
    const float ssao = oldPixel.x;

    // the eye space depth of the sample in the old frame
    const float oldEyeSpaceDepth = oldPixel.w;

    // vector from eye pos to old sample
    const float3 viewVec = Interpol(oldTexCoords, oldbl, oldbr, oldtl, oldtr);
    const float invLen = 1.0f / length(viewVec);
    const float projectedEyeSpaceDepth = invLen * length(translatedPos);
    //const float projectedEyeSpaceDepth = length(translatedPos);

    const float depthDif = abs(1.0f - oldEyeSpaceDepth / projectedEyeSpaceDepth);

    // screen border margin of one texel (note: hardcoded for a 1024x768 render target)
    const float xOffs = 1.0f / 1024.0f;
    const float yOffs = 1.0f / 768.0f;
    const float eps = 1e-6f;

    // the weight of the old value
    float w;

    //////////////
    //-- reuse old value only if it was still valid in the old frame

    if (1
        && (oldTexCoords.x + eps >= xOffs) && (oldTexCoords.x <= 1.0f - xOffs + eps)
        && (oldTexCoords.y + eps >= yOffs) && (oldTexCoords.y <= 1.0f - yOffs + eps)
        && (depthDif <= MIN_DEPTH_DIFF)
        )
    {
        // pixel valid => retrieve the convergence weight
        w = oldPixel.y;
    }
    else
    {
        w = 0.0f;
    }

    return float2(ssao, w);
}


/** The ssao shader returning an intensity value between 0 and 1.
    This version of the ssao shader uses the dot product between the pixel
    normal and the sample normal as weight.
*/
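// Per-sample weighting as implemented below (sketch):
//   w_i = (0.5 + 0.5 * dot(sampleNormal, -normal)) * step(0, dot(dirSample, normal))
//   ao  = average over i of w_i * sampleIntensity / d_i^2 * viewCorrection
// and the function returns max(0, 1 - ao) in its x component.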
float3 ssao2(fragment IN,
             sampler2D colors,
             sampler2D noiseTex,
             float2 samples[NUM_SAMPLES],
             float3 normal,
             float3 centerPosition,
             float scaleFactor,
             float3 bl,
             float3 br,
             float3 tl,
             float3 tr,
             float3 viewDir,
             sampler2D normalTex,
             float sampleIntensity
             )
{
    float total_ao = .0f;
    float numSamples = .0f;
    float validSamples = .0f;

    for (int i = 0; i < NUM_SAMPLES; ++ i)
    {
        const float2 offset = samples[i];

#if 1
        ////////////////////
        //-- add random noise: reflect around random normal vector (rather slow!)

        const float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, 0, 0)).xy;
        const float2 offsetTransformed = myreflect(offset, mynoise);
#else
        const float2 offsetTransformed = offset;
#endif
        // weight with projected coordinate to reach similar kernel size for near and far
        //const float2 texcoord = IN.texCoord.xy + offsetTransformed * scaleFactor + jitter;
        const float2 texcoord = IN.texCoord.xy + offsetTransformed * scaleFactor;

        //if ((texcoord.x <= 1.0f) && (texcoord.x >= 0.0f) && (texcoord.y <= 1.0f) && (texcoord.y >= 0.0f)) ++ numSamples;
        float4 sampleColor = tex2Dlod(colors, float4(texcoord, 0, 0));

        const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);
        // the normal of the current sample
        const float3 sampleNormal = tex2Dlod(normalTex, float4(texcoord, 0, 0)).xyz;


        ////////////////
        //-- compute contribution of sample using the direction and angle

        float3 dirSample = samplePos - centerPosition;

        const float sqrLen = max(SqrLen(dirSample), 1e-2f);
        const float lengthToSample = sqrt(sqrLen);
        //const float lengthToSample = max(length(dirSample), 1e-6f);

        dirSample /= lengthToSample; // normalize

        // angle between current normal and direction to sample controls AO intensity.
        float cosAngle = .5f + dot(sampleNormal, -normal) * 0.5f;
        // use binary decision to cull samples that are behind current shading point
        cosAngle *= step(0.0f, dot(dirSample, normal));

        const float aoContrib = sampleIntensity / sqrLen;
        //const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;

#if 1
        // if the surface normal is perpendicular to the view dir, approx. half of the samples will not count
        // => compensate for this (on the other hand, the projected sampling area could be larger!)

        const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
        total_ao += cosAngle * aoContrib * viewCorrection;
#else
        total_ao += cosAngle * aoContrib;
#endif
        // check if the samples have been valid in the last frame
        validSamples += (1.0f - step(1.0f, lengthToSample)) * sampleColor.x;

        ++ numSamples;
    }

    total_ao /= numSamples;

    return float3(max(0.0f, 1.0f - total_ao), validSamples, numSamples);
}


/** The ssao shader returning an intensity value between 0 and 1.
    This version of the ssao shader uses the dot product between
    the pixel-to-sample direction and the pixel normal as weight.

    The algorithm works as follows:
    1) Check a circular area around the current position.
    2) Shoot vectors to the positions there, and check the angle to these positions.
    3) Summing up these angles gives an estimate of the occlusion at the current position.
*/
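// In formula form, the estimate computed below is roughly
//   ao = (1/N) * sum_i max(dot(dirSample_i, normal), 0) * sampleIntensity / d_i^2 * viewCorrection
// where d_i is the (clamped) distance to sample i and viewCorrection compensates for
// samples that are rejected when the normal is nearly perpendicular to the view direction.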
float3 ssao(fragment IN,
            sampler2D colors,
            sampler2D noiseTex,
            float2 samples[NUM_SAMPLES],
            float3 normal,
            float3 centerPosition,
            float scaleFactor,
            float3 bl,
            float3 br,
            float3 tl,
            float3 tr,
            float3 viewDir,
            float newWeight,
            float sampleIntensity,
            bool isMovingObject
            )
{
    float total_ao = .0f;
    float validSamples = .0f;
    float numSamples = .0f;

    for (int i = 0; i < NUM_SAMPLES; ++ i)
    {
        const float2 offset = samples[i];

#if 1
        ////////////////////
        //-- add random noise: reflect around random normal vector
        //-- (slows down the computation for some reason!)

        float2 mynoise = tex2Dlod(noiseTex, float4(IN.texCoord * 4.0f, 0, 0)).xy;
        const float2 offsetTransformed = myreflect(offset, mynoise);
#else
        const float2 offsetTransformed = offset;
#endif
        // weight with projected coordinate to reach similar kernel size for near and far
        const float2 texcoord = IN.texCoord.xy + offsetTransformed * scaleFactor;

        const float4 sampleColor = tex2Dlod(colors, float4(texcoord, .0f, .0f));
        const float3 samplePos = ReconstructSamplePos(sampleColor.w, texcoord, bl, br, tl, tr);


        ////////////////
        //-- compute contribution of sample using the direction and angle

        float3 dirSample = samplePos - centerPosition;

        const float sqrLen = max(SqrLen(dirSample), 1e-2f);
        const float lengthToSample = sqrt(sqrLen);

        dirSample /= lengthToSample; // normalize

        // angle between current normal and direction to sample controls AO intensity.
        const float cosAngle = max(dot(dirSample, normal), .0f);
        const float aoContrib = sampleIntensity / sqrLen;
        //const float aoContrib = (1.0f > lengthToSample) ? occlusionPower(9e-2f, DISTANCE_SCALE + lengthToSample): .0f;

#if 1
        // if the surface normal is perpendicular to the view dir, approx. half of the samples will not count
        // => compensate for this (on the other hand, the projected sampling area could be larger!)

        const float viewCorrection = 1.0f + VIEW_CORRECTION_SCALE * max(dot(viewDir, normal), 0.0f);
        total_ao += cosAngle * aoContrib * viewCorrection;
#else
        total_ao += cosAngle * aoContrib;
#endif

        ++ numSamples;

        // check if the samples have been valid in the last frame;
        // only mark a sample as invalid if it could possibly have had
        // any influence on the ao in the last / current frame
        const float changeFactor = sampleColor.y;
        const float pixelValid = sampleColor.x;

        // we check if the sample could have been near enough to the current pixel
        // to have any influence in the current or last frame
        const float tooFarAway = step(0.5f, lengthToSample - changeFactor);
        validSamples = max(validSamples, (1.0f - tooFarAway) * pixelValid);

#ifdef QWQ//USE_GTX
        // we can bail out early and use a minimal #samples
        // if some conditions are met, as long as the hardware supports it
        if (numSamples >= 8)
        {
            // if the pixel belongs to a static object and all the samples stay valid in the current frame
            if (!isMovingObject && (validSamples < 1.0f)) break;
            // if the pixel belongs to a dynamic object but the #accumulated samples for this pixel is sufficiently high
            // (=> there was no discontinuity recently)
            else if (isMovingObject && (newWeight > NUM_SAMPLES * 5)) break;
        }
#endif

    }

    // scale ao contribution
    total_ao /= numSamples;

    return float3(total_ao, validSamples, numSamples);
}



/** The MRT shader for screen space ambient occlusion
*/
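// Output layout of the render target (see the assignments at the end of main):
//   illum_col.x = blended ssao value
//   illum_col.y = combined convergence weight (clamped to temporalCoherence)
//   illum_col.z = squared length of the per-pixel motion vector (diffVec)
//   illum_col.w = eye space depth of the current pixel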
pixel main(fragment IN,
           uniform sampler2D colors,
           uniform sampler2D normals,
           uniform sampler2D noiseTex,
           uniform float2 samples[NUM_SAMPLES],
           uniform sampler2D oldTex,
           uniform float4x4 modelViewProj,
           uniform float4x4 oldModelViewProj,
           uniform float temporalCoherence,
           uniform float3 bl,
           uniform float3 br,
           uniform float3 tl,
           uniform float3 tr,
           uniform float3 oldEyePos,
           uniform float3 oldbl,
           uniform float3 oldbr,
           uniform float3 oldtl,
           uniform float3 oldtr,
           uniform sampler2D attribsTex,
           uniform float kernelRadius,
           uniform float sampleIntensity
           )
{
    pixel OUT;

    //const float3 normal = normalize(tex2Dlod(normals, float4(IN.texCoord, 0, 0)).xyz);
    const float3 normal = tex2Dlod(normals, float4(IN.texCoord, 0, 0)).xyz;

    // reconstruct position from the eye space depth
    const float3 viewDir = IN.view;
    const float eyeSpaceDepth = tex2Dlod(colors, float4(IN.texCoord, 0, 0)).w;
    const float4 eyeSpacePos = float4(-viewDir * eyeSpaceDepth, 1.0f);

    float3 diffVec = tex2Dlod(attribsTex, float4(IN.texCoord, 0, 0)).xyz;


    ////////////////
    //-- calculate the current projected position (also used for the next frame)

    float4 projPos = mul(modelViewProj, eyeSpacePos);
    const float invw = 1.0f / projPos.w;
    projPos *= invw;
    float scaleFactor = kernelRadius * invw;

    const float sqrMoveSpeed = SqrLen(diffVec);
    const bool isMovingObject = (sqrMoveSpeed > DYNAMIC_OBJECTS_THRESHOLD);


    /////////////////
    //-- compute temporal reprojection

    float2 temporalVals = temporalSmoothing(eyeSpacePos, eyeSpaceDepth, IN.texCoord, oldEyePos,
                                            oldTex, oldModelViewProj,
                                            colors,
                                            projPos.xyz,
                                            invw,
                                            oldbl, oldbr, oldtl, oldtr,
                                            diffVec
                                            );

    const float oldSsao = temporalVals.x;
    float oldWeight = temporalVals.y;

    float3 ao;

    // cull the background; note: this should be done with the stencil buffer
    if (eyeSpaceDepth < 1e10f)
    {
        ao = ssao(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz, scaleFactor, bl, br, tl, tr, normalize(viewDir), oldWeight, sampleIntensity, isMovingObject);
        //ao = ssao2(IN, colors, noiseTex, samples, normal, eyeSpacePos.xyz, scaleFactor, bl, br, tl, tr, normalize(viewDir), normals, sampleIntensity);
    }
    else
    {
        ao = float3(1.0f, 1.0f, 1.0f);
    }


    ///////////
    //-- check if we have to reset pixel because one of the sample points was invalid
    //-- only do this if the current pixel does not belong to a moving object

    // the weight equals the number of samples shot in this pass
    const float newWeight = ao.z;

    const float completelyResetThres = 4.0f;
    const float partlyResetThres = 1.0f;

    if (!isMovingObject)
    {
        if (ao.y > completelyResetThres)
            oldWeight = .0f;
        else if (ao.y > partlyResetThres)
            oldWeight = min(oldWeight, 4.0f * newWeight);
    }

    // the new weight for the next frame
    const float combinedWeight = clamp(newWeight + oldWeight, .0f, temporalCoherence);

    //////////
    //-- blend ao between old and new samples (and avoid division by zero)
    OUT.illum_col.x = (ao.x * newWeight + oldSsao * oldWeight) / max(1e-6f, newWeight + oldWeight);

    OUT.illum_col.z = SqrLen(diffVec);
    OUT.illum_col.y = combinedWeight;
    OUT.illum_col.w = eyeSpaceDepth;

    return OUT;
}