source: GTP/trunk/Lib/Vis/Preprocessing/src/havran/ktbftrav.cpp @ 2583

Revision 2583, 44.8 KB checked in by bittner, 17 years ago (diff)

Havran ray caster functional except enhanced features used by mutation and object based pvs

Line 
1// ===================================================================
2// $Id: $
3//
4// ktbftrav.cpp
5//
6// class: CKTBTraversal
7//
8// REPLACEMENT_STRING
9//
10// Copyright by Vlastimil Havran, 2007 - email to "vhavran AT seznam.cz"
11// Initial coding by Vlasta Havran, February 2007 (copy from kdrtrav.cpp)
12
13// GOLEM headers
14#include "ktbconf.h"
15#include "ktbtrav.h"
16#include "Intersectable.h"
17
18namespace GtpVisibilityPreprocessor {
19
20#ifdef TRV00F
21
22#if 0
23// the depth of traversal when kd-trees are nested. The global (the highest
24// level) kd-tree is at the depth 0.
25int
26CKTBTraversal::traversalDepth = 0;
27
28// sets the stack pointers that are used for the traversal.
29void
30CKTBTraversal::GetStackPointers(struct SStackElem *&entryPointer,
31                                struct SStackElem *&exitPointer)
32{
33  assert(traversalDepth < MAX_NESTING);
34 
35  int index = traversalDepth * MAX_HEIGHT;
36  entryPointer = &(stack[index]);
37  exitPointer = &(stack[index + 1]);
38
39  traversalDepth++; 
40  return;
41}
42
43// sets the stack pointers that are used for the traversal.
44void
45CKTBTraversal::RestoreStackPointers()
46{
47  assert(traversalDepth >= 0);
48  traversalDepth--; 
49  return;
50}
51
52
53// default constructor
54CKTBTraversal::CKTBTraversal()
55{
56  bbox = AxisAlignedBox3(Vector3(MAXFLOAT),
57                         Vector3(-MAXFLOAT));
58  root = 0;
59  epsilon = 0;
60
61#ifdef __TRAVERSAL_STATISTICS
62  _allNodesTraversed = 0L;
63  _fullLeavesTraversed = 0L;
64  _emptyLeavesTraversed = 0L;
65#endif
66}
67 
68// Here we find the node (preferably minbox node) containing
69// the point
70const SKTBNodeT*
71CKTBTraversal::Locate(const Vector3 &position)
72{
73  const SKTBNodeT *current, *next = root;
74  const SKTBNodeT *returnNode = 0;
75
76  do {
77    current = next;
78    switch (GetNodeType(current)) {
79      // -------------------------------------------------
80      case CKTBAxes::EE_X_axis: {
81        if (position[CKTBAxes::EE_X_axis] < current->splitPlane)
82          next =  GetLeft(current);
83        else
84          next = GetRight(current);
85        break;
86      }
87      case CKTBAxes::EE_Y_axis: {
88        if (position[CKTBAxes::EE_Y_axis] < current->splitPlane)
89          next =  GetLeft(current);
90        else
91          next = GetRight(current);
92        break;
93      }
94      case CKTBAxes::EE_Z_axis: {
95        if (position[CKTBAxes::EE_Z_axis] < current->splitPlane)
96          next =  GetLeft(current);
97        else
98          next = GetRight(current);
99        break;
100      }
101      case CKTBAxes::EE_X_axisBox: {
102        if (position[CKTBAxes::EE_X_axis] < current->splitPlane)
103          next =  GetLeft(current);
104        else
105          next = GetRight(current);
106        returnNode = current;
107        break;
108      }
109      case CKTBAxes::EE_Y_axisBox: {
110        if (position[CKTBAxes::EE_Y_axis] < current->splitPlane)
111          next =  GetLeft(current);
112        else
113          next = GetRight(current);
114        returnNode = current;
115        break;
116      }
117      case CKTBAxes::EE_Z_axisBox: {
118        if (position[CKTBAxes::EE_Z_axis] < current->splitPlane)
119          next =  GetLeft(current);
120        else
121          next = GetRight(current);
122        returnNode = current;
123        break;
124      }
125      case CKTBAxes::EE_Leaf: {
126        next = 0; // finishing
127        break;
128      }
129      case CKTBAxes::EE_Link:{
130        next = GetLinkNode(current);
131        // cout << "Link node was accessed" << endl;
132        break;
133      }
134    } // switch
135  }
136  while (next != NULL);
137
138  if (returnNode)
139    return returnNode;
140
141  // return the last (leaf node) visited on the path
142  return current;
143}
144#endif
145 
146 
147#if 1
148int
149CKTBTraversal::FindNearestI(const SimpleRay &ray)
150{
151#if 0
152  static int counter = 0;
153  counter++;
154  bool debug = false;
155  if (counter == 530) {
156    debug = true;
157    cout << "COUNTER = " << counter << endl;
158    cout << "DEBUG starts" << endl;   
159  }
160#endif
161 
162  // passing through parameters
163  float tmin, tmax;
164 
165  // test if the whole CKTB tree is missed by the input ray
166  if ( (!root) ||
167       (!bbox.ComputeMinMaxT(ray.mOrigin, ray.mDirection, &tmin, &tmax)) ||
168       (tmax <= tmin) ||
169       (tmax <= 0.f) ) {
170    SimpleRay::IntersectionRes[0].intersectable = 0;
171    return 0; // no object can be intersected
172  }
173
174//#define _DEBUGKTB
175#ifdef _DEBUGKTB
176  int ib = 0;
177  int depth = 0;
178#endif
179 
180#ifdef __TRAVERSAL_STATISTICS
181  int allNodesTraversed = 0L;
182  int fullLeavesTraversed = 0L;
183  int emptyLeavesTraversed = 0L;
184#endif // __TRAVERSAL_STATISTICS
185
186  Vector3 invertedDir;
187  invertedDir.x = 1.0f / ray.mDirection.x;
188  invertedDir.y = 1.0f / ray.mDirection.y;
189  invertedDir.z = 1.0f / ray.mDirection.z;
190 
191  // start from the root node
192  if (tmin < 0.f)
193    tmin = 0.f;
194
195  int index = 1;
196  stack3[1].nodep = root;
197  stack3[1].tmax = tmax;
198  tmax = tmin;
199  SKTBNodeT * childNodes[2];
200  int RayDirs[3];
201  RayDirs[0] = ray.mDirection.x < 0.f ? 1 : 0;
202  RayDirs[1] = ray.mDirection.y < 0.f ? 1 : 0;
203  RayDirs[2] = ray.mDirection.z < 0.f ? 1 : 0;
204 
205  // we have to check the node
206  // current node is not the leaf, empty leaves are NULL pointers
207  while (index) {
208    register SKTBNodeT *currNode = stack3[index].nodep;
209    tmin = tmax;
210    tmax = stack3[index].tmax;
211#if 0
212    if (debug) {
213      cout << "node = " << (void*)currNode
214           << " tmin = " << tmin << " tmax = " << tmax
215           << endl;
216    }
217#endif
218   
219    assert(tmin <= tmax);
220   
221#ifdef __TRAVERSAL_STATISTICS
222    allNodesTraversed++;
223#endif // __TRAVERSAL_STATISTICS
224    register const int nodeType = GetNodeType(currNode);
225       
226    if (nodeType < CKTBAxes::EE_Leaf) {
227      float tval = (GetSplitValue(currNode) - ray.mOrigin[nodeType]);
228      tval *= invertedDir[nodeType];
229      SKTBNodeT *near, *far;
230      childNodes[0] = GetLeft(currNode);
231      childNodes[1] = GetRight(currNode);
232      int rayDir = RayDirs[nodeType];
233      near = childNodes[rayDir];
234      far = childNodes[rayDir ^ 0x1];
235     
236      stack3[index].nodep = far;
237      // stack3[index].tmax = tmax; // not necessary, this is already there !
238      // int c = tval < tmax ? 1 : 0;
239      index += tval < tmax ? 1 : 0;
240      stack3[index].nodep = near;
241      stack3[index].tmax = Min(tval, tmax);
242      tmax = tmin;
243      // int d = tval < tmin ? -1 : 0;
244      index += tval < tmin ? -1 : 0;
245    }
246    else {
247      if (nodeType == CKTBAxes::EE_Leaf) {
248        // test objects for intersection
249#ifdef _DEBUGKTB
250        cout << "Leaf " << endl;
251        depth++;
252#endif
253#ifdef _DEBUGKTB
254        DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
255              << " extp.t = " << extp->t << endl;
256#endif
257        if (!IsEmptyLeaf_(currNode)) {
258#ifdef _DEBUGKTB
259          cout << "Full leaf at depth= " << depth << endl;
260#endif     
261
262#ifdef __TRAVERSAL_STATISTICS
263          fullLeavesTraversed++;
264#endif // __TRAVERSAL_STATISTICS
265          // test the objects in the full leaf against the ray
266         
267          SimpleRay::IntersectionRes[0].maxt = stack3[index].tmax;
268#if 0
269          // using subroutine
270          if (TestFullLeaf(ray, currNode))
271#else
272          // Avoiding function call by copying the code
273          const ObjectContainer * const list = GetObjList(currNode);
274          int intersected = 0;
275          // iterate the whole list and find out the nearest intersection
276          ObjectContainer::const_iterator sc_end = list->end();
277          for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
278            // if the intersection realy lies in the node       
279            intersected += ((*sc)->CastSimpleRay(ray));
280          } // for all objects
281          if (intersected)
282#endif
283          {
284#ifdef _DEBUGKTB
285            cout << "Full leaf HIT " << endl;
286#endif 
287             
288#ifdef __TRAVERSAL_STATISTICS
289            _allNodesTraversed += allNodesTraversed;
290            _fullLeavesTraversed += fullLeavesTraversed;
291            _emptyLeavesTraversed += emptyLeavesTraversed;
292#endif // __TRAVERSAL_STATISTICS
293           
294            // signed distance should be already set in TestFullLeaf
295            // the first object intersected was found   
296            return 1;
297          }
298        } // full leaf
299#ifdef __TRAVERSAL_STATISTICS
300        else {
301#ifdef _DEBUGKTB
302          cout << "Empty leaf at depth= " << depth << endl;
303#endif     
304          emptyLeavesTraversed++;
305        }
306#endif // __TRAVERSAL_STATISTICS
307
308#ifdef _DEBUGKTB
309        cout << "Pop the node" << endl;
310#endif     
311   
312        // pop farChild from the stack
313        // restore the current values
314        index--;
315        continue;
316      }
317      else {
318        assert(nodeType == CKTBAxes::EE_Link);
319#ifdef _DEBUGKTB
320        cout << "Link " << endl;
321#endif
322        stack3[index].nodep = GetLinkNode(currNode);
323        // cout << "Link node was accessed" << endl;
324        continue;
325      }
326    }
327  } // while current node is not the leaf
328
329#ifdef __TRAVERSAL_STATISTICS
330  _allNodesTraversed += allNodesTraversed;
331  _fullLeavesTraversed += fullLeavesTraversed;
332  _emptyLeavesTraversed += emptyLeavesTraversed;
333#endif // __TRAVERSAL_STATISTICS
334
335  // no objects found along the ray path
336  return 0;
337} // FindNearestI - single ray
338#endif
339
340#if 1
341int
342CKTBTraversal::FindNearestI(const SimpleRay &ray, const AxisAlignedBox3 &localbox)
343{
344#if 0
345  static int counter = 0;
346  counter++;
347  bool debug = false;
348  if (counter == 530) {
349    debug = true;
350    cout << "COUNTER = " << counter << endl;
351    cout << "DEBUG starts" << endl;   
352  }
353#endif
354 
355  // passing through parameters
356  float tmin, tmax;
357 
358  // test if the whole CKTB tree is missed by the input ray
359  if ( (!root) ||
360       (!localbox.ComputeMinMaxT(ray.mOrigin, ray.mDirection, &tmin, &tmax)) ||
361       (tmax <= tmin) ||
362       (tmax <= 0.f) ) {
363    SimpleRay::IntersectionRes[0].intersectable = 0;
364    return 0; // no object can be intersected
365  }
366
367//#define _DEBUGKTB
368#ifdef _DEBUGKTB
369  int ib = 0;
370  int depth = 0;
371#endif
372 
373#ifdef __TRAVERSAL_STATISTICS
374  int allNodesTraversed = 0L;
375  int fullLeavesTraversed = 0L;
376  int emptyLeavesTraversed = 0L;
377#endif // __TRAVERSAL_STATISTICS
378
379  Vector3 invertedDir;
380  invertedDir.x = 1.0f / ray.mDirection.x;
381  invertedDir.y = 1.0f / ray.mDirection.y;
382  invertedDir.z = 1.0f / ray.mDirection.z;
383 
384  // start from the root node
385  if (tmin < 0.f)
386    tmin = 0.f;
387
388  int index = 1;
389  stack3[1].nodep = root;
390  stack3[1].tmax = tmax;
391  tmax = tmin;
392  SKTBNodeT * childNodes[2];
393  int RayDirs[3];
394  RayDirs[0] = ray.mDirection.x < 0.f ? 1 : 0;
395  RayDirs[1] = ray.mDirection.y < 0.f ? 1 : 0;
396  RayDirs[2] = ray.mDirection.z < 0.f ? 1 : 0;
397 
398  // we have to check the node
399  // current node is not the leaf, empty leaves are NULL pointers
400  while (index) {
401    register SKTBNodeT *currNode = stack3[index].nodep;
402    tmin = tmax;
403    tmax = stack3[index].tmax;
404#if 0
405    if (debug) {
406      cout << "node = " << (void*)currNode
407           << " tmin = " << tmin << " tmax = " << tmax
408           << endl;
409    }
410#endif
411   
412    assert(tmin <= tmax);
413   
414#ifdef __TRAVERSAL_STATISTICS
415    allNodesTraversed++;
416#endif // __TRAVERSAL_STATISTICS
417    register const int nodeType = GetNodeType(currNode);
418    if (nodeType < CKTBAxes::EE_Leaf) {
419      float tval = (GetSplitValue(currNode) - ray.mOrigin[nodeType]);
420      tval *= invertedDir[nodeType];
421      SKTBNodeT *near, *far;
422      childNodes[0] = GetLeft(currNode);
423      childNodes[1] = GetRight(currNode);
424      int rayDir = RayDirs[nodeType];
425      near = childNodes[rayDir];
426      far = childNodes[rayDir ^ 0x1];
427     
428      stack3[index].nodep = far;
429      // stack3[index].tmax = tmax; // not necessary, this is already there !
430      // int c = tval < tmax ? 1 : 0;
431      index += tval < tmax ? 1 : 0;
432      stack3[index].nodep = near;
433      stack3[index].tmax = Min(tval, tmax);
434      tmax = tmin;
435      // int d = tval < tmin ? -1 : 0;
436      index += tval < tmin ? -1 : 0;
437    }
438    else {
439      if (nodeType == CKTBAxes::EE_Leaf) {
440        // test objects for intersection
441#ifdef _DEBUGKTB
442        cout << "Leaf " << endl;
443        depth++;
444#endif
445#ifdef _DEBUGKTB
446        DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
447              << " extp.t = " << extp->t << endl;
448#endif
449        if (!IsEmptyLeaf_(currNode)) {
450#ifdef _DEBUGKTB
451          cout << "Full leaf at depth= " << depth << endl;
452#endif     
453
454#ifdef __TRAVERSAL_STATISTICS
455          fullLeavesTraversed++;
456#endif // __TRAVERSAL_STATISTICS
457          // test the objects in the full leaf against the ray
458         
459          SimpleRay::IntersectionRes[0].maxt = stack3[index].tmax;
460#if 0
461          // using subroutine
462          if (TestFullLeaf(ray, currNode))
463#else
464          // Avoiding function call by copying the code
465          const ObjectContainer * const list = GetObjList(currNode);
466          int intersected = 0;
467          // iterate the whole list and find out the nearest intersection
468          ObjectContainer::const_iterator sc_end = list->end();
469          for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
470            // if the intersection realy lies in the node       
471            intersected += ((*sc)->CastSimpleRay(ray));
472          } // for all objects
473          if (intersected)
474#endif
475          {
476#ifdef _DEBUGKTB
477            cout << "Full leaf HIT " << endl;
478#endif 
479             
480#ifdef __TRAVERSAL_STATISTICS
481            _allNodesTraversed += allNodesTraversed;
482            _fullLeavesTraversed += fullLeavesTraversed;
483            _emptyLeavesTraversed += emptyLeavesTraversed;
484#endif // __TRAVERSAL_STATISTICS
485           
486            // signed distance should be already set in TestFullLeaf
487            // the first object intersected was found   
488            return 1;
489          }
490        } // full leaf
491#ifdef __TRAVERSAL_STATISTICS
492        else {
493#ifdef _DEBUGKTB
494          cout << "Empty leaf at depth= " << depth << endl;
495#endif     
496          emptyLeavesTraversed++;
497        }
498#endif // __TRAVERSAL_STATISTICS
499
500#ifdef _DEBUGKTB
501        cout << "Pop the node" << endl;
502#endif     
503   
504        // pop farChild from the stack
505        // restore the current values
506        index--;
507        continue;
508      }
509      else {
510        assert(nodeType == CKTBAxes::EE_Link);
511#ifdef _DEBUGKTB
512        cout << "Link " << endl;
513#endif
514        stack3[index].nodep = GetLinkNode(currNode);
515        // cout << "Link node was accessed" << endl;
516        continue;
517      }
518    }
519  } // while current node is not the leaf
520
521#ifdef __TRAVERSAL_STATISTICS
522  _allNodesTraversed += allNodesTraversed;
523  _fullLeavesTraversed += fullLeavesTraversed;
524  _emptyLeavesTraversed += emptyLeavesTraversed;
525#endif // __TRAVERSAL_STATISTICS
526
527  // no objects found along the ray path
528  return 0;
529} // FindNearestI - single ray
530#endif
531 
532
533// ---------------------------------------------------------------------------
534// This is an attempt using SSE instructions - not successfull 31/12/2007
535#if 0
536int
537CKTBTraversal::FindNearestI(const SimpleRay &ray)
538{
539#if 0
540  static int counter = 0;
541  counter++;
542  bool debug = false;
543  if (counter == 530) {
544    debug = true;
545    cout << "COUNTER = " << counter << endl;
546    cout << "DEBUG starts" << endl;   
547  }
548#endif
549 
550  // passing through parameters
551  float tmin, tmax;
552 
553  // test if the whole CKTB tree is missed by the input ray
554  if ( (!root) ||
555       (!bbox.ComputeMinMaxT(ray.mOrigin, ray.mDirection, &tmin, &tmax)) ||
556       (tmax <= tmin) ||
557       (tmax <= 0.f) ) {
558    SimpleRay::IntersectionRes[0].intersectable = 0;
559    return 0; // no object can be intersected
560  }
561
562//#define _DEBUGKTB
563#ifdef _DEBUGKTB
564  int ib = 0;
565  int depth = 0;
566#endif
567 
568#ifdef __TRAVERSAL_STATISTICS
569  int allNodesTraversed = 0L;
570  int fullLeavesTraversed = 0L;
571  int emptyLeavesTraversed = 0L;
572#endif // __TRAVERSAL_STATISTICS
573
574  Vector3 invertedDir;
575  invertedDir.x = 1.0f / ray.mDirection.x;
576  invertedDir.y = 1.0f / ray.mDirection.y;
577  invertedDir.z = 1.0f / ray.mDirection.z;
578 
579  // start from the root node
580  if (tmin < 0.f)
581    tmin = 0.f;
582
583  int index = 1;
584  stack3[1].nodep = root;
585  stack3[1].tmax = tmax;
586  tmax = tmin;
587  SKTBNodeT * childNodes[2];
588  int RayDirs[3];
589  RayDirs[0] = ray.mDirection.x < 0.f ? 1 : 0;
590  RayDirs[1] = ray.mDirection.y < 0.f ? 1 : 0;
591  RayDirs[2] = ray.mDirection.z < 0.f ? 1 : 0;
592 
593  // we have to check the node
594  // current node is not the leaf, empty leaves are NULL pointers
595  while (index) {
596    SKTBNodeT *currNode = stack3[index].nodep;
597    tmin = tmax;
598    tmax = stack3[index].tmax;
599#if 0
600    if (debug) {
601      cout << "node = " << (void*)currNode
602           << " tmin = " << tmin << " tmax = " << tmax
603           << endl;
604    }
605#endif
606   
607    assert(tmin <= tmax);
608   
609#ifdef __TRAVERSAL_STATISTICS
610    allNodesTraversed++;
611#endif // __TRAVERSAL_STATISTICS
612    const unsigned int nodeType = GetNodeType(currNode);
613    if (nodeType < CKTBAxes::EE_Leaf) {
614      float tval = (GetSplitValue(currNode) - ray.mOrigin[nodeType]);
615      tval *= invertedDir[nodeType];
616      SKTBNodeT *near, *far;
617      childNodes[0] = GetLeft(currNode);
618      childNodes[1] = GetRight(currNode);
619      int rayDir = RayDirs[nodeType];
620      near = childNodes[rayDir];
621      far = childNodes[rayDir ^ 0x1];
622      // This code is slower than above !!!!!
623      stack3[index].nodep = far;
624      // stack3[index].tmax = tmax; // this is already there, not necessary!
625      __m128 tsim = _mm_set_ss(tval);
626      // index += tval < tmax ? 1 : 0;
627      __m128 tother = _mm_set_ss(tmax);
628      index += _mm_ucomilt_ss(tsim, tother);
629      stack3[index].nodep = near;
630      // stack3[index].tmax = Min(tval, tmax);
631      _mm_store_ss(&(stack3[index].tmax), _mm_min_ps(tsim, tother));
632      tmax = tmin;
633      // index += tval < tmin ? -1 : 0;
634      __m128 tother2 = _mm_set_ss(tmin);
635      index -= _mm_ucomilt_ss(tsim, tother2);
636    }
637    else {
638      if (nodeType == CKTBAxes::EE_Leaf) {
639        // test objects for intersection
640#ifdef _DEBUGKTB
641        cout << "Leaf " << endl;
642        depth++;
643#endif
644#ifdef _DEBUGKTB
645        DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
646              << " extp.t = " << extp->t << endl;
647#endif
648        if (!IsEmptyLeaf_(currNode)) {
649#ifdef _DEBUGKTB
650          cout << "Full leaf at depth= " << depth << endl;
651#endif     
652
653#ifdef __TRAVERSAL_STATISTICS
654          fullLeavesTraversed++;
655#endif // __TRAVERSAL_STATISTICS
656          // test the objects in the full leaf against the ray
657         
658          SimpleRay::IntersectionRes[0].maxt = stack3[index].tmax;
659          if (TestFullLeaf(ray, currNode)) {
660#ifdef _DEBUGKTB
661            cout << "Full leaf HIT " << endl;
662#endif 
663             
664#ifdef __TRAVERSAL_STATISTICS
665            _allNodesTraversed += allNodesTraversed;
666            _fullLeavesTraversed += fullLeavesTraversed;
667            _emptyLeavesTraversed += emptyLeavesTraversed;
668#endif // __TRAVERSAL_STATISTICS
669           
670            // signed distance should be already set in TestFullLeaf
671            // the first object intersected was found   
672            return 1;
673          }
674        } // full leaf
675#ifdef __TRAVERSAL_STATISTICS
676        else {
677#ifdef _DEBUGKTB
678          cout << "Empty leaf at depth= " << depth << endl;
679#endif     
680          emptyLeavesTraversed++;
681        }
682#endif // __TRAVERSAL_STATISTICS
683
684#ifdef _DEBUGKTB
685        cout << "Pop the node" << endl;
686#endif     
687   
688        // pop farChild from the stack
689        // restore the current values
690        index--;
691        continue;
692      }
693      else {
694        assert(nodeType == CKTBAxes::EE_Link);
695#ifdef _DEBUGKTB
696        cout << "Link " << endl;
697#endif
698        stack3[index].nodep = GetLinkNode(currNode);
699        // cout << "Link node was accessed" << endl;
700        continue;
701      }
702    }
703  } // while current node is not the leaf
704
705#ifdef __TRAVERSAL_STATISTICS
706  _allNodesTraversed += allNodesTraversed;
707  _fullLeavesTraversed += fullLeavesTraversed;
708  _emptyLeavesTraversed += emptyLeavesTraversed;
709#endif // __TRAVERSAL_STATISTICS
710
711  // no objects found along the ray path
712  return 0;
713} // FindNearestI - single ray
714#endif
715
716#if 0
717// Reasonably fast - about 101,500 rays per second for single dir!
718// It allows fast switching context from one ray to the next ray so it it
719// virtually independent of memory latency !
720int
721CKTBTraversal::FindNearestI_16oneDir(SimpleRayContainer &rays, int offset)
722{
723  // passing through parameters
724  int cntRays = 0;
725  if (!root)
726    return 0;
727
728  // The auxiliary variables to be precomputed
729  static float invertedDir[cntMaxRays * 4];
730  static float rayOrig[cntMaxRays * 4];
731  static int rayDirs[cntMaxRays * 4];
732  static int indexRay[cntMaxRays];
733  static int indexArray[cntMaxRays];
734  static int indexStack[cntMaxRays];
735  const int LOG2_MAX_HEIGHT = 5;
736  const int MAX_HEIGHT = 1 << LOG2_MAX_HEIGHT;
737  assert(MAX_HEIGHT == 32);
738  static struct SStackElem3 stackA[cntMaxRays * MAX_HEIGHT];
739  static float tmaxArray[cntMaxRays];
740  int cntHits = 0;
741 
742  float tmin, tmax;
743  for (int i = 0; i < cntMaxRays; i++) {
744  // test if the whole CKTB tree is missed by the input ray
745    if ((!bbox.ComputeMinMaxT(rays[i+offset].mOrigin,
746                              rays[i+offset].mDirection,
747                              &tmin, &tmax)) ||
748        (tmax <= tmin) ||
749        (tmax <= 0.f) ) {
750      SimpleRay::IntersectionRes[i].intersectable = 0;
751    }
752    else {
753      int indexR = (cntRays << 2);
754      rayOrig[indexR + 0] = rays[i+offset].mOrigin.x;
755      rayOrig[indexR + 1] = rays[i+offset].mOrigin.y;
756      rayOrig[indexR + 2] = rays[i+offset].mOrigin.z;
757      //rayOrig[indexR + 3] = 0.f;
758      invertedDir[indexR + 0] = 1.0f / (rays[i+offset].mDirection.x);
759      invertedDir[indexR + 1] = 1.0f / (rays[i+offset].mDirection.y);
760      invertedDir[indexR + 2] = 1.0f / (rays[i+offset].mDirection.z);
761      //invertedDir[indexR + 2] = 0.f;
762      rayDirs[indexR + 0] = rays[i+offset].mDirection.x < 0.f ? 1 : 0;
763      rayDirs[indexR + 1] = rays[i+offset].mDirection.y < 0.f ? 1 : 0;
764      rayDirs[indexR + 2] = rays[i+offset].mDirection.z < 0.f ? 1 : 0;
765      //rayDirs[indexR + 3] = 0;
766      indexRay[cntRays] = i; // the index to the ray
767      indexArray[cntRays] = cntRays; // the index to the array
768      int indexS = (cntRays << LOG2_MAX_HEIGHT) + 1;
769      indexStack[cntRays] = indexS; // the index in the stack
770      stackA[indexS].nodep = root; // we start from the root
771      stackA[indexS].tmax = tmax; // maximum distance
772      if (tmin < 0.f) tmin = 0.f;
773      tmaxArray[cntRays] = tmin;
774      cntRays++;
775    }
776  }
777
778//#define _DEBUGKTB
779#ifdef _DEBUGKTB
780  int ib = 0;
781  int depth = 0;
782#endif
783 
784#ifdef __TRAVERSAL_STATISTICS
785  int allNodesTraversed = 0L;
786  int fullLeavesTraversed = 0L;
787  int emptyLeavesTraversed = 0L;
788#endif // __TRAVERSAL_STATISTICS
789 
790  SKTBNodeT * childNodes[2];
791
792#define PREF_DEFAULT _MM_HINT_T0
793 
794  // we have to check the node
795  // current node is not the leaf, empty leaves are NULL pointers
796  while (cntRays) {
797    // we assume that all the nodes are interior nodes
798    for (int i = 0; i < cntRays; i++) {
799      // which indices to array should be used
800      int indexA = indexArray[i];
801      float tmin = tmaxArray[indexA];
802
803      // the stack indexing is here
804      int indexSA = indexStack[indexA];
805      SKTBNodeT *currNode = stackA[indexSA].nodep;
806     
807#if 0
808      if (debug) {
809        cout << "node = " << (void*)currNode
810             << " tmin = " << tmin << " tmax = " << tmax
811             << endl;
812      }
813#endif
814
815#if 0
816      if (tmin > tmax) {
817        cout << "PROBLEM tmin = " << tmin << " tmax = " << tmax;
818        cout << endl;
819      }
820#endif     
821      assert(tmin <= tmax);
822   
823#ifdef __TRAVERSAL_STATISTICS
824      allNodesTraversed++;
825#endif // __TRAVERSAL_STATISTICS
826      const unsigned int nodeType = (GetNodeType(currNode)) & 0x7;
827      if (nodeType < CKTBAxes::EE_Leaf) {
828        int indexRayOD = (indexA << 2) + nodeType;
829        float tval = (GetSplitValue(currNode) - rayOrig[indexRayOD])
830          * invertedDir[indexRayOD];
831        SKTBNodeT *near, *far;
832        childNodes[0] = GetLeft(currNode);
833        childNodes[1] = GetRight(currNode);
834        int rayDir = rayDirs[indexRayOD];
835        near = childNodes[rayDir];
836        far = childNodes[rayDir ^ 0x1];
837       
838        stackA[indexSA].nodep = far; // store far node
839        //stackA[indexSA].tmax = tmax; // with correct dist - the same as before
840        float tmax = tmaxArray[indexA] = stackA[indexSA].tmax;
841        int c = tval < tmax ? 1 : 0;
842        indexSA += c;
843        stackA[indexSA].nodep = near; // store near node
844        stackA[indexSA].tmax = Min(tval, tmax); // with correct dist
845        int d = tval < tmin ? 1 : 0;
846        indexSA -= d;
847        // This is prefetching - so the next time we have it
848        // in the cache !
849        GPREFETCH(stackA[indexSA].nodep, PREF_DEFAULT);
850        // store tmax and index to the stack
851        indexStack[indexA] = indexSA;
852        tmaxArray[indexA] = tmin;       
853      }
854      else {
855        if (nodeType == CKTBAxes::EE_Leaf) {
856          // test objects for intersection
857#ifdef _DEBUGKTB
858          cout << "Leaf " << endl;
859          depth++;
860#endif
861#ifdef _DEBUGKTB
862          DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
863                << " extp.t = " << extp->t << endl;
864#endif
865          float tmax = tmaxArray[indexA] = stackA[indexSA].tmax;
866          if (!IsEmptyLeaf_(currNode)) {
867#ifdef _DEBUGKTB
868            cout << "Full leaf at depth= " << depth << endl;
869#endif     
870
871#ifdef __TRAVERSAL_STATISTICS
872            fullLeavesTraversed++;
873#endif // __TRAVERSAL_STATISTICS
874          // test the objects in the full leaf against the ray
875         
876            // which ray is processed
877            int indexR = indexRay[indexA];
878            SimpleRay::IntersectionRes[indexR].maxt = tmax;
879            if (TestFullLeaf(rays[indexR+offset], currNode, indexR)) {
880
881              // we remove the ray from the calculation
882              indexArray[i] = indexArray[cntRays-1];
883              cntRays--; // we decrease the number of rays
884              cntHits++;
885#ifdef _DEBUGKTB
886              cout << "Full leaf HIT " << endl;
887#endif 
888             
889#ifdef __TRAVERSAL_STATISTICS
890              _allNodesTraversed += allNodesTraversed;
891              _fullLeavesTraversed += fullLeavesTraversed;
892              _emptyLeavesTraversed += emptyLeavesTraversed;
893#endif // __TRAVERSAL_STATISTICS           
894           
895              // signed distance should be already set in TestFullLeaf
896              // the first object intersected was found
897              continue;
898            }
899          } // full leaf
900#ifdef __TRAVERSAL_STATISTICS
901          else {
902#ifdef _DEBUGKTB
903            cout << "Empty leaf at depth= " << depth << endl;
904#endif     
905            emptyLeavesTraversed++;
906          }
907#endif // __TRAVERSAL_STATISTICS
908
909#ifdef _DEBUGKTB
910          cout << "Pop the node" << endl;
911#endif     
912   
913          // pop farChild from the stack
914          // restore the current values
915          --indexSA;
916          // This is bits 0,1,2,3,4,5 - the stack depth = 32 !
917          if ( (indexSA & 0x1f) == 0) {
918            // we remove the ray from the calculation
919            indexArray[i] = indexArray[cntRays-1];
920            cntRays--; // we decrease the number of rays
921          }
922          else {
923            indexStack[indexA] = indexSA;
924            // we prefetch the data to be accessible
925            // the next time
926            GPREFETCH(stackA[indexSA].nodep, PREF_DEFAULT);
927          }
928          continue;
929        }
930        else {
931          assert(nodeType == CKTBAxes::EE_Link);
932#ifdef _DEBUGKTB
933          cout << "Link " << endl;
934#endif
935          stackA[indexSA].nodep = GetLinkNode(currNode);
936          // cout << "Link node was accessed" << endl;
937          continue;
938        }
939      } // empty leaf or link
940    } // for all active rays
941  } // while cntRays
942
943#ifdef __TRAVERSAL_STATISTICS
944  _allNodesTraversed += allNodesTraversed;
945  _fullLeavesTraversed += fullLeavesTraversed;
946  _emptyLeavesTraversed += emptyLeavesTraversed;
947#endif // __TRAVERSAL_STATISTICS
948
949  // no objects found along the ray path
950  return cntHits;
951}
952#endif
953
954#ifdef __SSE__
955
956#if 1
957// Even faster - about 125,500 rays per second for single dir and 164 rps
958// for double dir !
959int
960CKTBTraversal::FindNearestI_16oneDir(SimpleRayContainer &rays, int offset)
961{
962  static RayPacket2x2 raypack;
963  struct SResultI {
964    Intersectable *intersectable;
965    float          tdist;   
966  };
967  static SResultI results[16];
968   
969  for (int i = 0; i < 4; i++) {
970    int k = i * 4 + offset;
971    for (int j = 0; j < 4; j++, k++) {
972      raypack.SetLoc(j, rays[k].mOrigin);
973      raypack.SetDir(j, rays[k].mDirection);     
974    }
975    // Here either use ray packet traversal or
976    // casting individual rays
977    FindNearestI(raypack);
978    k = i * 4;
979    for (int j = 0; j < 4; j++, k++) {
980      results[k].intersectable = raypack.GetObject(j);
981      results[k].tdist = raypack.GetT(j);
982    } // for j
983  } // for i
984
985  // Copy the results to the output array
986  for (int i = 0; i < 16; i++) {
987    SimpleRay::IntersectionRes[i].intersectable =
988      results[i].intersectable;
989    SimpleRay::IntersectionRes[i].tdist =
990      results[i].tdist;
991  } // for i
992  return 0;
993}
994#endif
995 
996#if 0
997// This code works well 1/1/2008 - 11:00
998// The same operations for packets of rays for the same signs,
999// otherwise it is emulated by decomposition
1000// of a packet to individual rays and traced individually.
1001void
1002CKTBTraversal::FindNearestI(RayPacket2x2 &rp)
1003{
1004  int m1 = _mm_movemask_ps(rp.dx4);
1005  if ((m1 == 0)||(m1 == 15)) {
1006  m1 = _mm_movemask_ps(rp.dy4);
1007  if ((m1 == 0)||(m1 == 15)) {
1008  m1 = _mm_movemask_ps(rp.dz4);
1009  if ((m1 == 0)||(m1 == 15)) {
1010    rp.Init();
1011    // all the signs for 4 rays are the same, use
1012    // ray packet traversal
1013    // Compute min and max distances
1014    GALIGN16 union { float tmin4[4]; __m128 tmin_4; };
1015    GALIGN16 union { float tmax4[4]; __m128 tmax_4; };
1016    SimpleRay sray[4];
1017    int maxIntersections = 4;
1018    GALIGN16 union { int inters[4]; __m128 inters_4; };
1019    inters[0] = inters[1] = inters[2] = inters[3] = 1;
1020    unsigned int inters32 = 0xf;
1021    for (int i = 0; i < 4; i++) {
1022      bbox.ComputeMinMaxT(rp.GetLoc(i), rp.GetDir(i), &(tmin4[i]), &(tmax4[i]));
1023      if ( (tmin4[i] >= tmax4[i]) ||
1024           (tmax4[i] < 0.f) ) {
1025        inters[i] = 0; // finished
1026        inters32 &= ~(1 << i); // bit zero when ray is invalid
1027        maxIntersections--;
1028      }     
1029      if (tmin4[i] < 0.f)
1030        tmin4[i] = 0.f;
1031      sray[i].mOrigin = rp.GetLoc(i);
1032      sray[i].mDirection = rp.GetDir(i);
1033    } // for i
1034    if (maxIntersections == 0)
1035      return;
1036
1037    SKTBNodeT * childNodes[2];
1038    int RayDirs[3];
1039    RayDirs[0] = (rp.dx[0] > 0.f) ? 1 : 0;
1040    RayDirs[1] = (rp.dy[0] > 0.f) ? 1 : 0;
1041    RayDirs[2] = (rp.dz[0] > 0.f) ? 1 : 0;
1042    //int activeMask=_mm_movemask_ps(_mm_cmplt_ps( tmin_4, tmax_4 ))&inters32;
1043    int activeMask = inters32;
1044    int indexStack = 0;
1045    SKTBNodeT *currNode = root;
1046    unsigned int k = GetNodeType(currNode);
1047    for (;;) {     
1048      while (k < CKTBAxes::EE_Leaf) {
1049        // the 3 operations below can be brought down to 3 simple float
1050        // calculations by precomputing min/max of the inverse dir
1051        const __m128 node_split = _mm_set_ps1(GetSplitValue(currNode));
1052        const __m128 t4 =
1053          _mm_mul_ps(_mm_sub_ps(node_split, rp.orig[k]), rp.idir[k]);
1054        childNodes[0] = GetLeft(currNode);
1055        childNodes[1] = GetRight(currNode);
1056        int rayDir = RayDirs[k];
1057        SKTBNodeT *far = childNodes[rayDir];
1058        if (!(_mm_movemask_ps(_mm_cmpgt_ps(t4, tmin_4)) & activeMask))
1059        {
1060          currNode = far;
1061          k = GetNodeType(currNode);
1062          continue;
1063        }
1064        currNode = childNodes[rayDir ^ 0x1]; // this is near node
1065        k = GetNodeType(currNode);     
1066        if (! (_mm_movemask_ps(_mm_cmplt_ps( t4, tmax_4)) & activeMask))
1067          continue;
1068
1069        // pop far node to the stack
1070        stack4[indexStack].nodep = far;
1071        stack4[indexStack].tmax_4 = tmax_4;
1072        stack4[indexStack].tmin_4 = _mm_max_ps(t4, tmin_4);
1073        // stack4[indexStack].mask = activeMask;
1074        indexStack++;
1075       
1076        tmax_4 = _mm_min_ps(t4, tmax_4);
1077        activeMask &= _mm_movemask_ps(_mm_cmplt_ps( tmin_4, tmax_4 ));
1078      } // while this is an interior node
1079
1080      // either a leaf or a link
1081      if (k == CKTBAxes::EE_Leaf) {
1082        // test objects for intersection
1083        if (!IsEmptyLeaf_(currNode)) {
1084          // cout << "Full leaf" << endl;
1085         
1086          // test the objects in the full leaf against the ray   
1087          for (int i = 0; i < 4; i++) {
1088            if (inters[i] ) {
1089              // no intersection so far !
1090              SimpleRay::IntersectionRes[i].maxt = tmax4[i];
1091              // Test only rays that were not finished
1092              if (TestFullLeaf(sray[i], currNode, i)) {
1093                // intersection for this ray found
1094                inters[i] = 0;
1095                inters32 &= ~(1 << i);
1096                rp.SetT(i, SimpleRay::IntersectionRes[0].maxt);
1097                rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);           
1098                // signed distance should be already set in TestFullLeaf
1099                // the first object intersected was found
1100                if (--maxIntersections == 0)
1101                  return;
1102              }
1103            } // if this ray did not hit the triangle so far
1104          } // for all 4 rays
1105        } // full leaf
1106        // pop farChild from the stack
1107        // restore the current values
1108        // update the minimum distance since we traverse to the next one
1109
1110        if (indexStack == 0)
1111          return;
1112        indexStack--;
1113        currNode = stack4[indexStack].nodep;
1114        k = GetNodeType(currNode);
1115        tmin_4 = stack4[indexStack].tmin_4;
1116        tmax_4 = stack4[indexStack].tmax_4;
1117        activeMask = _mm_movemask_ps(_mm_cmple_ps( tmin_4, tmax_4 )) & inters32;
1118        continue;
1119      }
1120      // cout << "Link node was accessed" << endl;
1121      assert(k == CKTBAxes::EE_Link);
1122      currNode = GetLinkNode(currNode);
1123      k = GetNodeType(currNode);
1124    } // for
1125    return;
1126  }}}
1127
1128  // Trace ray by ray
1129  SimpleRay ray;
1130  for (int i = 0; i < 4; i++) {
1131    ray.mOrigin = rp.GetLoc(i);
1132    ray.mDirection = rp.GetDir(i);
1133    FindNearestI(ray);
1134    rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1135    rp.SetT(i, SimpleRay::IntersectionRes[0].tdist);
1136    // SimpleRay::IntersectionRes[0].intersectable->GetNormal(0);
1137  } // for
1138}
1139#endif
1140
1141
1142#if 1
1143// This code also works well 1/1/2008 - 14:00
1144// Using mask of 128-bits width - the code works as well, only a bit
1145// faster than the code above
1146void
1147CKTBTraversal::FindNearestI(RayPacket2x2 &rp)
1148{
1149  int m1 = _mm_movemask_ps(rp.dx4);
1150  if ((m1 == 0)||(m1 == 15)) {
1151  m1 = _mm_movemask_ps(rp.dy4);
1152  if ((m1 == 0)||(m1 == 15)) {
1153  m1 = _mm_movemask_ps(rp.dz4);
1154  if ((m1 == 0)||(m1 == 15)) {
1155    rp.Init();
1156    // all the signs for 4 rays are the same, use
1157    // ray packet traversal
1158    // Compute min and max distances
1159    GALIGN16 union { float tmin4[4]; __m128 tmin_4; };
1160    GALIGN16 union { float tmax4[4]; __m128 tmax_4; };
1161    GALIGN16 union { float activeMask[4]; __m128 activeMask_4; };
1162    GALIGN16 union { float liveMask[4]; __m128 liveMask_4; };
1163    liveMask[0] = liveMask[1] = liveMask[2] = liveMask[3] = 0xffffffff;
1164
1165    GALIGN16 SimpleRay sray[4];
1166    int maxIntersections = 4;
1167    // unsigned int inters32 = 0xf;
1168    for (int i = 0; i < 4; i++) {
1169      rp.SetObject(i, 0);
1170      bbox.ComputeMinMaxT(rp.GetLoc(i), rp.GetDir(i), &(tmin4[i]), &(tmax4[i]));
1171      if ( (tmin4[i] >= tmax4[i]) ||
1172           (tmax4[i] < 0.f) ) {
1173        liveMask[i] = 0; // finished
1174        // inters32 &= ~(1 << i); // bit zero when ray is invalid
1175        maxIntersections--;
1176      }
1177      if (tmin4[i] < 0.f)
1178        tmin4[i] = 0.f;
1179      sray[i].mOrigin = rp.GetLoc(i);
1180      sray[i].mDirection = rp.GetDir(i);
1181    } // for i
1182    if (maxIntersections == 0)
1183      return;
1184
1185    // This is the mask 128 bits witdth
1186    //activeMask_4 =
1187    //  _mm_and_ps(_mm_cmple_ps(tmin_4, tmax_4),
1188    //           _mm_cmplt_ps(tmax_4, _mm_setzero_ps()));
1189    activeMask_4 = liveMask_4;
1190
1191    SKTBNodeT * childNodes[2];
1192    int RayDirs[4];
1193    RayDirs[0] = (rp.dx[0] > 0.f) ? 1 : 0;
1194    RayDirs[1] = (rp.dy[0] > 0.f) ? 1 : 0;
1195    RayDirs[2] = (rp.dz[0] > 0.f) ? 1 : 0;
1196    int indexStack = 0;
1197    SKTBNodeT *currNode = root;
1198    unsigned int k = GetNodeType(currNode);
1199    for (;;) {
1200      // traverse until we find a leaf
1201      while (k < CKTBAxes::EE_Leaf) {
1202        // the 3 operations below can be brought down to 3 simple float
1203        // calculations by precomputing min/max of the inverse dir
1204        // const __m128 node_split = ;
1205        const __m128 t4 =
1206          _mm_mul_ps(_mm_sub_ps(_mm_set_ps1(GetSplitValue(currNode)),
1207                                rp.orig[k]), rp.idir[k]);
1208        childNodes[0] = GetLeft(currNode);
1209        childNodes[1] = GetRight(currNode);
1210        int rayDir = RayDirs[k];
1211        SKTBNodeT *far = childNodes[rayDir];
1212        if (_mm_movemask_ps(_mm_and_ps(_mm_cmpge_ps(t4, tmin_4),
1213                                       activeMask_4))) {         
1214          currNode = far;
1215          k = GetNodeType(currNode);
1216          continue;
1217        }
1218
1219        currNode = childNodes[rayDir ^ 0x1]; // this is near node
1220        k = GetNodeType(currNode);     
1221        if (_mm_movemask_ps(_mm_and_ps(_mm_cmple_ps(t4, tmax_4),
1222                                       activeMask_4)))
1223          continue;
1224
1225        // pop far node to the stack
1226        stack4[indexStack].nodep = far;
1227        stack4[indexStack].tmax_4 = tmax_4;
1228
1229// Uncomenting this macro is unsafe!
1230// Not convinced if for packet of 4 rays we can say that since when
1231// one ray is different than the others, it could bring to wrong state
1232// It is surely true for one ray when tmin < t < tmax, but for a packet
1233// of rays this condition can be true only for a single ray
1234//   tmin4 = max(t4, tmin4) = min(t4, tmax4)
1235//#define _NOT_STORE_MINT
1236
1237#ifdef _NOT_STORE_MINT 
1238#else
1239        // store mint onto the stack
1240        stack4[indexStack].tmin_4 = _mm_max_ps(t4, tmin_4);
1241#endif 
1242        // stack4[indexStack].mask = activeMask;
1243        indexStack++;
1244       
1245        tmax_4 = _mm_min_ps(t4, tmax_4);
1246        activeMask_4 = _mm_cmplt_ps( tmin_4, tmax_4 );
1247      } // while this is an interior node
1248
1249      // either a leaf or a link
1250      if (k == CKTBAxes::EE_Leaf) {
1251        // test objects for intersection
1252        if (!IsEmptyLeaf_(currNode)) {
1253          // cout << "Full leaf" << endl;
1254         
1255          // test the objects in the full leaf against the ray   
1256          for (int i = 0; i < 4; i++) {
1257            if (liveMask[i] ) {
1258              // no intersection so far !
1259              SimpleRay::IntersectionRes[i].maxt = tmax4[i];
1260#if 0
1261              // Using subroutine
1262              // Test only rays that were not finished
1263              if (TestFullLeaf(sray[i], currNode, i))
1264#else
1265              // avoiding one call
1266              const ObjectContainer * const list = GetObjList(currNode);
1267              int intersected = 0;
1268              // iterate the whole list and find out the nearest intersection
1269              ObjectContainer::const_iterator sc_end = list->end();
1270              for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
1271                // if the intersection realy lies in the node       
1272                intersected |= ((*sc)->CastSimpleRay(sray[i], i));
1273              } // for all objects
1274              if (intersected)
1275#endif
1276              {
1277                rp.SetT(i, SimpleRay::IntersectionRes[0].maxt);
1278                rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1279                // signed distance should be already set in TestFullLeaf
1280                // the first object intersected was found
1281                if (--maxIntersections == 0)
1282                  return;
1283                // inters32 &= ~(1 << i);
1284                liveMask[i] = 0;
1285              }
1286            } // if this ray did not hit the triangle so far
1287          } // for all 4 rays
1288        } // full leaf
1289
1290        // pop farChild from the stack
1291        // restore the current values
1292        // update the minimum distance since we traverse to the next one
1293        do {
1294          if (indexStack == 0)
1295            return;
1296          indexStack--;
1297          currNode = stack4[indexStack].nodep;
1298          k = GetNodeType(currNode);
1299#ifdef _NOT_STORE_MINT
1300          // this is an attempt !
1301          tmin_4 = tmax_4;
1302#else
1303          // This surrely works
1304          tmin_4 = stack4[indexStack].tmin_4;
1305#endif   
1306          tmax_4 = stack4[indexStack].tmax_4;
1307          activeMask_4 = _mm_and_ps(_mm_cmple_ps( tmin_4, tmax_4 ), liveMask_4);
1308        }
1309        while (_mm_movemask_ps(activeMask_4) == 0);
1310      }
1311      else {
1312        // cout << "Link node was accessed" << endl;
1313        assert(k == CKTBAxes::EE_Link);
1314        currNode = GetLinkNode(currNode);
1315        k = GetNodeType(currNode);
1316      }
1317    } // for(;;)
1318    return;
1319  }}}
1320
1321  // Trace ray by ray
1322  SimpleRay ray;
1323  for (int i = 0; i < 4; i++) {
1324    ray.mOrigin = rp.GetLoc(i);
1325    ray.mDirection = rp.GetDir(i);
1326    FindNearestI(ray);
1327    rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1328    rp.SetT(i, SimpleRay::IntersectionRes[0].tdist);
1329    // SimpleRay::IntersectionRes[0].intersectable->GetNormal(0);
1330  } // for
1331}
1332#endif
1333
1334#if 1
1335// This code also works well 1/1/2008 - 14:00
1336// Using mask of 128-bits width - the code works as well, only a bit
1337// faster than the code above
1338void
1339CKTBTraversal::FindNearestI(RayPacket2x2 &rp, Vector3 &boxmin, Vector3 &boxmax)
1340{
1341
1342  static AxisAlignedBox3 localbox;
1343  localbox.SetMin(boxmin);
1344  localbox.SetMax(boxmax);
1345
1346#define USE_SIMPLE_VERSION 1
1347
1348#if !USE_SIMPLE_VERSION
1349  int m1 = _mm_movemask_ps(rp.dx4);
1350  if ((m1 == 0)||(m1 == 15)) {
1351  m1 = _mm_movemask_ps(rp.dy4);
1352  if ((m1 == 0)||(m1 == 15)) {
1353  m1 = _mm_movemask_ps(rp.dz4);
1354  if ((m1 == 0)||(m1 == 15)) {
1355    rp.Init();
1356   
1357    // all the signs for 4 rays are the same, use
1358    // ray packet traversal
1359    // Compute min and max distances
1360    GALIGN16 union { float tmin4[4]; __m128 tmin_4; };
1361    GALIGN16 union { float tmax4[4]; __m128 tmax_4; };
1362    GALIGN16 union { float activeMask[4]; __m128 activeMask_4; };
1363    GALIGN16 union { float liveMask[4]; __m128 liveMask_4; };
1364    liveMask[0] = liveMask[1] = liveMask[2] = liveMask[3] = 0xffffffff;
1365
1366    GALIGN16 SimpleRay sray[4];
1367    int maxIntersections = 4;
1368    // unsigned int inters32 = 0xf;
1369    for (int i = 0; i < 4; i++) {
1370      rp.SetObject(i, 0);
1371      localbox.ComputeMinMaxT(rp.GetLoc(i), rp.GetDir(i), &(tmin4[i]), &(tmax4[i]));
1372      if ( (tmin4[i] >= tmax4[i]) ||
1373           (tmax4[i] < 0.f) ) {
1374        liveMask[i] = 0; // finished
1375        // inters32 &= ~(1 << i); // bit zero when ray is invalid
1376        maxIntersections--;
1377      }
1378      if (tmin4[i] < 0.f)
1379        tmin4[i] = 0.f;
1380      sray[i].mOrigin = rp.GetLoc(i);
1381      sray[i].mDirection = rp.GetDir(i);
1382    } // for i
1383    if (maxIntersections == 0)
1384      return;
1385
1386    // This is the mask 128 bits witdth
1387    //activeMask_4 =
1388    //  _mm_and_ps(_mm_cmple_ps(tmin_4, tmax_4),
1389    //           _mm_cmplt_ps(tmax_4, _mm_setzero_ps()));
1390    activeMask_4 = liveMask_4;
1391
1392    SKTBNodeT * childNodes[2];
1393    int RayDirs[4];
1394    RayDirs[0] = (rp.dx[0] > 0.f) ? 1 : 0;
1395    RayDirs[1] = (rp.dy[0] > 0.f) ? 1 : 0;
1396    RayDirs[2] = (rp.dz[0] > 0.f) ? 1 : 0;
1397    int indexStack = 0;
1398    SKTBNodeT *currNode = root;
1399    unsigned int k = GetNodeType(currNode);
1400    for (;;) {
1401      // traverse until we find a leaf
1402      while (k < CKTBAxes::EE_Leaf) {
1403        // the 3 operations below can be brought down to 3 simple float
1404        // calculations by precomputing min/max of the inverse dir
1405        // const __m128 node_split = ;
1406        const __m128 t4 =
1407          _mm_mul_ps(_mm_sub_ps(_mm_set_ps1(GetSplitValue(currNode)),
1408                                rp.orig[k]), rp.idir[k]);
1409        childNodes[0] = GetLeft(currNode);
1410        childNodes[1] = GetRight(currNode);
1411        int rayDir = RayDirs[k];
1412        SKTBNodeT *far = childNodes[rayDir];
1413        if (_mm_movemask_ps(_mm_and_ps(_mm_cmpge_ps(t4, tmin_4),
1414                                       activeMask_4))) {         
1415          currNode = far;
1416          k = GetNodeType(currNode);
1417          continue;
1418        }
1419
1420        currNode = childNodes[rayDir ^ 0x1]; // this is near node
1421        k = GetNodeType(currNode);     
1422        if (_mm_movemask_ps(_mm_and_ps(_mm_cmple_ps(t4, tmax_4),
1423                                       activeMask_4)))
1424          continue;
1425
1426        // pop far node to the stack
1427        stack4[indexStack].nodep = far;
1428        stack4[indexStack].tmax_4 = tmax_4;
1429
1430// Uncomenting this macro is unsafe!
1431// Not convinced if for packet of 4 rays we can say that since when
1432// one ray is different than the others, it could bring to wrong state
1433// It is surely true for one ray when tmin < t < tmax, but for a packet
1434// of rays this condition can be true only for a single ray
1435//   tmin4 = max(t4, tmin4) = min(t4, tmax4)
1436//#define _NOT_STORE_MINT
1437
1438#ifdef _NOT_STORE_MINT 
1439#else
1440        // store mint onto the stack
1441        stack4[indexStack].tmin_4 = _mm_max_ps(t4, tmin_4);
1442#endif 
1443        // stack4[indexStack].mask = activeMask;
1444        indexStack++;
1445       
1446        tmax_4 = _mm_min_ps(t4, tmax_4);
1447        activeMask_4 = _mm_cmplt_ps( tmin_4, tmax_4 );
1448      } // while this is an interior node
1449
1450      // either a leaf or a link
1451      if (k == CKTBAxes::EE_Leaf) {
1452        // test objects for intersection
1453        if (!IsEmptyLeaf_(currNode)) {
1454          // cout << "Full leaf" << endl;
1455         
1456          // test the objects in the full leaf against the ray   
1457          for (int i = 0; i < 4; i++) {
1458            if (liveMask[i] ) {
1459              // no intersection so far !
1460              SimpleRay::IntersectionRes[i].maxt = tmax4[i];
1461#if 0
1462              // Using subroutine
1463              // Test only rays that were not finished
1464              if (TestFullLeaf(sray[i], currNode, i))
1465#else
1466              // avoiding one call
1467              const ObjectContainer * const list = GetObjList(currNode);
1468              int intersected = 0;
1469              // iterate the whole list and find out the nearest intersection
1470              ObjectContainer::const_iterator sc_end = list->end();
1471              for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
1472                // if the intersection realy lies in the node       
1473                intersected |= ((*sc)->CastSimpleRay(sray[i], i));
1474              } // for all objects
1475              if (intersected)
1476#endif
1477              {
1478                rp.SetT(i, SimpleRay::IntersectionRes[0].maxt);
1479                rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1480                // signed distance should be already set in TestFullLeaf
1481                // the first object intersected was found
1482                if (--maxIntersections == 0)
1483                  return;
1484                // inters32 &= ~(1 << i);
1485                liveMask[i] = 0;
1486              }
1487            } // if this ray did not hit the triangle so far
1488          } // for all 4 rays
1489        } // full leaf
1490
1491        // pop farChild from the stack
1492        // restore the current values
1493        // update the minimum distance since we traverse to the next one
1494        do {
1495          if (indexStack == 0)
1496            return;
1497          indexStack--;
1498          currNode = stack4[indexStack].nodep;
1499          k = GetNodeType(currNode);
1500#ifdef _NOT_STORE_MINT
1501          // this is an attempt !
1502          tmin_4 = tmax_4;
1503#else
1504          // This surrely works
1505          tmin_4 = stack4[indexStack].tmin_4;
1506#endif   
1507          tmax_4 = stack4[indexStack].tmax_4;
1508          activeMask_4 = _mm_and_ps(_mm_cmple_ps( tmin_4, tmax_4 ), liveMask_4);
1509        }
1510        while (_mm_movemask_ps(activeMask_4) == 0);
1511      }
1512      else {
1513        // cout << "Link node was accessed" << endl;
1514        assert(k == CKTBAxes::EE_Link);
1515        currNode = GetLinkNode(currNode);
1516        k = GetNodeType(currNode);
1517      }
1518    } // for(;;)
1519    return;
1520  }}}
1521#endif
1522  // Trace ray by ray
1523  SimpleRay ray;
1524  for (int i = 0; i < 4; i++) {
1525    ray.mOrigin = rp.GetLoc(i);
1526    ray.mDirection = rp.GetDir(i);
1527    FindNearestI(ray, localbox);
1528    rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1529    rp.SetT(i, SimpleRay::IntersectionRes[0].tdist);
1530    // SimpleRay::IntersectionRes[0].intersectable->GetNormal(0);
1531  } // for
1532}
1533#endif
1534
1535#endif // __SSE__
1536
1537#endif //  TRV00F
1538
1539} // namespace
1540
Note: See TracBrowser for help on using the repository browser.