source: GTP/trunk/Lib/Vis/Preprocessing/src/havran/ktbftrav.cpp @ 2582

Revision 2582, 44.8 KB checked in by bittner, 16 years ago (diff)

Havran Ray Caster compiles and links, but still does not work

Line 
1// ===================================================================
2// $Id: $
3//
4// ktbftrav.cpp
5//
6// class: CKTBTraversal
7//
8// REPLACEMENT_STRING
9//
10// Copyright by Vlastimil Havran, 2007 - email to "vhavran AT seznam.cz"
11// Initial coding by Vlasta Havran, February 2007 (copy from kdrtrav.cpp)
12
13// GOLEM headers
14#include "ktbconf.h"
15#include "ktbtrav.h"
16#include "Intersectable.h"
17
18namespace GtpVisibilityPreprocessor {
19
20#ifdef TRV00F
21
22#if 0
23// the depth of traversal when kd-trees are nested. The global (the highest
24// level) kd-tree is at the depth 0.
25int
26CKTBTraversal::traversalDepth = 0;
27
28// sets the stack pointers that are used for the traversal.
29void
30CKTBTraversal::GetStackPointers(struct SStackElem *&entryPointer,
31                                struct SStackElem *&exitPointer)
32{
33  assert(traversalDepth < MAX_NESTING);
34 
35  int index = traversalDepth * MAX_HEIGHT;
36  entryPointer = &(stack[index]);
37  exitPointer = &(stack[index + 1]);
38
39  traversalDepth++; 
40  return;
41}
42
43// sets the stack pointers that are used for the traversal.
44void
45CKTBTraversal::RestoreStackPointers()
46{
47  assert(traversalDepth >= 0);
48  traversalDepth--; 
49  return;
50}
51
52
53// default constructor
54CKTBTraversal::CKTBTraversal()
55{
56  bbox = AxisAlignedBox3(Vector3(MAXFLOAT),
57                         Vector3(-MAXFLOAT));
58  root = 0;
59  epsilon = 0;
60
61#ifdef __TRAVERSAL_STATISTICS
62  _allNodesTraversed = 0L;
63  _fullLeavesTraversed = 0L;
64  _emptyLeavesTraversed = 0L;
65#endif
66}
67 
68// Here we find the node (preferably minbox node) containing
69// the point
70const SKTBNodeT*
71CKTBTraversal::Locate(const Vector3 &position)
72{
73  const SKTBNodeT *current, *next = root;
74  const SKTBNodeT *returnNode = 0;
75
76  do {
77    current = next;
78    switch (GetNodeType(current)) {
79      // -------------------------------------------------
80      case CKTBAxes::EE_X_axis: {
81        if (position[CKTBAxes::EE_X_axis] < current->splitPlane)
82          next =  GetLeft(current);
83        else
84          next = GetRight(current);
85        break;
86      }
87      case CKTBAxes::EE_Y_axis: {
88        if (position[CKTBAxes::EE_Y_axis] < current->splitPlane)
89          next =  GetLeft(current);
90        else
91          next = GetRight(current);
92        break;
93      }
94      case CKTBAxes::EE_Z_axis: {
95        if (position[CKTBAxes::EE_Z_axis] < current->splitPlane)
96          next =  GetLeft(current);
97        else
98          next = GetRight(current);
99        break;
100      }
101      case CKTBAxes::EE_X_axisBox: {
102        if (position[CKTBAxes::EE_X_axis] < current->splitPlane)
103          next =  GetLeft(current);
104        else
105          next = GetRight(current);
106        returnNode = current;
107        break;
108      }
109      case CKTBAxes::EE_Y_axisBox: {
110        if (position[CKTBAxes::EE_Y_axis] < current->splitPlane)
111          next =  GetLeft(current);
112        else
113          next = GetRight(current);
114        returnNode = current;
115        break;
116      }
117      case CKTBAxes::EE_Z_axisBox: {
118        if (position[CKTBAxes::EE_Z_axis] < current->splitPlane)
119          next =  GetLeft(current);
120        else
121          next = GetRight(current);
122        returnNode = current;
123        break;
124      }
125      case CKTBAxes::EE_Leaf: {
126        next = 0; // finishing
127        break;
128      }
129      case CKTBAxes::EE_Link:{
130        next = GetLinkNode(current);
131        // cout << "Link node was accessed" << endl;
132        break;
133      }
134    } // switch
135  }
136  while (next != NULL);
137
138  if (returnNode)
139    return returnNode;
140
141  // return the last (leaf node) visited on the path
142  return current;
143}
144#endif
145 
146 
147#if 1
148int
149CKTBTraversal::FindNearestI(const SimpleRay &ray)
150{
151#if 0
152  static int counter = 0;
153  counter++;
154  bool debug = false;
155  if (counter == 530) {
156    debug = true;
157    cout << "COUNTER = " << counter << endl;
158    cout << "DEBUG starts" << endl;   
159  }
160#endif
161 
162  // passing through parameters
163  float tmin, tmax;
164 
165  // test if the whole CKTB tree is missed by the input ray
166  if ( (!root) ||
167       (!bbox.ComputeMinMaxT(ray.mOrigin, ray.mDirection, &tmin, &tmax)) ||
168       (tmax <= tmin) ||
169       (tmax <= 0.f) ) {
170    SimpleRay::IntersectionRes[0].intersectable = 0;
171    return 0; // no object can be intersected
172  }
173
174//#define _DEBUGKTB
175#ifdef _DEBUGKTB
176  int ib = 0;
177  int depth = 0;
178#endif
179 
180#ifdef __TRAVERSAL_STATISTICS
181  int allNodesTraversed = 0L;
182  int fullLeavesTraversed = 0L;
183  int emptyLeavesTraversed = 0L;
184#endif // __TRAVERSAL_STATISTICS
185
186  Vector3 invertedDir;
187  invertedDir.x = 1.0f / ray.mDirection.x;
188  invertedDir.y = 1.0f / ray.mDirection.y;
189  invertedDir.z = 1.0f / ray.mDirection.z;
190 
191  // start from the root node
192  if (tmin < 0.f)
193    tmin = 0.f;
194
195  int index = 1;
196  stack3[1].nodep = root;
197  stack3[1].tmax = tmax;
198  tmax = tmin;
199  SKTBNodeT * childNodes[2];
200  int RayDirs[3];
201  RayDirs[0] = ray.mDirection.x < 0.f ? 1 : 0;
202  RayDirs[1] = ray.mDirection.y < 0.f ? 1 : 0;
203  RayDirs[2] = ray.mDirection.z < 0.f ? 1 : 0;
204 
205  // we have to check the node
206  // current node is not the leaf, empty leaves are NULL pointers
207  while (index) {
208    register SKTBNodeT *currNode = stack3[index].nodep;
209    tmin = tmax;
210    tmax = stack3[index].tmax;
211#if 0
212    if (debug) {
213      cout << "node = " << (void*)currNode
214           << " tmin = " << tmin << " tmax = " << tmax
215           << endl;
216    }
217#endif
218   
219    assert(tmin <= tmax);
220   
221#ifdef __TRAVERSAL_STATISTICS
222    allNodesTraversed++;
223#endif // __TRAVERSAL_STATISTICS
224    register const int nodeType = GetNodeType(currNode);
225       
226    if (nodeType < CKTBAxes::EE_Leaf) {
227      float tval = (GetSplitValue(currNode) - ray.mOrigin[nodeType]);
228      tval *= invertedDir[nodeType];
229      SKTBNodeT *near, *far;
230      childNodes[0] = GetLeft(currNode);
231      childNodes[1] = GetRight(currNode);
232      int rayDir = RayDirs[nodeType];
233      near = childNodes[rayDir];
234      far = childNodes[rayDir ^ 0x1];
235     
236      stack3[index].nodep = far;
237      // stack3[index].tmax = tmax; // not necessary, this is already there !
238      // int c = tval < tmax ? 1 : 0;
239      index += tval < tmax ? 1 : 0;
240      stack3[index].nodep = near;
241      stack3[index].tmax = Min(tval, tmax);
242      tmax = tmin;
243      // int d = tval < tmin ? -1 : 0;
244      index += tval < tmin ? -1 : 0;
245    }
246    else {
247      if (nodeType == CKTBAxes::EE_Leaf) {
248        // test objects for intersection
249#ifdef _DEBUGKTB
250        cout << "Leaf " << endl;
251        depth++;
252#endif
253#ifdef _DEBUGKTB
254        DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
255              << " extp.t = " << extp->t << endl;
256#endif
257        if (!IsEmptyLeaf_(currNode)) {
258#ifdef _DEBUGKTB
259          cout << "Full leaf at depth= " << depth << endl;
260#endif     
261
262#ifdef __TRAVERSAL_STATISTICS
263          fullLeavesTraversed++;
264#endif // __TRAVERSAL_STATISTICS
265          // test the objects in the full leaf against the ray
266         
267          SimpleRay::IntersectionRes[0].maxt = stack3[index].tmax;
268#if 0
269          // using subroutine
270          if (TestFullLeaf(ray, currNode))
271#else
272          // Avoiding function call by copying the code
273          const ObjectContainer * const list = GetObjList(currNode);
274          int intersected = 0;
275          // iterate the whole list and find out the nearest intersection
276          ObjectContainer::const_iterator sc_end = list->end();
277          for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
278            // if the intersection realy lies in the node       
279            intersected += ((*sc)->CastSimpleRay(ray));
280          } // for all objects
281          if (intersected)
282#endif
283          {
284#ifdef _DEBUGKTB
285            cout << "Full leaf HIT " << endl;
286#endif 
287             
288#ifdef __TRAVERSAL_STATISTICS
289            _allNodesTraversed += allNodesTraversed;
290            _fullLeavesTraversed += fullLeavesTraversed;
291            _emptyLeavesTraversed += emptyLeavesTraversed;
292#endif // __TRAVERSAL_STATISTICS
293           
294            // signed distance should be already set in TestFullLeaf
295            // the first object intersected was found   
296            return 1;
297          }
298        } // full leaf
299#ifdef __TRAVERSAL_STATISTICS
300        else {
301#ifdef _DEBUGKTB
302          cout << "Empty leaf at depth= " << depth << endl;
303#endif     
304          emptyLeavesTraversed++;
305        }
306#endif // __TRAVERSAL_STATISTICS
307
308#ifdef _DEBUGKTB
309        cout << "Pop the node" << endl;
310#endif     
311   
312        // pop farChild from the stack
313        // restore the current values
314        index--;
315        continue;
316      }
317      else {
318        assert(nodeType == CKTBAxes::EE_Link);
319#ifdef _DEBUGKTB
320        cout << "Link " << endl;
321#endif
322        stack3[index].nodep = GetLinkNode(currNode);
323        // cout << "Link node was accessed" << endl;
324        continue;
325      }
326    }
327  } // while current node is not the leaf
328
329#ifdef __TRAVERSAL_STATISTICS
330  _allNodesTraversed += allNodesTraversed;
331  _fullLeavesTraversed += fullLeavesTraversed;
332  _emptyLeavesTraversed += emptyLeavesTraversed;
333#endif // __TRAVERSAL_STATISTICS
334
335  // no objects found along the ray path
336  return 0;
337} // FindNearestI - single ray
338#endif
339
340#if 1
341int
342CKTBTraversal::FindNearestI(const SimpleRay &ray, const AxisAlignedBox3 &localbox)
343{
344#if 0
345  static int counter = 0;
346  counter++;
347  bool debug = false;
348  if (counter == 530) {
349    debug = true;
350    cout << "COUNTER = " << counter << endl;
351    cout << "DEBUG starts" << endl;   
352  }
353#endif
354 
355  // passing through parameters
356  float tmin, tmax;
357 
358  // test if the whole CKTB tree is missed by the input ray
359  if ( (!root) ||
360       (!localbox.ComputeMinMaxT(ray.mOrigin, ray.mDirection, &tmin, &tmax)) ||
361       (tmax <= tmin) ||
362       (tmax <= 0.f) ) {
363    SimpleRay::IntersectionRes[0].intersectable = 0;
364    return 0; // no object can be intersected
365  }
366
367//#define _DEBUGKTB
368#ifdef _DEBUGKTB
369  int ib = 0;
370  int depth = 0;
371#endif
372 
373#ifdef __TRAVERSAL_STATISTICS
374  int allNodesTraversed = 0L;
375  int fullLeavesTraversed = 0L;
376  int emptyLeavesTraversed = 0L;
377#endif // __TRAVERSAL_STATISTICS
378
379  Vector3 invertedDir;
380  invertedDir.x = 1.0f / ray.mDirection.x;
381  invertedDir.y = 1.0f / ray.mDirection.y;
382  invertedDir.z = 1.0f / ray.mDirection.z;
383 
384  // start from the root node
385  if (tmin < 0.f)
386    tmin = 0.f;
387
388  int index = 1;
389  stack3[1].nodep = root;
390  stack3[1].tmax = tmax;
391  tmax = tmin;
392  SKTBNodeT * childNodes[2];
393  int RayDirs[3];
394  RayDirs[0] = ray.mDirection.x < 0.f ? 1 : 0;
395  RayDirs[1] = ray.mDirection.y < 0.f ? 1 : 0;
396  RayDirs[2] = ray.mDirection.z < 0.f ? 1 : 0;
397 
398  // we have to check the node
399  // current node is not the leaf, empty leaves are NULL pointers
400  while (index) {
401    register SKTBNodeT *currNode = stack3[index].nodep;
402    tmin = tmax;
403    tmax = stack3[index].tmax;
404#if 0
405    if (debug) {
406      cout << "node = " << (void*)currNode
407           << " tmin = " << tmin << " tmax = " << tmax
408           << endl;
409    }
410#endif
411   
412    assert(tmin <= tmax);
413   
414#ifdef __TRAVERSAL_STATISTICS
415    allNodesTraversed++;
416#endif // __TRAVERSAL_STATISTICS
417    register const int nodeType = GetNodeType(currNode);
418    if (nodeType < CKTBAxes::EE_Leaf) {
419      float tval = (GetSplitValue(currNode) - ray.mOrigin[nodeType]);
420      tval *= invertedDir[nodeType];
421      SKTBNodeT *near, *far;
422      childNodes[0] = GetLeft(currNode);
423      childNodes[1] = GetRight(currNode);
424      int rayDir = RayDirs[nodeType];
425      near = childNodes[rayDir];
426      far = childNodes[rayDir ^ 0x1];
427     
428      stack3[index].nodep = far;
429      // stack3[index].tmax = tmax; // not necessary, this is already there !
430      // int c = tval < tmax ? 1 : 0;
431      index += tval < tmax ? 1 : 0;
432      stack3[index].nodep = near;
433      stack3[index].tmax = Min(tval, tmax);
434      tmax = tmin;
435      // int d = tval < tmin ? -1 : 0;
436      index += tval < tmin ? -1 : 0;
437    }
438    else {
439      if (nodeType == CKTBAxes::EE_Leaf) {
440        // test objects for intersection
441#ifdef _DEBUGKTB
442        cout << "Leaf " << endl;
443        depth++;
444#endif
445#ifdef _DEBUGKTB
446        DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
447              << " extp.t = " << extp->t << endl;
448#endif
449        if (!IsEmptyLeaf_(currNode)) {
450#ifdef _DEBUGKTB
451          cout << "Full leaf at depth= " << depth << endl;
452#endif     
453
454#ifdef __TRAVERSAL_STATISTICS
455          fullLeavesTraversed++;
456#endif // __TRAVERSAL_STATISTICS
457          // test the objects in the full leaf against the ray
458         
459          SimpleRay::IntersectionRes[0].maxt = stack3[index].tmax;
460#if 0
461          // using subroutine
462          if (TestFullLeaf(ray, currNode))
463#else
464          // Avoiding function call by copying the code
465          const ObjectContainer * const list = GetObjList(currNode);
466          int intersected = 0;
467          // iterate the whole list and find out the nearest intersection
468          ObjectContainer::const_iterator sc_end = list->end();
469          for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
470            // if the intersection realy lies in the node       
471            intersected += ((*sc)->CastSimpleRay(ray));
472          } // for all objects
473          if (intersected)
474#endif
475          {
476#ifdef _DEBUGKTB
477            cout << "Full leaf HIT " << endl;
478#endif 
479             
480#ifdef __TRAVERSAL_STATISTICS
481            _allNodesTraversed += allNodesTraversed;
482            _fullLeavesTraversed += fullLeavesTraversed;
483            _emptyLeavesTraversed += emptyLeavesTraversed;
484#endif // __TRAVERSAL_STATISTICS
485           
486            // signed distance should be already set in TestFullLeaf
487            // the first object intersected was found   
488            return 1;
489          }
490        } // full leaf
491#ifdef __TRAVERSAL_STATISTICS
492        else {
493#ifdef _DEBUGKTB
494          cout << "Empty leaf at depth= " << depth << endl;
495#endif     
496          emptyLeavesTraversed++;
497        }
498#endif // __TRAVERSAL_STATISTICS
499
500#ifdef _DEBUGKTB
501        cout << "Pop the node" << endl;
502#endif     
503   
504        // pop farChild from the stack
505        // restore the current values
506        index--;
507        continue;
508      }
509      else {
510        assert(nodeType == CKTBAxes::EE_Link);
511#ifdef _DEBUGKTB
512        cout << "Link " << endl;
513#endif
514        stack3[index].nodep = GetLinkNode(currNode);
515        // cout << "Link node was accessed" << endl;
516        continue;
517      }
518    }
519  } // while current node is not the leaf
520
521#ifdef __TRAVERSAL_STATISTICS
522  _allNodesTraversed += allNodesTraversed;
523  _fullLeavesTraversed += fullLeavesTraversed;
524  _emptyLeavesTraversed += emptyLeavesTraversed;
525#endif // __TRAVERSAL_STATISTICS
526
527  // no objects found along the ray path
528  return 0;
529} // FindNearestI - single ray
530#endif
531 
532
533// ---------------------------------------------------------------------------
534// This is an attempt using SSE instructions - not successfull 31/12/2007
535#if 0
536int
537CKTBTraversal::FindNearestI(const SimpleRay &ray)
538{
539#if 0
540  static int counter = 0;
541  counter++;
542  bool debug = false;
543  if (counter == 530) {
544    debug = true;
545    cout << "COUNTER = " << counter << endl;
546    cout << "DEBUG starts" << endl;   
547  }
548#endif
549 
550  // passing through parameters
551  float tmin, tmax;
552 
553  // test if the whole CKTB tree is missed by the input ray
554  if ( (!root) ||
555       (!bbox.ComputeMinMaxT(ray.mOrigin, ray.mDirection, &tmin, &tmax)) ||
556       (tmax <= tmin) ||
557       (tmax <= 0.f) ) {
558    SimpleRay::IntersectionRes[0].intersectable = 0;
559    return 0; // no object can be intersected
560  }
561
562//#define _DEBUGKTB
563#ifdef _DEBUGKTB
564  int ib = 0;
565  int depth = 0;
566#endif
567 
568#ifdef __TRAVERSAL_STATISTICS
569  int allNodesTraversed = 0L;
570  int fullLeavesTraversed = 0L;
571  int emptyLeavesTraversed = 0L;
572#endif // __TRAVERSAL_STATISTICS
573
574  Vector3 invertedDir;
575  invertedDir.x = 1.0f / ray.mDirection.x;
576  invertedDir.y = 1.0f / ray.mDirection.y;
577  invertedDir.z = 1.0f / ray.mDirection.z;
578 
579  // start from the root node
580  if (tmin < 0.f)
581    tmin = 0.f;
582
583  int index = 1;
584  stack3[1].nodep = root;
585  stack3[1].tmax = tmax;
586  tmax = tmin;
587  SKTBNodeT * childNodes[2];
588  int RayDirs[3];
589  RayDirs[0] = ray.mDirection.x < 0.f ? 1 : 0;
590  RayDirs[1] = ray.mDirection.y < 0.f ? 1 : 0;
591  RayDirs[2] = ray.mDirection.z < 0.f ? 1 : 0;
592 
593  // we have to check the node
594  // current node is not the leaf, empty leaves are NULL pointers
595  while (index) {
596    SKTBNodeT *currNode = stack3[index].nodep;
597    tmin = tmax;
598    tmax = stack3[index].tmax;
599#if 0
600    if (debug) {
601      cout << "node = " << (void*)currNode
602           << " tmin = " << tmin << " tmax = " << tmax
603           << endl;
604    }
605#endif
606   
607    assert(tmin <= tmax);
608   
609#ifdef __TRAVERSAL_STATISTICS
610    allNodesTraversed++;
611#endif // __TRAVERSAL_STATISTICS
612    const unsigned int nodeType = GetNodeType(currNode);
613    if (nodeType < CKTBAxes::EE_Leaf) {
614      float tval = (GetSplitValue(currNode) - ray.mOrigin[nodeType]);
615      tval *= invertedDir[nodeType];
616      SKTBNodeT *near, *far;
617      childNodes[0] = GetLeft(currNode);
618      childNodes[1] = GetRight(currNode);
619      int rayDir = RayDirs[nodeType];
620      near = childNodes[rayDir];
621      far = childNodes[rayDir ^ 0x1];
622      // This code is slower than above !!!!!
623      stack3[index].nodep = far;
624      // stack3[index].tmax = tmax; // this is already there, not necessary!
625      __m128 tsim = _mm_set_ss(tval);
626      // index += tval < tmax ? 1 : 0;
627      __m128 tother = _mm_set_ss(tmax);
628      index += _mm_ucomilt_ss(tsim, tother);
629      stack3[index].nodep = near;
630      // stack3[index].tmax = Min(tval, tmax);
631      _mm_store_ss(&(stack3[index].tmax), _mm_min_ps(tsim, tother));
632      tmax = tmin;
633      // index += tval < tmin ? -1 : 0;
634      __m128 tother2 = _mm_set_ss(tmin);
635      index -= _mm_ucomilt_ss(tsim, tother2);
636    }
637    else {
638      if (nodeType == CKTBAxes::EE_Leaf) {
639        // test objects for intersection
640#ifdef _DEBUGKTB
641        cout << "Leaf " << endl;
642        depth++;
643#endif
644#ifdef _DEBUGKTB
645        DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
646              << " extp.t = " << extp->t << endl;
647#endif
648        if (!IsEmptyLeaf_(currNode)) {
649#ifdef _DEBUGKTB
650          cout << "Full leaf at depth= " << depth << endl;
651#endif     
652
653#ifdef __TRAVERSAL_STATISTICS
654          fullLeavesTraversed++;
655#endif // __TRAVERSAL_STATISTICS
656          // test the objects in the full leaf against the ray
657         
658          SimpleRay::IntersectionRes[0].maxt = stack3[index].tmax;
659          if (TestFullLeaf(ray, currNode)) {
660#ifdef _DEBUGKTB
661            cout << "Full leaf HIT " << endl;
662#endif 
663             
664#ifdef __TRAVERSAL_STATISTICS
665            _allNodesTraversed += allNodesTraversed;
666            _fullLeavesTraversed += fullLeavesTraversed;
667            _emptyLeavesTraversed += emptyLeavesTraversed;
668#endif // __TRAVERSAL_STATISTICS
669           
670            // signed distance should be already set in TestFullLeaf
671            // the first object intersected was found   
672            return 1;
673          }
674        } // full leaf
675#ifdef __TRAVERSAL_STATISTICS
676        else {
677#ifdef _DEBUGKTB
678          cout << "Empty leaf at depth= " << depth << endl;
679#endif     
680          emptyLeavesTraversed++;
681        }
682#endif // __TRAVERSAL_STATISTICS
683
684#ifdef _DEBUGKTB
685        cout << "Pop the node" << endl;
686#endif     
687   
688        // pop farChild from the stack
689        // restore the current values
690        index--;
691        continue;
692      }
693      else {
694        assert(nodeType == CKTBAxes::EE_Link);
695#ifdef _DEBUGKTB
696        cout << "Link " << endl;
697#endif
698        stack3[index].nodep = GetLinkNode(currNode);
699        // cout << "Link node was accessed" << endl;
700        continue;
701      }
702    }
703  } // while current node is not the leaf
704
705#ifdef __TRAVERSAL_STATISTICS
706  _allNodesTraversed += allNodesTraversed;
707  _fullLeavesTraversed += fullLeavesTraversed;
708  _emptyLeavesTraversed += emptyLeavesTraversed;
709#endif // __TRAVERSAL_STATISTICS
710
711  // no objects found along the ray path
712  return 0;
713} // FindNearestI - single ray
714#endif
715
716#if 0
717// Reasonably fast - about 101,500 rays per second for single dir!
718// It allows fast switching context from one ray to the next ray so it it
719// virtually independent of memory latency !
720int
721CKTBTraversal::FindNearestI_16oneDir(SimpleRayContainer &rays, int offset)
722{
723  // passing through parameters
724  int cntRays = 0;
725  if (!root)
726    return 0;
727
728  // The auxiliary variables to be precomputed
729  static float invertedDir[cntMaxRays * 4];
730  static float rayOrig[cntMaxRays * 4];
731  static int rayDirs[cntMaxRays * 4];
732  static int indexRay[cntMaxRays];
733  static int indexArray[cntMaxRays];
734  static int indexStack[cntMaxRays];
735  const int LOG2_MAX_HEIGHT = 5;
736  const int MAX_HEIGHT = 1 << LOG2_MAX_HEIGHT;
737  assert(MAX_HEIGHT == 32);
738  static struct SStackElem3 stackA[cntMaxRays * MAX_HEIGHT];
739  static float tmaxArray[cntMaxRays];
740  int cntHits = 0;
741 
742  float tmin, tmax;
743  for (int i = 0; i < cntMaxRays; i++) {
744  // test if the whole CKTB tree is missed by the input ray
745    if ((!bbox.ComputeMinMaxT(rays[i+offset].mOrigin,
746                              rays[i+offset].mDirection,
747                              &tmin, &tmax)) ||
748        (tmax <= tmin) ||
749        (tmax <= 0.f) ) {
750      SimpleRay::IntersectionRes[i].intersectable = 0;
751    }
752    else {
753      int indexR = (cntRays << 2);
754      rayOrig[indexR + 0] = rays[i+offset].mOrigin.x;
755      rayOrig[indexR + 1] = rays[i+offset].mOrigin.y;
756      rayOrig[indexR + 2] = rays[i+offset].mOrigin.z;
757      //rayOrig[indexR + 3] = 0.f;
758      invertedDir[indexR + 0] = 1.0f / (rays[i+offset].mDirection.x);
759      invertedDir[indexR + 1] = 1.0f / (rays[i+offset].mDirection.y);
760      invertedDir[indexR + 2] = 1.0f / (rays[i+offset].mDirection.z);
761      //invertedDir[indexR + 2] = 0.f;
762      rayDirs[indexR + 0] = rays[i+offset].mDirection.x < 0.f ? 1 : 0;
763      rayDirs[indexR + 1] = rays[i+offset].mDirection.y < 0.f ? 1 : 0;
764      rayDirs[indexR + 2] = rays[i+offset].mDirection.z < 0.f ? 1 : 0;
765      //rayDirs[indexR + 3] = 0;
766      indexRay[cntRays] = i; // the index to the ray
767      indexArray[cntRays] = cntRays; // the index to the array
768      int indexS = (cntRays << LOG2_MAX_HEIGHT) + 1;
769      indexStack[cntRays] = indexS; // the index in the stack
770      stackA[indexS].nodep = root; // we start from the root
771      stackA[indexS].tmax = tmax; // maximum distance
772      if (tmin < 0.f) tmin = 0.f;
773      tmaxArray[cntRays] = tmin;
774      cntRays++;
775    }
776  }
777
778//#define _DEBUGKTB
779#ifdef _DEBUGKTB
780  int ib = 0;
781  int depth = 0;
782#endif
783 
784#ifdef __TRAVERSAL_STATISTICS
785  int allNodesTraversed = 0L;
786  int fullLeavesTraversed = 0L;
787  int emptyLeavesTraversed = 0L;
788#endif // __TRAVERSAL_STATISTICS
789 
790  SKTBNodeT * childNodes[2];
791
792#define PREF_DEFAULT _MM_HINT_T0
793 
794  // we have to check the node
795  // current node is not the leaf, empty leaves are NULL pointers
796  while (cntRays) {
797    // we assume that all the nodes are interior nodes
798    for (int i = 0; i < cntRays; i++) {
799      // which indices to array should be used
800      int indexA = indexArray[i];
801      float tmin = tmaxArray[indexA];
802
803      // the stack indexing is here
804      int indexSA = indexStack[indexA];
805      SKTBNodeT *currNode = stackA[indexSA].nodep;
806     
807#if 0
808      if (debug) {
809        cout << "node = " << (void*)currNode
810             << " tmin = " << tmin << " tmax = " << tmax
811             << endl;
812      }
813#endif
814
815#if 0
816      if (tmin > tmax) {
817        cout << "PROBLEM tmin = " << tmin << " tmax = " << tmax;
818        cout << endl;
819      }
820#endif     
821      assert(tmin <= tmax);
822   
823#ifdef __TRAVERSAL_STATISTICS
824      allNodesTraversed++;
825#endif // __TRAVERSAL_STATISTICS
826      const unsigned int nodeType = (GetNodeType(currNode)) & 0x7;
827      if (nodeType < CKTBAxes::EE_Leaf) {
828        int indexRayOD = (indexA << 2) + nodeType;
829        float tval = (GetSplitValue(currNode) - rayOrig[indexRayOD])
830          * invertedDir[indexRayOD];
831        SKTBNodeT *near, *far;
832        childNodes[0] = GetLeft(currNode);
833        childNodes[1] = GetRight(currNode);
834        int rayDir = rayDirs[indexRayOD];
835        near = childNodes[rayDir];
836        far = childNodes[rayDir ^ 0x1];
837       
838        stackA[indexSA].nodep = far; // store far node
839        //stackA[indexSA].tmax = tmax; // with correct dist - the same as before
840        float tmax = tmaxArray[indexA] = stackA[indexSA].tmax;
841        int c = tval < tmax ? 1 : 0;
842        indexSA += c;
843        stackA[indexSA].nodep = near; // store near node
844        stackA[indexSA].tmax = Min(tval, tmax); // with correct dist
845        int d = tval < tmin ? 1 : 0;
846        indexSA -= d;
847        // This is prefetching - so the next time we have it
848        // in the cache !
849        GPREFETCH(stackA[indexSA].nodep, PREF_DEFAULT);
850        // store tmax and index to the stack
851        indexStack[indexA] = indexSA;
852        tmaxArray[indexA] = tmin;       
853      }
854      else {
855        if (nodeType == CKTBAxes::EE_Leaf) {
856          // test objects for intersection
857#ifdef _DEBUGKTB
858          cout << "Leaf " << endl;
859          depth++;
860#endif
861#ifdef _DEBUGKTB
862          DEBUG << "currNode = " << currNode << " entp.t = " << entp->t
863                << " extp.t = " << extp->t << endl;
864#endif
865          float tmax = tmaxArray[indexA] = stackA[indexSA].tmax;
866          if (!IsEmptyLeaf_(currNode)) {
867#ifdef _DEBUGKTB
868            cout << "Full leaf at depth= " << depth << endl;
869#endif     
870
871#ifdef __TRAVERSAL_STATISTICS
872            fullLeavesTraversed++;
873#endif // __TRAVERSAL_STATISTICS
874          // test the objects in the full leaf against the ray
875         
876            // which ray is processed
877            int indexR = indexRay[indexA];
878            SimpleRay::IntersectionRes[indexR].maxt = tmax;
879            if (TestFullLeaf(rays[indexR+offset], currNode, indexR)) {
880
881              // we remove the ray from the calculation
882              indexArray[i] = indexArray[cntRays-1];
883              cntRays--; // we decrease the number of rays
884              cntHits++;
885#ifdef _DEBUGKTB
886              cout << "Full leaf HIT " << endl;
887#endif 
888             
889#ifdef __TRAVERSAL_STATISTICS
890              _allNodesTraversed += allNodesTraversed;
891              _fullLeavesTraversed += fullLeavesTraversed;
892              _emptyLeavesTraversed += emptyLeavesTraversed;
893#endif // __TRAVERSAL_STATISTICS           
894           
895              // signed distance should be already set in TestFullLeaf
896              // the first object intersected was found
897              continue;
898            }
899          } // full leaf
900#ifdef __TRAVERSAL_STATISTICS
901          else {
902#ifdef _DEBUGKTB
903            cout << "Empty leaf at depth= " << depth << endl;
904#endif     
905            emptyLeavesTraversed++;
906          }
907#endif // __TRAVERSAL_STATISTICS
908
909#ifdef _DEBUGKTB
910          cout << "Pop the node" << endl;
911#endif     
912   
913          // pop farChild from the stack
914          // restore the current values
915          --indexSA;
916          // This is bits 0,1,2,3,4,5 - the stack depth = 32 !
917          if ( (indexSA & 0x1f) == 0) {
918            // we remove the ray from the calculation
919            indexArray[i] = indexArray[cntRays-1];
920            cntRays--; // we decrease the number of rays
921          }
922          else {
923            indexStack[indexA] = indexSA;
924            // we prefetch the data to be accessible
925            // the next time
926            GPREFETCH(stackA[indexSA].nodep, PREF_DEFAULT);
927          }
928          continue;
929        }
930        else {
931          assert(nodeType == CKTBAxes::EE_Link);
932#ifdef _DEBUGKTB
933          cout << "Link " << endl;
934#endif
935          stackA[indexSA].nodep = GetLinkNode(currNode);
936          // cout << "Link node was accessed" << endl;
937          continue;
938        }
939      } // empty leaf or link
940    } // for all active rays
941  } // while cntRays
942
943#ifdef __TRAVERSAL_STATISTICS
944  _allNodesTraversed += allNodesTraversed;
945  _fullLeavesTraversed += fullLeavesTraversed;
946  _emptyLeavesTraversed += emptyLeavesTraversed;
947#endif // __TRAVERSAL_STATISTICS
948
949  // no objects found along the ray path
950  return cntHits;
951}
952#endif
953
954#ifdef __SSE__
955
956#if 1
957// Even faster - about 125,500 rays per second for single dir and 164 rps
958// for double dir !
959int
960CKTBTraversal::FindNearestI_16oneDir(SimpleRayContainer &rays, int offset)
961{
962  static RayPacket2x2 raypack;
963  struct SResultI {
964    Intersectable *intersectable;
965    float          tdist;   
966  };
967  static SResultI results[16];
968   
969  for (int i = 0; i < 4; i++) {
970    int k = i * 4 + offset;
971    for (int j = 0; j < 4; j++, k++) {
972      raypack.SetLoc(j, rays[k].mOrigin);
973      raypack.SetDir(j, rays[k].mDirection);     
974    }
975    // Here either use ray packet traversal or
976    // casting individual rays
977    FindNearestI(raypack);
978    k = i * 4;
979    for (int j = 0; j < 4; j++, k++) {
980      results[k].intersectable = raypack.GetObject(j);
981      results[k].tdist = raypack.GetT(j);
982    } // for j
983  } // for i
984
985  // Copy the results to the output array
986  for (int i = 0; i < 16; i++) {
987    SimpleRay::IntersectionRes[i].intersectable =
988      results[i].intersectable;
989    SimpleRay::IntersectionRes[i].tdist =
990      results[i].tdist;
991  } // for i
992}
993#endif
994 
995#if 0
996// This code works well 1/1/2008 - 11:00
997// The same operations for packets of rays for the same signs,
998// otherwise it is emulated by decomposition
999// of a packet to individual rays and traced individually.
1000void
1001CKTBTraversal::FindNearestI(RayPacket2x2 &rp)
1002{
1003  int m1 = _mm_movemask_ps(rp.dx4);
1004  if ((m1 == 0)||(m1 == 15)) {
1005  m1 = _mm_movemask_ps(rp.dy4);
1006  if ((m1 == 0)||(m1 == 15)) {
1007  m1 = _mm_movemask_ps(rp.dz4);
1008  if ((m1 == 0)||(m1 == 15)) {
1009    rp.Init();
1010    // all the signs for 4 rays are the same, use
1011    // ray packet traversal
1012    // Compute min and max distances
1013    GALIGN16 union { float tmin4[4]; __m128 tmin_4; };
1014    GALIGN16 union { float tmax4[4]; __m128 tmax_4; };
1015    SimpleRay sray[4];
1016    int maxIntersections = 4;
1017    GALIGN16 union { int inters[4]; __m128 inters_4; };
1018    inters[0] = inters[1] = inters[2] = inters[3] = 1;
1019    unsigned int inters32 = 0xf;
1020    for (int i = 0; i < 4; i++) {
1021      bbox.ComputeMinMaxT(rp.GetLoc(i), rp.GetDir(i), &(tmin4[i]), &(tmax4[i]));
1022      if ( (tmin4[i] >= tmax4[i]) ||
1023           (tmax4[i] < 0.f) ) {
1024        inters[i] = 0; // finished
1025        inters32 &= ~(1 << i); // bit zero when ray is invalid
1026        maxIntersections--;
1027      }     
1028      if (tmin4[i] < 0.f)
1029        tmin4[i] = 0.f;
1030      sray[i].mOrigin = rp.GetLoc(i);
1031      sray[i].mDirection = rp.GetDir(i);
1032    } // for i
1033    if (maxIntersections == 0)
1034      return;
1035
1036    SKTBNodeT * childNodes[2];
1037    int RayDirs[3];
1038    RayDirs[0] = (rp.dx[0] > 0.f) ? 1 : 0;
1039    RayDirs[1] = (rp.dy[0] > 0.f) ? 1 : 0;
1040    RayDirs[2] = (rp.dz[0] > 0.f) ? 1 : 0;
1041    //int activeMask=_mm_movemask_ps(_mm_cmplt_ps( tmin_4, tmax_4 ))&inters32;
1042    int activeMask = inters32;
1043    int indexStack = 0;
1044    SKTBNodeT *currNode = root;
1045    unsigned int k = GetNodeType(currNode);
1046    for (;;) {     
1047      while (k < CKTBAxes::EE_Leaf) {
1048        // the 3 operations below can be brought down to 3 simple float
1049        // calculations by precomputing min/max of the inverse dir
1050        const __m128 node_split = _mm_set_ps1(GetSplitValue(currNode));
1051        const __m128 t4 =
1052          _mm_mul_ps(_mm_sub_ps(node_split, rp.orig[k]), rp.idir[k]);
1053        childNodes[0] = GetLeft(currNode);
1054        childNodes[1] = GetRight(currNode);
1055        int rayDir = RayDirs[k];
1056        SKTBNodeT *far = childNodes[rayDir];
1057        if (!(_mm_movemask_ps(_mm_cmpgt_ps(t4, tmin_4)) & activeMask))
1058        {
1059          currNode = far;
1060          k = GetNodeType(currNode);
1061          continue;
1062        }
1063        currNode = childNodes[rayDir ^ 0x1]; // this is near node
1064        k = GetNodeType(currNode);     
1065        if (! (_mm_movemask_ps(_mm_cmplt_ps( t4, tmax_4)) & activeMask))
1066          continue;
1067
1068        // pop far node to the stack
1069        stack4[indexStack].nodep = far;
1070        stack4[indexStack].tmax_4 = tmax_4;
1071        stack4[indexStack].tmin_4 = _mm_max_ps(t4, tmin_4);
1072        // stack4[indexStack].mask = activeMask;
1073        indexStack++;
1074       
1075        tmax_4 = _mm_min_ps(t4, tmax_4);
1076        activeMask &= _mm_movemask_ps(_mm_cmplt_ps( tmin_4, tmax_4 ));
1077      } // while this is an interior node
1078
1079      // either a leaf or a link
1080      if (k == CKTBAxes::EE_Leaf) {
1081        // test objects for intersection
1082        if (!IsEmptyLeaf_(currNode)) {
1083          // cout << "Full leaf" << endl;
1084         
1085          // test the objects in the full leaf against the ray   
1086          for (int i = 0; i < 4; i++) {
1087            if (inters[i] ) {
1088              // no intersection so far !
1089              SimpleRay::IntersectionRes[i].maxt = tmax4[i];
1090              // Test only rays that were not finished
1091              if (TestFullLeaf(sray[i], currNode, i)) {
1092                // intersection for this ray found
1093                inters[i] = 0;
1094                inters32 &= ~(1 << i);
1095                rp.SetT(i, SimpleRay::IntersectionRes[0].maxt);
1096                rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);           
1097                // signed distance should be already set in TestFullLeaf
1098                // the first object intersected was found
1099                if (--maxIntersections == 0)
1100                  return;
1101              }
1102            } // if this ray did not hit the triangle so far
1103          } // for all 4 rays
1104        } // full leaf
1105        // pop farChild from the stack
1106        // restore the current values
1107        // update the minimum distance since we traverse to the next one
1108
1109        if (indexStack == 0)
1110          return;
1111        indexStack--;
1112        currNode = stack4[indexStack].nodep;
1113        k = GetNodeType(currNode);
1114        tmin_4 = stack4[indexStack].tmin_4;
1115        tmax_4 = stack4[indexStack].tmax_4;
1116        activeMask = _mm_movemask_ps(_mm_cmple_ps( tmin_4, tmax_4 )) & inters32;
1117        continue;
1118      }
1119      // cout << "Link node was accessed" << endl;
1120      assert(k == CKTBAxes::EE_Link);
1121      currNode = GetLinkNode(currNode);
1122      k = GetNodeType(currNode);
1123    } // for
1124    return;
1125  }}}
1126
1127  // Trace ray by ray
1128  SimpleRay ray;
1129  for (int i = 0; i < 4; i++) {
1130    ray.mOrigin = rp.GetLoc(i);
1131    ray.mDirection = rp.GetDir(i);
1132    FindNearestI(ray);
1133    rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1134    rp.SetT(i, SimpleRay::IntersectionRes[0].tdist);
1135    // SimpleRay::IntersectionRes[0].intersectable->GetNormal(0);
1136  } // for
1137}
1138#endif
1139
1140
1141#if 1
1142// This code also works well 1/1/2008 - 14:00
1143// Using mask of 128-bits width - the code works as well, only a bit
1144// faster than the code above
1145void
1146CKTBTraversal::FindNearestI(RayPacket2x2 &rp)
1147{
1148  int m1 = _mm_movemask_ps(rp.dx4);
1149  if ((m1 == 0)||(m1 == 15)) {
1150  m1 = _mm_movemask_ps(rp.dy4);
1151  if ((m1 == 0)||(m1 == 15)) {
1152  m1 = _mm_movemask_ps(rp.dz4);
1153  if ((m1 == 0)||(m1 == 15)) {
1154    rp.Init();
1155    // all the signs for 4 rays are the same, use
1156    // ray packet traversal
1157    // Compute min and max distances
1158    GALIGN16 union { float tmin4[4]; __m128 tmin_4; };
1159    GALIGN16 union { float tmax4[4]; __m128 tmax_4; };
1160    GALIGN16 union { float activeMask[4]; __m128 activeMask_4; };
1161    GALIGN16 union { float liveMask[4]; __m128 liveMask_4; };
1162    liveMask[0] = liveMask[1] = liveMask[2] = liveMask[3] = 0xffffffff;
1163
1164    GALIGN16 SimpleRay sray[4];
1165    int maxIntersections = 4;
1166    // unsigned int inters32 = 0xf;
1167    for (int i = 0; i < 4; i++) {
1168      rp.SetObject(i, 0);
1169      bbox.ComputeMinMaxT(rp.GetLoc(i), rp.GetDir(i), &(tmin4[i]), &(tmax4[i]));
1170      if ( (tmin4[i] >= tmax4[i]) ||
1171           (tmax4[i] < 0.f) ) {
1172        liveMask[i] = 0; // finished
1173        // inters32 &= ~(1 << i); // bit zero when ray is invalid
1174        maxIntersections--;
1175      }
1176      if (tmin4[i] < 0.f)
1177        tmin4[i] = 0.f;
1178      sray[i].mOrigin = rp.GetLoc(i);
1179      sray[i].mDirection = rp.GetDir(i);
1180    } // for i
1181    if (maxIntersections == 0)
1182      return;
1183
1184    // This is the mask 128 bits witdth
1185    //activeMask_4 =
1186    //  _mm_and_ps(_mm_cmple_ps(tmin_4, tmax_4),
1187    //           _mm_cmplt_ps(tmax_4, _mm_setzero_ps()));
1188    activeMask_4 = liveMask_4;
1189
1190    SKTBNodeT * childNodes[2];
1191    int RayDirs[4];
1192    RayDirs[0] = (rp.dx[0] > 0.f) ? 1 : 0;
1193    RayDirs[1] = (rp.dy[0] > 0.f) ? 1 : 0;
1194    RayDirs[2] = (rp.dz[0] > 0.f) ? 1 : 0;
1195    int indexStack = 0;
1196    SKTBNodeT *currNode = root;
1197    unsigned int k = GetNodeType(currNode);
1198    for (;;) {
1199      // traverse until we find a leaf
1200      while (k < CKTBAxes::EE_Leaf) {
1201        // the 3 operations below can be brought down to 3 simple float
1202        // calculations by precomputing min/max of the inverse dir
1203        // const __m128 node_split = ;
1204        const __m128 t4 =
1205          _mm_mul_ps(_mm_sub_ps(_mm_set_ps1(GetSplitValue(currNode)),
1206                                rp.orig[k]), rp.idir[k]);
1207        childNodes[0] = GetLeft(currNode);
1208        childNodes[1] = GetRight(currNode);
1209        int rayDir = RayDirs[k];
1210        SKTBNodeT *far = childNodes[rayDir];
1211        if (_mm_movemask_ps(_mm_and_ps(_mm_cmpge_ps(t4, tmin_4),
1212                                       activeMask_4))) {         
1213          currNode = far;
1214          k = GetNodeType(currNode);
1215          continue;
1216        }
1217
1218        currNode = childNodes[rayDir ^ 0x1]; // this is near node
1219        k = GetNodeType(currNode);     
1220        if (_mm_movemask_ps(_mm_and_ps(_mm_cmple_ps(t4, tmax_4),
1221                                       activeMask_4)))
1222          continue;
1223
1224        // pop far node to the stack
1225        stack4[indexStack].nodep = far;
1226        stack4[indexStack].tmax_4 = tmax_4;
1227
1228// Uncomenting this macro is unsafe!
1229// Not convinced if for packet of 4 rays we can say that since when
1230// one ray is different than the others, it could bring to wrong state
1231// It is surely true for one ray when tmin < t < tmax, but for a packet
1232// of rays this condition can be true only for a single ray
1233//   tmin4 = max(t4, tmin4) = min(t4, tmax4)
1234//#define _NOT_STORE_MINT
1235
1236#ifdef _NOT_STORE_MINT 
1237#else
1238        // store mint onto the stack
1239        stack4[indexStack].tmin_4 = _mm_max_ps(t4, tmin_4);
1240#endif 
1241        // stack4[indexStack].mask = activeMask;
1242        indexStack++;
1243       
1244        tmax_4 = _mm_min_ps(t4, tmax_4);
1245        activeMask_4 = _mm_cmplt_ps( tmin_4, tmax_4 );
1246      } // while this is an interior node
1247
1248      // either a leaf or a link
1249      if (k == CKTBAxes::EE_Leaf) {
1250        // test objects for intersection
1251        if (!IsEmptyLeaf_(currNode)) {
1252          // cout << "Full leaf" << endl;
1253         
1254          // test the objects in the full leaf against the ray   
1255          for (int i = 0; i < 4; i++) {
1256            if (liveMask[i] ) {
1257              // no intersection so far !
1258              SimpleRay::IntersectionRes[i].maxt = tmax4[i];
1259#if 0
1260              // Using subroutine
1261              // Test only rays that were not finished
1262              if (TestFullLeaf(sray[i], currNode, i))
1263#else
1264              // avoiding one call
1265              const ObjectContainer * const list = GetObjList(currNode);
1266              int intersected = 0;
1267              // iterate the whole list and find out the nearest intersection
1268              ObjectContainer::const_iterator sc_end = list->end();
1269              for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
1270                // if the intersection realy lies in the node       
1271                intersected |= ((*sc)->CastSimpleRay(sray[i], i));
1272              } // for all objects
1273              if (intersected)
1274#endif
1275              {
1276                rp.SetT(i, SimpleRay::IntersectionRes[0].maxt);
1277                rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1278                // signed distance should be already set in TestFullLeaf
1279                // the first object intersected was found
1280                if (--maxIntersections == 0)
1281                  return;
1282                // inters32 &= ~(1 << i);
1283                liveMask[i] = 0;
1284              }
1285            } // if this ray did not hit the triangle so far
1286          } // for all 4 rays
1287        } // full leaf
1288
1289        // pop farChild from the stack
1290        // restore the current values
1291        // update the minimum distance since we traverse to the next one
1292        do {
1293          if (indexStack == 0)
1294            return;
1295          indexStack--;
1296          currNode = stack4[indexStack].nodep;
1297          k = GetNodeType(currNode);
1298#ifdef _NOT_STORE_MINT
1299          // this is an attempt !
1300          tmin_4 = tmax_4;
1301#else
1302          // This surrely works
1303          tmin_4 = stack4[indexStack].tmin_4;
1304#endif   
1305          tmax_4 = stack4[indexStack].tmax_4;
1306          activeMask_4 = _mm_and_ps(_mm_cmple_ps( tmin_4, tmax_4 ), liveMask_4);
1307        }
1308        while (_mm_movemask_ps(activeMask_4) == 0);
1309      }
1310      else {
1311        // cout << "Link node was accessed" << endl;
1312        assert(k == CKTBAxes::EE_Link);
1313        currNode = GetLinkNode(currNode);
1314        k = GetNodeType(currNode);
1315      }
1316    } // for(;;)
1317    return;
1318  }}}
1319
1320  // Trace ray by ray
1321  SimpleRay ray;
1322  for (int i = 0; i < 4; i++) {
1323    ray.mOrigin = rp.GetLoc(i);
1324    ray.mDirection = rp.GetDir(i);
1325    FindNearestI(ray);
1326    rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1327    rp.SetT(i, SimpleRay::IntersectionRes[0].tdist);
1328    // SimpleRay::IntersectionRes[0].intersectable->GetNormal(0);
1329  } // for
1330}
1331#endif
1332
1333#if 1
1334// This code also works well 1/1/2008 - 14:00
1335// Using mask of 128-bits width - the code works as well, only a bit
1336// faster than the code above
1337void
1338CKTBTraversal::FindNearestI(RayPacket2x2 &rp, Vector3 &boxmin, Vector3 &boxmax)
1339{
1340  static AxisAlignedBox3 localbox;
1341  localbox.SetMin(boxmin);
1342  localbox.SetMax(boxmax);
1343
1344  int m1 = _mm_movemask_ps(rp.dx4);
1345  if ((m1 == 0)||(m1 == 15)) {
1346  m1 = _mm_movemask_ps(rp.dy4);
1347  if ((m1 == 0)||(m1 == 15)) {
1348  m1 = _mm_movemask_ps(rp.dz4);
1349  if ((m1 == 0)||(m1 == 15)) {
1350    rp.Init();
1351   
1352    // all the signs for 4 rays are the same, use
1353    // ray packet traversal
1354    // Compute min and max distances
1355    GALIGN16 union { float tmin4[4]; __m128 tmin_4; };
1356    GALIGN16 union { float tmax4[4]; __m128 tmax_4; };
1357    GALIGN16 union { float activeMask[4]; __m128 activeMask_4; };
1358    GALIGN16 union { float liveMask[4]; __m128 liveMask_4; };
1359    liveMask[0] = liveMask[1] = liveMask[2] = liveMask[3] = 0xffffffff;
1360
1361    GALIGN16 SimpleRay sray[4];
1362    int maxIntersections = 4;
1363    // unsigned int inters32 = 0xf;
1364    for (int i = 0; i < 4; i++) {
1365      rp.SetObject(i, 0);
1366      localbox.ComputeMinMaxT(rp.GetLoc(i), rp.GetDir(i), &(tmin4[i]), &(tmax4[i]));
1367      if ( (tmin4[i] >= tmax4[i]) ||
1368           (tmax4[i] < 0.f) ) {
1369        liveMask[i] = 0; // finished
1370        // inters32 &= ~(1 << i); // bit zero when ray is invalid
1371        maxIntersections--;
1372      }
1373      if (tmin4[i] < 0.f)
1374        tmin4[i] = 0.f;
1375      sray[i].mOrigin = rp.GetLoc(i);
1376      sray[i].mDirection = rp.GetDir(i);
1377    } // for i
1378    if (maxIntersections == 0)
1379      return;
1380
1381    // This is the mask 128 bits witdth
1382    //activeMask_4 =
1383    //  _mm_and_ps(_mm_cmple_ps(tmin_4, tmax_4),
1384    //           _mm_cmplt_ps(tmax_4, _mm_setzero_ps()));
1385    activeMask_4 = liveMask_4;
1386
1387    SKTBNodeT * childNodes[2];
1388    int RayDirs[4];
1389    RayDirs[0] = (rp.dx[0] > 0.f) ? 1 : 0;
1390    RayDirs[1] = (rp.dy[0] > 0.f) ? 1 : 0;
1391    RayDirs[2] = (rp.dz[0] > 0.f) ? 1 : 0;
1392    int indexStack = 0;
1393    SKTBNodeT *currNode = root;
1394    unsigned int k = GetNodeType(currNode);
1395    for (;;) {
1396      // traverse until we find a leaf
1397      while (k < CKTBAxes::EE_Leaf) {
1398        // the 3 operations below can be brought down to 3 simple float
1399        // calculations by precomputing min/max of the inverse dir
1400        // const __m128 node_split = ;
1401        const __m128 t4 =
1402          _mm_mul_ps(_mm_sub_ps(_mm_set_ps1(GetSplitValue(currNode)),
1403                                rp.orig[k]), rp.idir[k]);
1404        childNodes[0] = GetLeft(currNode);
1405        childNodes[1] = GetRight(currNode);
1406        int rayDir = RayDirs[k];
1407        SKTBNodeT *far = childNodes[rayDir];
1408        if (_mm_movemask_ps(_mm_and_ps(_mm_cmpge_ps(t4, tmin_4),
1409                                       activeMask_4))) {         
1410          currNode = far;
1411          k = GetNodeType(currNode);
1412          continue;
1413        }
1414
1415        currNode = childNodes[rayDir ^ 0x1]; // this is near node
1416        k = GetNodeType(currNode);     
1417        if (_mm_movemask_ps(_mm_and_ps(_mm_cmple_ps(t4, tmax_4),
1418                                       activeMask_4)))
1419          continue;
1420
1421        // pop far node to the stack
1422        stack4[indexStack].nodep = far;
1423        stack4[indexStack].tmax_4 = tmax_4;
1424
1425// Uncomenting this macro is unsafe!
1426// Not convinced if for packet of 4 rays we can say that since when
1427// one ray is different than the others, it could bring to wrong state
1428// It is surely true for one ray when tmin < t < tmax, but for a packet
1429// of rays this condition can be true only for a single ray
1430//   tmin4 = max(t4, tmin4) = min(t4, tmax4)
1431//#define _NOT_STORE_MINT
1432
1433#ifdef _NOT_STORE_MINT 
1434#else
1435        // store mint onto the stack
1436        stack4[indexStack].tmin_4 = _mm_max_ps(t4, tmin_4);
1437#endif 
1438        // stack4[indexStack].mask = activeMask;
1439        indexStack++;
1440       
1441        tmax_4 = _mm_min_ps(t4, tmax_4);
1442        activeMask_4 = _mm_cmplt_ps( tmin_4, tmax_4 );
1443      } // while this is an interior node
1444
1445      // either a leaf or a link
1446      if (k == CKTBAxes::EE_Leaf) {
1447        // test objects for intersection
1448        if (!IsEmptyLeaf_(currNode)) {
1449          // cout << "Full leaf" << endl;
1450         
1451          // test the objects in the full leaf against the ray   
1452          for (int i = 0; i < 4; i++) {
1453            if (liveMask[i] ) {
1454              // no intersection so far !
1455              SimpleRay::IntersectionRes[i].maxt = tmax4[i];
1456#if 0
1457              // Using subroutine
1458              // Test only rays that were not finished
1459              if (TestFullLeaf(sray[i], currNode, i))
1460#else
1461              // avoiding one call
1462              const ObjectContainer * const list = GetObjList(currNode);
1463              int intersected = 0;
1464              // iterate the whole list and find out the nearest intersection
1465              ObjectContainer::const_iterator sc_end = list->end();
1466              for (ObjectContainer::const_iterator sc = list->begin(); sc != sc_end; sc++) {
1467                // if the intersection realy lies in the node       
1468                intersected |= ((*sc)->CastSimpleRay(sray[i], i));
1469              } // for all objects
1470              if (intersected)
1471#endif
1472              {
1473                rp.SetT(i, SimpleRay::IntersectionRes[0].maxt);
1474                rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1475                // signed distance should be already set in TestFullLeaf
1476                // the first object intersected was found
1477                if (--maxIntersections == 0)
1478                  return;
1479                // inters32 &= ~(1 << i);
1480                liveMask[i] = 0;
1481              }
1482            } // if this ray did not hit the triangle so far
1483          } // for all 4 rays
1484        } // full leaf
1485
1486        // pop farChild from the stack
1487        // restore the current values
1488        // update the minimum distance since we traverse to the next one
1489        do {
1490          if (indexStack == 0)
1491            return;
1492          indexStack--;
1493          currNode = stack4[indexStack].nodep;
1494          k = GetNodeType(currNode);
1495#ifdef _NOT_STORE_MINT
1496          // this is an attempt !
1497          tmin_4 = tmax_4;
1498#else
1499          // This surrely works
1500          tmin_4 = stack4[indexStack].tmin_4;
1501#endif   
1502          tmax_4 = stack4[indexStack].tmax_4;
1503          activeMask_4 = _mm_and_ps(_mm_cmple_ps( tmin_4, tmax_4 ), liveMask_4);
1504        }
1505        while (_mm_movemask_ps(activeMask_4) == 0);
1506      }
1507      else {
1508        // cout << "Link node was accessed" << endl;
1509        assert(k == CKTBAxes::EE_Link);
1510        currNode = GetLinkNode(currNode);
1511        k = GetNodeType(currNode);
1512      }
1513    } // for(;;)
1514    return;
1515  }}}
1516
1517  // Trace ray by ray
1518  SimpleRay ray;
1519  for (int i = 0; i < 4; i++) {
1520    ray.mOrigin = rp.GetLoc(i);
1521    ray.mDirection = rp.GetDir(i);
1522    FindNearestI(ray, localbox);
1523    rp.SetObject(i, SimpleRay::IntersectionRes[0].intersectable);
1524    rp.SetT(i, SimpleRay::IntersectionRes[0].tdist);
1525    // SimpleRay::IntersectionRes[0].intersectable->GetNormal(0);
1526  } // for
1527}
1528#endif
1529
1530#endif // __SSE__
1531
1532#endif //  TRV00F
1533
1534} // namespace
1535
Note: See TracBrowser for help on using the repository browser.