1 | // ============================================================================
|
---|
2 | // $Id: raypack.h $
|
---|
3 | //
|
---|
4 | // raypack.h
|
---|
5 | // CRayPacket class - core of the ray-packet traversal routines
|
---|
6 | //
|
---|
7 | // Class: CRayPacket2x2
|
---|
8 | //
|
---|
9 | // REPLACEMENT_STRING
|
---|
10 | //
|
---|
// Initial coding by Vlastimil Havran, 2006. The data design is in fact the
// Jacco Bikker layout as proposed in the article on the Intel web site in 2005
|
---|
13 | // http://www.intel.com/cd/ids/developer/asmo-na/eng/245711.htm?page=1
|
---|
14 |
|
---|
15 | #ifndef __RAYPACK_H__
|
---|
16 | #define __RAYPACK_H__
|
---|
17 |
|
---|
18 | #include <cassert>
|
---|
19 |
|
---|
20 | namespace GtpVisibilityPreprocessor {
|
---|
21 |
|
---|
22 | #include "Vector3.h"
|
---|
23 |
|
---|
24 | #ifdef __SSE__
|
---|
25 |
|
---|
26 | // System headers for SSE
|
---|
27 | #ifdef __INTEL_COMPILER
|
---|
28 | #include <xmmintrin.h>
|
---|
29 | #else
|
---|
30 | // We assume GNU GCC compiler 3.4 or higher
|
---|
31 | #include <xmmintrin.h>
|
---|
32 | #endif
|
---|
33 |
|
---|
34 |
|
---|
35 | // forward declarations
|
---|
// Switch selecting the SSE-intrinsic variants of the helper macros below.
// It is defined unconditionally here, so the #else branch further down is
// only reached if this define is removed.
#define SSE_INTRINSIC
#ifdef SSE_INTRINSIC

// ALIGN16: declaration prefix requesting 16-byte alignment.
// __declspec(align(16)) is understood only by MSVC-compatible compilers;
// GCC-compatible compilers need the __attribute__ spelling instead.
#ifdef _MSC_VER
#define ALIGN16 __declspec(align(16))
#else
#define ALIGN16 __attribute__((aligned(16)))
#endif

// NEW_ALIGN16(type,n):  allocate an uninitialized array of n objects of 'type'.
// FREE_ALIGN16(array):  release such an array and reset the pointer to 0.
#ifdef _MSC_VER
#define NEW_ALIGN16(type,n) ((type*)_aligned_malloc((n)*sizeof(type),16))
#define FREE_ALIGN16(array) if(array){_aligned_free(array);(array)=0;}
#else
// NOTE(review): plain malloc() is presumably not guaranteed to return
// 16-byte aligned memory on every target (e.g. some 32-bit ABIs) - confirm
// before storing __m128 data in memory obtained through this macro.
#define NEW_ALIGN16(type,n) ((type*)malloc((n)*sizeof(type)))
#define FREE_ALIGN16(array) if (array) { free(array);(array)=0;}
#endif // _MSC_VER

// PAD_FOUR(h) forwards to mulFour(), which is not defined in this header.
#define PAD_FOUR(h) mulFour(h)

// Gives access to the four float lanes of an SSE register.
union extract_m128
{
  __m128 m;
  float f[4];
};
#else
#define NEW_ALIGN16(type,n) ((type*)malloc((n)*sizeof(type)))
#define ALIGN16
#define FREE_ALIGN16(array) if(array){free(array);(array)=0;}
#define PAD_FOUR(h) (h)
#endif
60 |
|
---|
61 | // -------------------------------------------------------------------
|
---|
// RayPacket2x2 class. A set of 4 rays, each defined by a location and a
// direction. The direction is always normalized (length == 1) during
// use.
|
---|
65 | // -------------------------------------------------------------------
|
---|
66 | class RayPacket2x2
|
---|
67 | {
|
---|
68 | public:
|
---|
69 | enum {
|
---|
70 | // The number of rays in one packet
|
---|
71 | PACKSIZE = 4
|
---|
72 | };
|
---|
73 |
|
---|
74 | // constructor
|
---|
75 | RayPacket2x2(
|
---|
76 | // origin and the direction of rays
|
---|
77 | const Vector3 orf[],
|
---|
78 | const Vector3 dirf[],
|
---|
79 | // The same type for all four rays
|
---|
80 | const int _type,
|
---|
81 | // All the rays has to start at the same cell
|
---|
82 | const void *_originCell = NULL,
|
---|
83 | // All the rays has to start at the same object
|
---|
84 | const Intersectable *_startObject = NULL,
|
---|
85 | // and also for shadow rays finish at the same object
|
---|
86 | const Intersectable *_stopObject = NULL
|
---|
87 | )
|
---|
88 | {
|
---|
89 | // location
|
---|
90 | ox[0] = orf[0].x; ox[1] = orf[1].x; ox[2] = orf[2].x; ox[3] = orf[3].x;
|
---|
91 | oy[0] = orf[0].y; oy[1] = orf[1].y; oy[2] = orf[2].y; oy[3] = orf[3].y;
|
---|
92 | oz[0] = orf[0].z; oz[1] = orf[1].z; oz[2] = orf[2].z; oz[3] = orf[3].z;
|
---|
93 | // direction, we assume to be normalized
|
---|
94 | dx[0] = dirf[0].x; dx[1] = dirf[1].x; dx[2] = dirf[2].x; dx[3] = dirf[3].x;
|
---|
95 | dy[0] = dirf[0].y; dy[1] = dirf[1].y; dy[2] = dirf[2].y; dy[3] = dirf[3].y;
|
---|
96 | dz[0] = dirf[0].z; dz[1] = dirf[1].z; dz[2] = dirf[2].z; dz[3] = dirf[3].z;
|
---|
97 | // Other components
|
---|
98 | ttype = _type;
|
---|
99 | origin = _originCell;
|
---|
100 | termination = NULL;
|
---|
101 | startObject = _startObject;
|
---|
102 | stopObject = _stopObject;
|
---|
103 | Init();
|
---|
104 | }
|
---|
105 | // dummy constructor
|
---|
106 | RayPacket2x2() {}
|
---|
107 |
|
---|
108 | // Inititalize the ray again when already constructed
|
---|
109 | void Init(
|
---|
110 | // origin and the direction of rays
|
---|
111 | const Vector3 orf[],
|
---|
112 | const Vector3 dirf[],
|
---|
113 | // The same type for all four rays
|
---|
114 | const int _type,
|
---|
115 | // All the rays has to start at the same cell
|
---|
116 | const void *_originCell = NULL,
|
---|
117 | // All the rays has to start at the same object
|
---|
118 | const Intersectable *_startObject = NULL,
|
---|
119 | // and also for shadow rays finish at the same object
|
---|
120 | const Intersectable *_stopObject = NULL,
|
---|
121 | // if the direction of rays is normalized or not
|
---|
122 | bool dirNormalized = false)
|
---|
123 | {
|
---|
124 | // location
|
---|
125 | ox[0] = orf[0].x; ox[1] = orf[1].x; ox[2] = orf[2].x; ox[3] = orf[3].x;
|
---|
126 | oy[0] = orf[0].y; oy[1] = orf[1].y; oy[2] = orf[2].y; oy[3] = orf[3].y;
|
---|
127 | oz[0] = orf[0].z; oz[1] = orf[1].z; oz[2] = orf[2].z; oz[3] = orf[3].z;
|
---|
128 | // direction
|
---|
129 | if (dirNormalized) {
|
---|
130 | // already normalized
|
---|
131 | dx[0] = dirf[0].x; dx[1] = dirf[1].x; dx[2] = dirf[2].x; dx[3] = dirf[3].x;
|
---|
132 | dy[0] = dirf[0].y; dy[1] = dirf[1].y; dy[2] = dirf[2].y; dy[3] = dirf[3].y;
|
---|
133 | dz[0] = dirf[0].z; dz[1] = dirf[1].z; dz[2] = dirf[2].z; dz[3] = dirf[3].z;
|
---|
134 | }
|
---|
135 | else {
|
---|
136 | dx[0] = dirf[0].x; dx[1] = dirf[1].x; dx[2] = dirf[2].x; dx[3] = dirf[3].x;
|
---|
137 | dy[0] = dirf[0].y; dy[1] = dirf[1].y; dy[2] = dirf[2].y; dy[3] = dirf[3].y;
|
---|
138 | dz[0] = dirf[0].z; dz[1] = dirf[1].z; dz[2] = dirf[2].z; dz[3] = dirf[3].z;
|
---|
139 | std::cerr << "Normalization not yet implemented" << std::endl;
|
---|
140 | abort();
|
---|
141 | }
|
---|
142 | // other components
|
---|
143 | ttype = _type;
|
---|
144 | origin = _originCell;
|
---|
145 | termination = NULL;
|
---|
146 | startObject = _startObject;
|
---|
147 | stopObject = _stopObject;
|
---|
148 | Init();
|
---|
149 | }
|
---|
150 |
|
---|
151 | // Computes the inverted direction of the rays, used optionally by
|
---|
152 | // a ray traversal algorithm. This has to be reconsidered, if it
|
---|
153 | // is really valuable. !!!
|
---|
154 | void ComputeInvertedDir() const;
|
---|
155 |
|
---|
156 | // Computes the sign of the rays and returns false if all the directions
|
---|
157 | // for all three axes are the same, but it could be different among axes,
|
---|
158 | // but for one axis all 4 rays must have the same direction
|
---|
159 | bool ComputeDirSign() const;
|
---|
160 |
|
---|
161 | // the cell in the ASDS, where ray starts from
|
---|
162 | void SetOrigin(const void *c) {origin = c;}
|
---|
163 | const void *GetOrigin() const { return origin; }
|
---|
164 |
|
---|
165 | // the cell in the ASDS, where ray finishes the walk
|
---|
166 | void SetTermination(const void *c) {termination = c; }
|
---|
167 | const void* GetTermination() const { return termination;}
|
---|
168 |
|
---|
169 | // the object on which the ray starts at
|
---|
170 | const Intersectable* GetStartObject() const { return startObject;}
|
---|
171 | const Intersectable* GetStopObject() const { return stopObject;}
|
---|
172 |
|
---|
173 | void SetStartObject(const Intersectable *newStartObject) {
|
---|
174 | startObject = newStartObject;
|
---|
175 | }
|
---|
176 | void SetStopObject(const Intersectable *newStopObject) {
|
---|
177 | stopObject = newStopObject;
|
---|
178 | }
|
---|
179 | int GetType() const { return ttype; }
|
---|
180 |
|
---|
181 | // Reading and Setting origin of the ray and direction
|
---|
182 | // Ray origin
|
---|
183 | inline void SetLoc(int i, const Vector3 &l);
|
---|
184 | Vector3 GetLoc(int i) const;
|
---|
185 | // Direction
|
---|
186 | void SetDir(int i, const Vector3 &ndir);
|
---|
187 | Vector3 GetDir(int i) const;
|
---|
188 |
|
---|
189 | // Retuns an object that is intersected by i-th ray
|
---|
190 | Intersectable* GetObject(int i) const;
|
---|
191 | void SetObject(int i, Intersectable* obj);
|
---|
192 | // Retuns a signed distance that is intersected by i-th ray
|
---|
193 | float GetT(int i) const;
|
---|
194 | void SetT(int i, float t);
|
---|
195 | // Retuns maximum signed distance that is intersected by i-th ray
|
---|
196 | float GetMaxT(int i) const;
|
---|
197 | void SetMaxT(int i, float t);
|
---|
198 |
|
---|
199 | // make such operation to slightly change the ray direction
|
---|
200 | // in case any component of ray direction is zero. This is
|
---|
201 | // carried out for all the rays in a packet
|
---|
202 | void CorrectZeroComponents();
|
---|
203 |
|
---|
204 | // Returns the sign of the direction if this was precomputed
|
---|
205 | const int &GetSign(int axis) const { return sign_dir[axis];}
|
---|
206 |
|
---|
207 | // Reset the result of intersection
|
---|
208 | void ResetObjects() {
|
---|
209 | obj[0] = obj[1] = obj[2] = obj[3] = 0;
|
---|
210 | }
|
---|
211 |
|
---|
212 | private:
|
---|
213 | // Here it is crucial the layout of the rays in memory
|
---|
214 | // The layout by Jakko Biker is used
|
---|
215 | typedef float real;
|
---|
216 | union
|
---|
217 | {
|
---|
218 | struct
|
---|
219 | {
|
---|
220 | // ox[N],oy[N],oz[N] - origin of the ray N
|
---|
221 | union { real ox[4]; __m128 ox4; };
|
---|
222 | union { real oy[4]; __m128 oy4; };
|
---|
223 | union { real oz[4]; __m128 oz4; };
|
---|
224 | };
|
---|
225 | __m128 orig[3];
|
---|
226 | };
|
---|
227 | union
|
---|
228 | {
|
---|
229 | struct
|
---|
230 | {
|
---|
231 | // dx[N],dy[N],dz[N] - direction of the ray N
|
---|
232 | union { real dx[4]; __m128 dx4; };
|
---|
233 | union { real dy[4]; __m128 dy4; };
|
---|
234 | union { real dz[4]; __m128 dz4; };
|
---|
235 | };
|
---|
236 | __m128 dir[3];
|
---|
237 | };
|
---|
238 |
|
---|
239 | #define _USE_INVDIR_RP
|
---|
240 | #ifdef _USE_INVDIR_RP
|
---|
241 | union
|
---|
242 | {
|
---|
243 | struct
|
---|
244 | {
|
---|
245 | // idx[N],idy[N],idz[N] - direction of the ray N
|
---|
246 | // inverted dir - maybe an overkill for SSE
|
---|
247 | // to be checked !
|
---|
248 | union { real idx[4]; __m128 idx4; };
|
---|
249 | union { real idy[4]; __m128 idy4; };
|
---|
250 | union { real idz[4]; __m128 idz4; };
|
---|
251 | };
|
---|
252 | __m128 idir[3];
|
---|
253 | };
|
---|
254 | #endif
|
---|
255 |
|
---|
256 | // The auxiliary and result values for traversal
|
---|
257 |
|
---|
258 | // Here is the result - currently computed signed distance
|
---|
259 | union { real t[4]; __m128 t4; };
|
---|
260 | // and so far minimum signed distance computed. This is required
|
---|
261 | // for computing ray object intersections
|
---|
262 | union { real tmax[4]; __m128 tmax4; };
|
---|
263 | // Here are the pointers to the objects that correspond to tmax[4]
|
---|
264 | // above. They can be different for all the rays !
|
---|
265 | union { Intersectable* obj[4]; __m128 obj4; };
|
---|
266 |
|
---|
267 | friend class CKTBTraversal;
|
---|
268 |
|
---|
269 | // Type of the ray: primary, shadow, dummy etc., see ERayType above
|
---|
270 | int ttype;
|
---|
271 |
|
---|
272 | // The sign of direction to be used in some algorithms. The sign
|
---|
273 | // has to be the same for all the rays in all the components of the
|
---|
274 | // direction vector !!!!
|
---|
275 | mutable int sign_dir[3];
|
---|
276 |
|
---|
277 | // I should have some abstract cell data type !!! here
|
---|
278 | // corresponds to the spatial elementary cell
|
---|
279 | const void *origin;
|
---|
280 | const void *termination;
|
---|
281 |
|
---|
282 | // the object on which surface a ray starts from
|
---|
283 | const Intersectable *startObject;
|
---|
284 | // the object on which surface a ray ends, for computation
|
---|
285 | // of the visibility queries between two points
|
---|
286 | const Intersectable *stopObject;
|
---|
287 |
|
---|
288 | /// Precompute some CRay parameters. Most of them used for ropes traversal.
|
---|
289 | inline void Init();
|
---|
290 |
|
---|
291 | // Precompute some values that are necessary.
|
---|
292 | inline void Precompute();
|
---|
293 | };
|
---|
294 |
|
---|
295 | // --------------------------------------------------------------------------
|
---|
296 | // RayPacket2x2::SetLoc()
|
---|
297 | // --------------------------------------------------------------------------
|
---|
298 | inline void
|
---|
299 | RayPacket2x2::SetLoc(int i, const Vector3 &l)
|
---|
300 | {
|
---|
301 | assert( (i>=0) && (i<4));
|
---|
302 | ox[i] = l.x;
|
---|
303 | oy[i] = l.y;
|
---|
304 | oz[i] = l.z;
|
---|
305 | }
|
---|
306 |
|
---|
307 | inline void
|
---|
308 | RayPacket2x2::SetDir(int i, const Vector3 &ndr)
|
---|
309 | {
|
---|
310 | // We assume that the direction is normalized !!!
|
---|
311 | assert( (i>=0) && (i<4));
|
---|
312 | dx[i] = ndr.x;
|
---|
313 | dy[i] = ndr.y;
|
---|
314 | dz[i] = ndr.z;
|
---|
315 | }
|
---|
316 |
|
---|
317 | inline Vector3
|
---|
318 | RayPacket2x2::GetLoc(int i) const
|
---|
319 | {
|
---|
320 | assert( (i>=0) && (i<4));
|
---|
321 | return Vector3(ox[i],oy[i],oz[i]);
|
---|
322 | }
|
---|
323 |
|
---|
324 | inline Vector3
|
---|
325 | RayPacket2x2::GetDir(int i) const
|
---|
326 | {
|
---|
327 | assert( (i>=0) && (i<4));
|
---|
328 | return Vector3(dx[i],dy[i],dz[i]);
|
---|
329 | }
|
---|
330 |
|
---|
331 | // --------------------------------------------------------------------------
|
---|
332 | // RayPacket2x2::Precompute()
|
---|
333 | // --------------------------------------------------------------------------
|
---|
334 | inline void
|
---|
335 | RayPacket2x2::Precompute()
|
---|
336 | {
|
---|
337 | // initialize inverted dir ?
|
---|
338 | #ifdef _USE_INVDIR_RP
|
---|
339 | // inverted dir - maybe an overkill for SSE
|
---|
340 | // to be checked !
|
---|
341 | idx[0] = 1.0f / dx[0];
|
---|
342 | idx[1] = 1.0f / dx[1];
|
---|
343 | idx[2] = 1.0f / dx[2];
|
---|
344 | idx[3] = 1.0f / dx[3];
|
---|
345 |
|
---|
346 | idy[0] = 1.0f / dy[0];
|
---|
347 | idy[1] = 1.0f / dy[1];
|
---|
348 | idy[2] = 1.0f / dy[2];
|
---|
349 | idy[3] = 1.0f / dy[3];
|
---|
350 |
|
---|
351 | idz[0] = 1.0f / dz[0];
|
---|
352 | idz[1] = 1.0f / dz[1];
|
---|
353 | idz[2] = 1.0f / dz[2];
|
---|
354 | idz[3] = 1.0f / dz[3];
|
---|
355 | #endif
|
---|
356 | }
|
---|
357 |
|
---|
358 | // --------------------------------------------------------------------------
|
---|
359 | // RayPacket2x2::Init()
|
---|
360 | // --------------------------------------------------------------------------
|
---|
361 | inline void
|
---|
362 | RayPacket2x2::Init()
|
---|
363 | {
|
---|
364 | // apply the standard precomputation
|
---|
365 | Precompute();
|
---|
366 | }
|
---|
367 |
|
---|
368 | // Computes the sign of the rays and returns false if all the directions
|
---|
369 | // for all three axes are the same, but it could be different among axes,
|
---|
370 | // but for one axis all 4 rays must have the same direction
|
---|
371 | inline bool
|
---|
372 | RayPacket2x2::ComputeDirSign() const
|
---|
373 | {
|
---|
374 | // Set the sign of the direction 1 when negative
|
---|
375 | sign_dir[0] = (dx[0] < 0.0f);
|
---|
376 | sign_dir[1] = (dy[0] < 0.0f);
|
---|
377 | sign_dir[2] = (dz[0] < 0.0f);
|
---|
378 | for (int i = 1; i < 4; i++) {
|
---|
379 | if (sign_dir[0] != (dx[i] < 0.0f))
|
---|
380 | return true; // different direction in x
|
---|
381 | if (sign_dir[1] != (dy[i] < 0.0f))
|
---|
382 | return true; // different direction in y
|
---|
383 | if (sign_dir[2] != (dz[i] < 0.0f))
|
---|
384 | return true; // different direction in z
|
---|
385 | }// for
|
---|
386 |
|
---|
387 | // Returns false if all 4 rays have the consistent direction
|
---|
388 | return false;
|
---|
389 | }
|
---|
390 |
|
---|
391 | inline Intersectable*
|
---|
392 | RayPacket2x2::GetObject(int i) const
|
---|
393 | {
|
---|
394 | assert( (i>=0) && (i<4));
|
---|
395 | return obj[i];
|
---|
396 | }
|
---|
397 |
|
---|
398 | inline void
|
---|
399 | RayPacket2x2::SetObject(int i, Intersectable* object)
|
---|
400 | {
|
---|
401 | assert( (i>=0) && (i<4));
|
---|
402 | obj[i] = object;
|
---|
403 | }
|
---|
404 |
|
---|
405 | inline float
|
---|
406 | RayPacket2x2::GetT(int i) const
|
---|
407 | {
|
---|
408 | assert( (i>=0) && (i<4));
|
---|
409 | return t[i];
|
---|
410 | }
|
---|
411 |
|
---|
412 | inline void
|
---|
413 | RayPacket2x2::SetT(int i, float tnew)
|
---|
414 | {
|
---|
415 | assert( (i>=0) && (i<4));
|
---|
416 | t[i] = tnew;
|
---|
417 | }
|
---|
418 |
|
---|
419 | inline float
|
---|
420 | RayPacket2x2::GetMaxT(int i) const
|
---|
421 | {
|
---|
422 | assert( (i>=0) && (i<4));
|
---|
423 | return tmax[i];
|
---|
424 | }
|
---|
425 |
|
---|
426 | inline void
|
---|
427 | RayPacket2x2::SetMaxT(int i, float tmaxnew)
|
---|
428 | {
|
---|
429 | assert( (i>=0) && (i<4));
|
---|
430 | tmax[i] = tmaxnew;
|
---|
431 | }
|
---|
432 | #else // __SSE__
|
---|
433 |
|
---|
434 | // ? What to do here
|
---|
435 | //#error "AAA"
|
---|
436 |
|
---|
437 | #endif // __SSE__
|
---|
438 |
|
---|
439 | } // namespace
|
---|
440 |
|
---|
441 | #endif // __RAYPACK_H__
|
---|
442 |
|
---|