source: GTP/trunk/App/Demos/Illum/Standalone/HierRayEngine [DirectX]/half.h @ 1481

Revision 1481, 16.8 KB checked in by szirmay, 18 years ago (diff)
Line 
1///////////////////////////////////////////////////////////////////////////
2//
3// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4// Digital Ltd. LLC
5//
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11// *       Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// *       Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following disclaimer
15// in the documentation and/or other materials provided with the
16// distribution.
17// *       Neither the name of Industrial Light & Magic nor the names of
18// its contributors may be used to endorse or promote products derived
19// from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//
33///////////////////////////////////////////////////////////////////////////
34
35// Primary authors:
36//     Florian Kainz <kainz@ilm.com>
37//     Rod Bogart <rgb@ilm.com>
38
39//---------------------------------------------------------------------------
40//
41//      half -- a 16-bit floating point number class:
42//
43//      Type half can represent positive and negative numbers, whose
44//      magnitude is between roughly 6.1e-5 and 6.5e+4, with a relative
45//      error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46//      with an absolute error of 6.0e-8.  All integers from -2048 to
47//      +2048 can be represented exactly.
48//
49//      Type half behaves (almost) like the built-in C++ floating point
50//      types.  In arithmetic expressions, half, float and double can be
51//      mixed freely.  Here are a few examples:
52//
53//          half a (3.5);
54//          float b (a + sqrt (a));
55//          a += b;
56//          b += a;
57//          b = a + 7;
58//
59//      Conversions from half to float are lossless; all half numbers
60//      are exactly representable as floats.
61//
62//      Conversions from float to half may not preserve the float's
63//      value exactly.  If a float is not representable as a half, the
64//      float value is rounded to the nearest representable half.  If
65//      a float value is exactly in the middle between the two closest
66//      representable half values, then the float value is rounded to
67//      the half with the greater magnitude.
68//
69//      Overflows during float-to-half conversions cause arithmetic
70//      exceptions.  An overflow occurs when the float value to be
71//      converted is too large to be represented as a half, or if the
72//      float value is an infinity or a NAN.
73//
74//      The implementation of type half makes the following assumptions
75//      about the implementation of the built-in C++ types:
76//
77//          float is an IEEE 754 single-precision number
78//          sizeof (float) == 4
79//          sizeof (unsigned int) == sizeof (float)
80//          alignof (unsigned int) == alignof (float)
81//          sizeof (unsigned short) == 2
82//
83//---------------------------------------------------------------------------
84
85#ifndef _HALF_H_
86#define _HALF_H_
87
88#include <iostream>
89
//
// half -- a floating point number held in one unsigned short.
//
// Only declarations appear inside the class body.  The inline member
// functions are defined after the class, later in this header; the
// private static helpers and the lookup tables are declared here but
// not defined in this header.
//

class half
{
  public:

    //
    // Construction.
    //
    // The default constructor leaves the value uninitialized.
    // The float constructor is not explicit, so a float converts
    // to half implicitly.
    //

    half ();
    half (float f);


    //
    // Implicit conversion back to float.
    //

    operator float () const;


    //
    // Negation.
    //

    half operator - () const;


    //
    // Plain and compound assignment; every operator accepts
    // either a half or a float on the right-hand side.
    //

    half & operator =  (half  h);
    half & operator =  (float f);

    half & operator += (half  h);
    half & operator += (float f);

    half & operator -= (half  h);
    half & operator -= (float f);

    half & operator *= (half  h);
    half & operator *= (float f);

    half & operator /= (half  h);
    half & operator /= (float f);


    //
    // round (n) returns the value rounded to n-bit precision, where
    // n should be between 0 and 10.  In the result, the 10-n least
    // significant bits of the significand are zero.
    //

    half round (unsigned int n) const;


    //
    // Classification predicates:
    //
    //  isFinite()        normalized number, denormalized number or zero
    //  isNormalized()    normalized number
    //  isDenormalized()  denormalized number
    //  isZero()          zero
    //  isNan()           NAN
    //  isInfinity()      positive or negative infinity
    //  isNegative()      sign bit is set
    //

    bool isFinite () const;
    bool isNormalized () const;
    bool isDenormalized () const;
    bool isZero () const;
    bool isNan () const;
    bool isInfinity () const;
    bool isNegative () const;


    //
    // Special values:
    //
    //  posInf()   +infinity
    //  negInf()   -infinity
    //  qNan()     a NAN with the bit pattern 0111111111111111
    //  sNan()     a NAN with the bit pattern 0111110111111111
    //

    static half posInf ();
    static half negInf ();
    static half qNan ();
    static half sNan ();


    //
    // Raw access to the 16 stored bits.
    //

    unsigned short bits () const;
    void setBits (unsigned short bits);


  public:

    //
    // Overlay of an unsigned int and a float; the conversion code
    // uses it to get at a float's bit pattern.
    //

    union uif
    {
        unsigned int i;
        float f;
    };

  private:

    //
    // Helpers declared here and defined outside this header.
    // convert() is the fallback used by the float constructor when
    // its table lookup yields zero.
    //

    static short convert (int i);
    static float overflow ();
    static bool selftest ();

    //
    // The value itself.
    //

    unsigned short _h;

    //
    // Lookup tables.  _toFloat has one entry per 16-bit pattern and
    // is indexed with _h by the conversion to float.  _eLut has one
    // entry per 9-bit float sign/exponent combination and is read by
    // the float constructor.
    // NOTE(review): _itWorks presumably stores the result of
    // selftest(); its definition is not in this header -- confirm.
    //

    static const uif _toFloat[1 << 16];
    static const unsigned short _eLut[1 << 9];
    static const bool _itWorks;
};
223
224
225//-----------
226// Stream I/O
227//-----------
228
229std::ostream &          operator << (std::ostream &os, half  h);
230std::istream &          operator >> (std::istream &is, half &h);
231
232
233//----------
234// Debugging
235//----------
236
237void                    printBits   (std::ostream &os, half  h);
238void                    printBits   (std::ostream &os, float f);
239void                    printBits   (char  c[19], half  h);
240void                    printBits   (char  c[35], float f);
241
242
243//-------
244// Limits
245//-------
246
247//----------------------------------------------------------------
248// Visual C++ will complain if these are not float constants,
249// but at least one other compiler (gcc 2.96) produces incorrect
250// results if they are.
251//----------------------------------------------------------------
252
253#ifdef WIN32
254#define HALF_MIN        5.96046448e-08f // Smallest positive half
255
256#define HALF_NRM_MIN    6.10351562e-05f // Smallest positive normalized half
257
258#define HALF_MAX        65504.0f        // Largest positive half
259
260#define HALF_EPSILON    0.00097656f     // Smallest positive e for which
261                                        // half (1.0 + e) != half (1.0)
262#else
263#define HALF_MIN        5.96046448e-08  // Smallest positive half
264
265#define HALF_NRM_MIN    6.10351562e-05  // Smallest positive normalized half
266
267#define HALF_MAX        65504.0         // Largest positive half
268
269#define HALF_EPSILON    0.00097656      // Smallest positive e for which
270                                        // half (1.0 + e) != half (1.0)
271#endif // WIN32
272
273#define HALF_MANT_DIG   11              // Number of digits in mantissa
274                                        // (significand + hidden leading 1)
275
276#define HALF_DIG        2               // Number of base 10 digits that
277                                        // can be represented without change
278
279#define HALF_RADIX      2               // Base of the exponent
280
281#define HALF_MIN_EXP    -13             // Minimum negative integer such that
282                                        // HALF_RADIX raised to the power of
283                                        // one less than that integer is a
284                                        // normalized half
285
286#define HALF_MAX_EXP    16              // Maximum positive integer such that
287                                        // HALF_RADIX raised to the power of
288                                        // one less than that integer is a
289                                        // normalized half
290
291#define HALF_MIN_10_EXP -4              // Minimum negative integer such
292                                        // that 10 raised to that power is
293                                        // a normalized half
294
295#define HALF_MAX_10_EXP 4               // Maximum positive integer such
296                                        // that 10 raised to that power is
297                                        // a normalized half
298
299
300//---------------------------------------------------------------------------
301//
302// Implementation --
303//
304// Representation of a float:
305//
306//      We assume that a float, f, is an IEEE 754 single-precision
307//      floating point number, whose bits are arranged as follows:
308//
309//          31 (msb)
310//          |
311//          | 30     23
312//          | |      |
313//          | |      | 22                    0 (lsb)
314//          | |      | |                     |
315//          X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
316//
317//          s e        m
318//
319//      S is the sign-bit, e is the exponent and m is the significand.
320//
321//      If e is between 1 and 254, f is a normalized number:
322//
323//                  s    e-127
324//          f = (-1)  * 2      * 1.m
325//
326//      If e is 0, and m is not zero, f is a denormalized number:
327//
328//                  s    -126
329//          f = (-1)  * 2      * 0.m
330//
331//      If e and m are both zero, f is zero:
332//
333//          f = 0.0
334//
335//      If e is 255, f is an "infinity" or "not a number" (NAN),
336//      depending on whether m is zero or not.
337//
338//      Examples:
339//
340//          0 00000000 00000000000000000000000 = 0.0
341//          0 01111110 00000000000000000000000 = 0.5
342//          0 01111111 00000000000000000000000 = 1.0
343//          0 10000000 00000000000000000000000 = 2.0
344//          0 10000000 10000000000000000000000 = 3.0
345//          1 10000101 11110000010000000000000 = -124.0625
346//          0 11111111 00000000000000000000000 = +infinity
347//          1 11111111 00000000000000000000000 = -infinity
348//          0 11111111 10000000000000000000000 = NAN
349//          1 11111111 11111111111111111111111 = NAN
350//
351// Representation of a half:
352//
353//      Here is the bit-layout for a half number, h:
354//
355//          15 (msb)
356//          |
357//          | 14  10
358//          | |   |
359//          | |   | 9        0 (lsb)
360//          | |   | |        |
361//          X XXXXX XXXXXXXXXX
362//
363//          s e     m
364//
365//      S is the sign-bit, e is the exponent and m is the significand.
366//
367//      If e is between 1 and 30, h is a normalized number:
368//
369//                  s    e-15
370//          h = (-1)  * 2     * 1.m
371//
372//      If e is 0, and m is not zero, h is a denormalized number:
373//
374//                  s    -14
375//          h = (-1)  * 2     * 0.m
376//
377//      If e and m are both zero, h is zero:
378//
379//          h = 0.0
380//
381//      If e is 31, h is an "infinity" or "not a number" (NAN),
382//      depending on whether m is zero or not.
383//
384//      Examples:
385//
386//          0 00000 0000000000 = 0.0
387//          0 01110 0000000000 = 0.5
388//          0 01111 0000000000 = 1.0
389//          0 10000 0000000000 = 2.0
390//          0 10000 1000000000 = 3.0
391//          1 10101 1111000001 = -124.0625
392//          0 11111 0000000000 = +infinity
393//          1 11111 0000000000 = -infinity
394//          0 11111 1000000000 = NAN
395//          1 11111 1111111111 = NAN
396//
397// Conversion:
398//
399//      Converting from a float to a half requires some non-trivial bit
400//      manipulations.  In some cases, this makes conversion relatively
401//      slow, but the most common case is accelerated via table lookups.
402//
403//      Converting back from a half to a float is easier because we don't
404//      have to do any rounding.  In addition, there are only 65536
405//      different half numbers; we can convert each of those numbers once
406//      and store the results in a table.  Later, all conversions can be
407//      done using only simple table lookups.
408//
409//---------------------------------------------------------------------------
410
411
412//--------------------
413// Simple constructors
414//--------------------
415
416inline
417half::half ()
418{
419    // no initialization
420}
421
422
423//----------------------------
424// Half-from-float constructor
425//----------------------------
426
427inline
428half::half (float f)
429{
430    if (f == 0)
431    {
432        //
433        // Common special case - zero.
434        // For speed, we don't preserve the zero's sign.
435        //
436
437        _h = 0;
438    }
439    else
440    {
441        //
442        // We extract the combined sign and exponent, e, from our
443        // floating-point number, f.  Then we convert e to the sign
444        // and exponent of the half number via a table lookup.
445        //
446        // For the most common case, where a normalized half is produced,
447        // the table lookup returns a non-zero value; in this case, all
448        // we have to do, is round f's significand to 10 bits and combine
449        // the result with e.
450        //
451        // For all other cases (overflow, zeroes, denormalized numbers
452        // resulting from underflow, infinities and NANs), the table
453        // lookup returns zero, and we call a longer, non-inline function
454        // to do the float-to-half conversion.
455        //
456
457        uif x;
458
459        x.f = f;
460
461        register int e = (x.i >> 23) & 0x000001ff;
462
463        e = _eLut[e];
464
465        if (e)
466        {
467            //
468            // Simple case - round the significand and
469            // combine it with the sign and exponent.
470            //
471
472            _h = e + (((x.i & 0x007fffff) + 0x00001000) >> 13);
473        }
474        else
475        {
476            //
477            // Difficult case - call a function.
478            //
479
480            _h = convert (x.i);
481        }
482    }
483}
484
485
486//------------------------------------------
487// Half-to-float conversion via table lookup
488//------------------------------------------
489
490inline
491half::operator float () const
492{
493    return _toFloat[_h].f;
494}
495
496
497//-------------------------
498// Round to n-bit precision
499//-------------------------
500
501inline half
502half::round (unsigned int n) const
503{
504    //
505    // Parameter check.
506    //
507
508    if (n >= 10)
509        return *this;
510
511    //
512    // Disassemble h into the sign, s,
513    // and the combined exponent and significand, e.
514    //
515
516    unsigned short s = _h & 0x8000;
517    unsigned short e = _h & 0x7fff;
518
519    //
520    // Round the exponent and significand to the nearest value
521    // where ones occur only in the (10-n) most significant bits.
522    // Note that the exponent adjusts automatically if rounding
523    // up causes the significand to overflow.
524    //
525
526    e >>= 9 - n;
527    e  += e & 1;
528    e <<= 9 - n;
529
530    //
531    // Check for exponent overflow.
532    //
533
534    if (e >= 0x7c00)
535    {
536        //
537        // Overflow occurred -- truncate instead of rounding.
538        //
539
540        e = _h;
541        e >>= 10 - n;
542        e <<= 10 - n;
543    }
544
545    //
546    // Put the original sign bit back.
547    //
548
549    half h;
550    h._h = s | e;
551
552    return h;
553}
554
555
556//-----------------------
557// Other inline functions
558//-----------------------
559
560inline half     
561half::operator - () const
562{
563    half h;
564    h._h = _h ^ 0x8000;
565    return h;
566}
567
568
569inline half &
570half::operator = (half h)
571{
572    _h = h._h;
573    return *this;
574}
575
576
577inline half &
578half::operator = (float f)
579{
580    *this = half (f);
581    return *this;
582}
583
584
585inline half &
586half::operator += (half h)
587{
588    *this = half (float (*this) + float (h));
589    return *this;
590}
591
592
593inline half &
594half::operator += (float f)
595{
596    *this = half (float (*this) + f);
597    return *this;
598}
599
600
601inline half &
602half::operator -= (half h)
603{
604    *this = half (float (*this) - float (h));
605    return *this;
606}
607
608
609inline half &
610half::operator -= (float f)
611{
612    *this = half (float (*this) - f);
613    return *this;
614}
615
616
617inline half &
618half::operator *= (half h)
619{
620    *this = half (float (*this) * float (h));
621    return *this;
622}
623
624
625inline half &
626half::operator *= (float f)
627{
628    *this = half (float (*this) * f);
629    return *this;
630}
631
632
633inline half &
634half::operator /= (half h)
635{
636    *this = half (float (*this) / float (h));
637    return *this;
638}
639
640
641inline half &
642half::operator /= (float f)
643{
644    *this = half (float (*this) / f);
645    return *this;
646}
647
648
649inline bool     
650half::isFinite () const
651{
652    unsigned short e = (_h >> 10) & 0x001f;
653    return e < 31;
654}
655
656
657inline bool
658half::isNormalized () const
659{
660    unsigned short e = (_h >> 10) & 0x001f;
661    return e > 0 && e < 31;
662}
663
664
665inline bool
666half::isDenormalized () const
667{
668    unsigned short e = (_h >> 10) & 0x001f;
669    unsigned short m =  _h & 0x3ff;
670    return e == 0 && m != 0;
671}
672
673
674inline bool
675half::isZero () const
676{
677    return (_h & 0x7fff) == 0;
678}
679
680
681inline bool
682half::isNan () const
683{
684    unsigned short e = (_h >> 10) & 0x001f;
685    unsigned short m =  _h & 0x3ff;
686    return e == 31 && m != 0;
687}
688
689
690inline bool
691half::isInfinity () const
692{
693    unsigned short e = (_h >> 10) & 0x001f;
694    unsigned short m =  _h & 0x3ff;
695    return e == 31 && m == 0;
696}
697
698
699inline bool     
700half::isNegative () const
701{
702    return (_h & 0x8000) != 0;
703}
704
705
706inline half
707half::posInf ()
708{
709    half h;
710    h._h = 0x7c00;
711    return h;
712}
713
714
715inline half
716half::negInf ()
717{
718    half h;
719    h._h = 0xfc00;
720    return h;
721}
722
723
724inline half
725half::qNan ()
726{
727    half h;
728    h._h = 0x7fff;
729    return h;
730}
731
732
733inline half
734half::sNan ()
735{
736    half h;
737    h._h = 0x7dff;
738    return h;
739}
740
741
742inline unsigned short
743half::bits () const
744{
745    return _h;
746}
747
748
749inline void
750half::setBits (unsigned short bits)
751{
752    _h = bits;
753}
754
755
756#endif
Note: See TracBrowser for help on using the repository browser.