source: NonGTP/OpenEXR/include/half.h @ 855

Revision 855, 17.5 KB checked in by igarcia, 18 years ago (diff)
Line 
1///////////////////////////////////////////////////////////////////////////
2//
3// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4// Digital Ltd. LLC
5//
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11// *       Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// *       Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following disclaimer
15// in the documentation and/or other materials provided with the
16// distribution.
17// *       Neither the name of Industrial Light & Magic nor the names of
18// its contributors may be used to endorse or promote products derived
19// from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//
33///////////////////////////////////////////////////////////////////////////
34
35// Primary authors:
36//     Florian Kainz <kainz@ilm.com>
37//     Rod Bogart <rgb@ilm.com>
38
39//---------------------------------------------------------------------------
40//
41//      half -- a 16-bit floating point number class:
42//
43//      Type half can represent positive and negative numbers, whose
44//      magnitude is between roughly 6.1e-5 and 6.5e+4, with a relative
45//      error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46//      with an absolute error of 6.0e-8.  All integers from -2048 to
47//      +2048 can be represented exactly.
48//
49//      Type half behaves (almost) like the built-in C++ floating point
50//      types.  In arithmetic expressions, half, float and double can be
51//      mixed freely.  Here are a few examples:
52//
53//          half a (3.5);
54//          float b (a + sqrt (a));
55//          a += b;
56//          b += a;
57//          b = a + 7;
58//
59//      Conversions from half to float are lossless; all half numbers
60//      are exactly representable as floats.
61//
62//      Conversions from float to half may not preserve the float's
63//      value exactly.  If a float is not representable as a half, the
64//      float value is rounded to the nearest representable half.  If
65//      a float value is exactly in the middle between the two closest
66//      representable half values, then the float value is rounded to
67//      the half with the greater magnitude.
68//
69//      Overflows during float-to-half conversions cause arithmetic
70//      exceptions.  An overflow occurs when the float value to be
71//      converted is too large to be represented as a half, or if the
72//      float value is an infinity or a NAN.
73//
74//      The implementation of type half makes the following assumptions
75//      about the implementation of the built-in C++ types:
76//
77//          float is an IEEE 754 single-precision number
78//          sizeof (float) == 4
79//          sizeof (unsigned int) == sizeof (float)
80//          alignof (unsigned int) == alignof (float)
81//          sizeof (unsigned short) == 2
82//
83//---------------------------------------------------------------------------
84
85#ifndef _HALF_H_
86#define _HALF_H_
87
88#include <iostream>
89
90class half
91{
92  public:
93
94    //-------------
95    // Constructors
96    //-------------
97
98    half ();                    // no initialization
99    half (float f);
100
101
102    //--------------------
103    // Conversion to float
104    //--------------------
105
106    operator            float () const;
107
108
109    //------------
110    // Unary minus
111    //------------
112
113    half                operator - () const;
114
115
116    //-----------
117    // Assignment
118    //-----------
119
120    half &              operator = (half  h);
121    half &              operator = (float f);
122
123    half &              operator += (half  h);
124    half &              operator += (float f);
125
126    half &              operator -= (half  h);
127    half &              operator -= (float f);
128
129    half &              operator *= (half  h);
130    half &              operator *= (float f);
131
132    half &              operator /= (half  h);
133    half &              operator /= (float f);
134
135
136    //---------------------------------------------------------
137    // Round to n-bit precision (n should be between 0 and 10).
138    // After rounding, the significand's 10-n least significant
139    // bits will be zero.
140    //---------------------------------------------------------
141
142    half                round (unsigned int n) const;
143
144
145    //--------------------------------------------------------------------
146    // Classification:
147    //
148    //  h.isFinite()            returns true if h is a normalized number,
149    //                          a denormalized number or zero
150    //
151    //  h.isNormalized()        returns true if h is a normalized number
152    //
153    //  h.isDenormalized()      returns true if h is a denormalized number
154    //
155    //  h.isZero()              returns true if h is zero
156    //
157    //  h.isNan()               returns true if h is a NAN
158    //
159    //  h.isInfinity()          returns true if h is a positive
160    //                          or a negative infinity
161    //
162    //  h.isNegative()          returns true if the sign bit of h
163    //                          is set (negative)
164    //--------------------------------------------------------------------
165
166    bool                isFinite () const;
167    bool                isNormalized () const;
168    bool                isDenormalized () const;
169    bool                isZero () const;
170    bool                isNan () const;
171    bool                isInfinity () const;
172    bool                isNegative () const;
173
174
175    //--------------------------------------------
176    // Special values
177    //
178    //  posInf()        returns +infinity
179    //
180    //  negInf()        returns +infinity
181    //
182    //  qNan()          returns a NAN with the bit
183    //                  pattern 0111111111111111
184    //
185    //  sNan()          returns a NAN with the bit
186    //                  pattern 0111110111111111
187    //--------------------------------------------
188
189    static half         posInf ();
190    static half         negInf ();
191    static half         qNan ();
192    static half         sNan ();
193
194
195    //--------------------------------------
196    // Access to the internal representation
197    //--------------------------------------
198
199    unsigned short      bits () const;
200    void                setBits (unsigned short bits);
201
202
203  public:
204
205    union uif
206    {
207        unsigned int    i;
208        float           f;
209    };
210
211  private:
212
213    static short        convert (int i);
214    static float        overflow ();
215
216    unsigned short      _h;
217
218    //---------------------------------------------------
219    // Windows dynamic libraries don't like static
220    // member variables.
221    //---------------------------------------------------
222#ifndef OPENEXR_DLL
223    static const uif            _toFloat[1 << 16];
224    static const unsigned short _eLut[1 << 9];
225#endif
226};
227
228#if defined(OPENEXR_DLL)
229    //--------------------------------------
230    // Lookup tables defined for Windows DLL
231    //--------------------------------------
232    #if defined(HALF_EXPORTS)
233        extern __declspec(dllexport) half::uif          _toFloat[1 << 16];
234        extern __declspec(dllexport) unsigned short     _eLut[1 << 9];
235    #else
236        extern __declspec(dllimport) half::uif          _toFloat[1 << 16];
237        extern __declspec(dllimport) unsigned short     _eLut[1 << 9];
238    #endif
239#endif
240
241
242//-----------
243// Stream I/O
244//-----------
245
246std::ostream &          operator << (std::ostream &os, half  h);
247std::istream &          operator >> (std::istream &is, half &h);
248
249
250//----------
251// Debugging
252//----------
253
254void                    printBits   (std::ostream &os, half  h);
255void                    printBits   (std::ostream &os, float f);
256void                    printBits   (char  c[19], half  h);
257void                    printBits   (char  c[35], float f);
258
259
260//-------------------------------------------------------------------------
261// Limits
262//
263// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
264// constants, but at least one other compiler (gcc 2.96) produces incorrect
265// results if they are.
266//-------------------------------------------------------------------------
267
268#if defined(PLATFORM_WINDOWS)
269
270#define HALF_MIN        5.96046448e-08f // Smallest positive half
271
272#define HALF_NRM_MIN    6.10351562e-05f // Smallest positive normalized half
273
274#define HALF_MAX        65504.0f        // Largest positive half
275
276#define HALF_EPSILON    0.00097656f     // Smallest positive e for which
277                                        // half (1.0 + e) != half (1.0)
278#else
279
280#define HALF_MIN        5.96046448e-08  // Smallest positive half
281
282#define HALF_NRM_MIN    6.10351562e-05  // Smallest positive normalized half
283
284#define HALF_MAX        65504.0         // Largest positive half
285
286#define HALF_EPSILON    0.00097656      // Smallest positive e for which
287                                        // half (1.0 + e) != half (1.0)
288#endif
289
290
291#define HALF_MANT_DIG   11              // Number of digits in mantissa
292                                        // (significand + hidden leading 1)
293
294#define HALF_DIG        2               // Number of base 10 digits that
295                                        // can be represented without change
296
297#define HALF_RADIX      2               // Base of the exponent
298
299#define HALF_MIN_EXP    -13             // Minimum negative integer such that
300                                        // HALF_RADIX raised to the power of
301                                        // one less than that integer is a
302                                        // normalized half
303
304#define HALF_MAX_EXP    16              // Maximum positive integer such that
305                                        // HALF_RADIX raised to the power of
306                                        // one less than that integer is a
307                                        // normalized half
308
309#define HALF_MIN_10_EXP -4              // Minimum positive integer such
310                                        // that 10 raised to that power is
311                                        // a normalized half
312
313#define HALF_MAX_10_EXP 4               // Maximum positive integer such
314                                        // that 10 raised to that power is
315                                        // a normalized half
316
317
318//---------------------------------------------------------------------------
319//
320// Implementation --
321//
322// Representation of a float:
323//
324//      We assume that a float, f, is an IEEE 754 single-precision
325//      floating point number, whose bits are arranged as follows:
326//
327//          31 (msb)
328//          |
329//          | 30     23
330//          | |      |
331//          | |      | 22                    0 (lsb)
332//          | |      | |                     |
333//          X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
334//
335//          s e        m
336//
337//      S is the sign-bit, e is the exponent and m is the significand.
338//
339//      If e is between 1 and 254, f is a normalized number:
340//
341//                  s    e-127
342//          f = (-1)  * 2      * 1.m
343//
344//      If e is 0, and m is not zero, f is a denormalized number:
345//
346//                  s    -126
347//          f = (-1)  * 2      * 0.m
348//
349//      If e and m are both zero, f is zero:
350//
351//          f = 0.0
352//
353//      If e is 255, f is an "infinity" or "not a number" (NAN),
354//      depending on whether m is zero or not.
355//
356//      Examples:
357//
358//          0 00000000 00000000000000000000000 = 0.0
359//          0 01111110 00000000000000000000000 = 0.5
360//          0 01111111 00000000000000000000000 = 1.0
361//          0 10000000 00000000000000000000000 = 2.0
362//          0 10000000 10000000000000000000000 = 3.0
363//          1 10000101 11110000010000000000000 = -124.0625
364//          0 11111111 00000000000000000000000 = +infinity
365//          1 11111111 00000000000000000000000 = -infinity
366//          0 11111111 10000000000000000000000 = NAN
367//          1 11111111 11111111111111111111111 = NAN
368//
369// Representation of a half:
370//
371//      Here is the bit-layout for a half number, h:
372//
373//          15 (msb)
374//          |
375//          | 14  10
376//          | |   |
377//          | |   | 9        0 (lsb)
378//          | |   | |        |
379//          X XXXXX XXXXXXXXXX
380//
381//          s e     m
382//
383//      S is the sign-bit, e is the exponent and m is the significand.
384//
385//      If e is between 1 and 30, h is a normalized number:
386//
387//                  s    e-15
388//          h = (-1)  * 2     * 1.m
389//
390//      If e is 0, and m is not zero, h is a denormalized number:
391//
392//                  S    -14
393//          h = (-1)  * 2     * 0.m
394//
395//      If e and m are both zero, h is zero:
396//
397//          h = 0.0
398//
399//      If e is 31, h is an "infinity" or "not a number" (NAN),
400//      depending on whether m is zero or not.
401//
402//      Examples:
403//
404//          0 00000 0000000000 = 0.0
405//          0 01110 0000000000 = 0.5
406//          0 01111 0000000000 = 1.0
407//          0 10000 0000000000 = 2.0
408//          0 10000 1000000000 = 3.0
409//          1 10101 1111000001 = -124.0625
410//          0 11111 0000000000 = +infinity
411//          1 11111 0000000000 = -infinity
412//          0 11111 1000000000 = NAN
413//          1 11111 1111111111 = NAN
414//
415// Conversion:
416//
417//      Converting from a float to a half requires some non-trivial bit
418//      manipulations.  In some cases, this makes conversion relatively
419//      slow, but the most common case is accelerated via table lookups.
420//
421//      Converting back from a half to a float is easier because we don't
422//      have to do any rounding.  In addition, there are only 65536
423//      different half numbers; we can convert each of those numbers once
424//      and store the results in a table.  Later, all conversions can be
425//      done using only simple table lookups.
426//
427//---------------------------------------------------------------------------
428
429
430//--------------------
431// Simple constructors
432//--------------------
433
434inline
435half::half ()
436{
437    // no initialization
438}
439
440
441//----------------------------
442// Half-from-float constructor
443//----------------------------
444
445inline
446half::half (float f)
447{
448    if (f == 0)
449    {
450        //
451        // Common special case - zero.
452        // For speed, we don't preserve the zero's sign.
453        //
454
455        _h = 0;
456    }
457    else
458    {
459        //
460        // We extract the combined sign and exponent, e, from our
461        // floating-point number, f.  Then we convert e to the sign
462        // and exponent of the half number via a table lookup.
463        //
464        // For the most common case, where a normalized half is produced,
465        // the table lookup returns a non-zero value; in this case, all
466        // we have to do, is round f's significand to 10 bits and combine
467        // the result with e.
468        //
469        // For all other cases (overflow, zeroes, denormalized numbers
470        // resulting from underflow, infinities and NANs), the table
471        // lookup returns zero, and we call a longer, non-inline function
472        // to do the float-to-half conversion.
473        //
474
475        uif x;
476
477        x.f = f;
478
479        register int e = (x.i >> 23) & 0x000001ff;
480
481        e = _eLut[e];
482
483        if (e)
484        {
485            //
486            // Simple case - round the significand and
487            // combine it with the sign and exponent.
488            //
489
490            _h = e + (((x.i & 0x007fffff) + 0x00001000) >> 13);
491        }
492        else
493        {
494            //
495            // Difficult case - call a function.
496            //
497
498            _h = convert (x.i);
499        }
500    }
501}
502
503
504//------------------------------------------
505// Half-to-float conversion via table lookup
506//------------------------------------------
507
508inline
509half::operator float () const
510{
511    return _toFloat[_h].f;
512}
513
514
515//-------------------------
516// Round to n-bit precision
517//-------------------------
518
519inline half
520half::round (unsigned int n) const
521{
522    //
523    // Parameter check.
524    //
525
526    if (n >= 10)
527        return *this;
528
529    //
530    // Disassemble h into the sign, s,
531    // and the combined exponent and significand, e.
532    //
533
534    unsigned short s = _h & 0x8000;
535    unsigned short e = _h & 0x7fff;
536
537    //
538    // Round the exponent and significand to the nearest value
539    // where ones occur only in the (10-n) most significant bits.
540    // Note that the exponent adjusts automatically if rounding
541    // up causes the significand to overflow.
542    //
543
544    e >>= 9 - n;
545    e  += e & 1;
546    e <<= 9 - n;
547
548    //
549    // Check for exponent overflow.
550    //
551
552    if (e >= 0x7c00)
553    {
554        //
555        // Overflow occurred -- truncate instead of rounding.
556        //
557
558        e = _h;
559        e >>= 10 - n;
560        e <<= 10 - n;
561    }
562
563    //
564    // Put the original sign bit back.
565    //
566
567    half h;
568    h._h = s | e;
569
570    return h;
571}
572
573
574//-----------------------
575// Other inline functions
576//-----------------------
577
578inline half     
579half::operator - () const
580{
581    half h;
582    h._h = _h ^ 0x8000;
583    return h;
584}
585
586
587inline half &
588half::operator = (half h)
589{
590    _h = h._h;
591    return *this;
592}
593
594
595inline half &
596half::operator = (float f)
597{
598    *this = half (f);
599    return *this;
600}
601
602
603inline half &
604half::operator += (half h)
605{
606    *this = half (float (*this) + float (h));
607    return *this;
608}
609
610
611inline half &
612half::operator += (float f)
613{
614    *this = half (float (*this) + f);
615    return *this;
616}
617
618
619inline half &
620half::operator -= (half h)
621{
622    *this = half (float (*this) - float (h));
623    return *this;
624}
625
626
627inline half &
628half::operator -= (float f)
629{
630    *this = half (float (*this) - f);
631    return *this;
632}
633
634
635inline half &
636half::operator *= (half h)
637{
638    *this = half (float (*this) * float (h));
639    return *this;
640}
641
642
643inline half &
644half::operator *= (float f)
645{
646    *this = half (float (*this) * f);
647    return *this;
648}
649
650
651inline half &
652half::operator /= (half h)
653{
654    *this = half (float (*this) / float (h));
655    return *this;
656}
657
658
659inline half &
660half::operator /= (float f)
661{
662    *this = half (float (*this) / f);
663    return *this;
664}
665
666
667inline bool     
668half::isFinite () const
669{
670    unsigned short e = (_h >> 10) & 0x001f;
671    return e < 31;
672}
673
674
675inline bool
676half::isNormalized () const
677{
678    unsigned short e = (_h >> 10) & 0x001f;
679    return e > 0 && e < 31;
680}
681
682
683inline bool
684half::isDenormalized () const
685{
686    unsigned short e = (_h >> 10) & 0x001f;
687    unsigned short m =  _h & 0x3ff;
688    return e == 0 && m != 0;
689}
690
691
692inline bool
693half::isZero () const
694{
695    return (_h & 0x7fff) == 0;
696}
697
698
699inline bool
700half::isNan () const
701{
702    unsigned short e = (_h >> 10) & 0x001f;
703    unsigned short m =  _h & 0x3ff;
704    return e == 31 && m != 0;
705}
706
707
708inline bool
709half::isInfinity () const
710{
711    unsigned short e = (_h >> 10) & 0x001f;
712    unsigned short m =  _h & 0x3ff;
713    return e == 31 && m == 0;
714}
715
716
717inline bool     
718half::isNegative () const
719{
720    return (_h & 0x8000) != 0;
721}
722
723
724inline half
725half::posInf ()
726{
727    half h;
728    h._h = 0x7c00;
729    return h;
730}
731
732
733inline half
734half::negInf ()
735{
736    half h;
737    h._h = 0xfc00;
738    return h;
739}
740
741
742inline half
743half::qNan ()
744{
745    half h;
746    h._h = 0x7fff;
747    return h;
748}
749
750
751inline half
752half::sNan ()
753{
754    half h;
755    h._h = 0x7dff;
756    return h;
757}
758
759
760inline unsigned short
761half::bits () const
762{
763    return _h;
764}
765
766
767inline void
768half::setBits (unsigned short bits)
769{
770    _h = bits;
771}
772
773#undef HALF_EXPORT_CONST
774
775#endif
Note: See TracBrowser for help on using the repository browser.