source: NonGTP/OpenEXR/include/half/half.h @ 855

Revision 855, 17.0 KB checked in by igarcia, 18 years ago (diff)
Line 
1///////////////////////////////////////////////////////////////////////////
2//
3// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4// Digital Ltd. LLC
5//
6// All rights reserved.
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11// *       Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// *       Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following disclaimer
15// in the documentation and/or other materials provided with the
16// distribution.
17// *       Neither the name of Industrial Light & Magic nor the names of
18// its contributors may be used to endorse or promote products derived
19// from this software without specific prior written permission.
20//
21// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32//
33///////////////////////////////////////////////////////////////////////////
34
35// Primary authors:
36//     Florian Kainz <kainz@ilm.com>
37//     Rod Bogart <rgb@ilm.com>
38
39//---------------------------------------------------------------------------
40//
41//      half -- a 16-bit floating point number class:
42//
43//      Type half can represent positive and negative numbers, whose
44//      magnitude is between roughly 6.1e-5 and 6.5e+4, with a relative
45//      error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
46//      with an absolute error of 6.0e-8.  All integers from -2048 to
47//      +2048 can be represented exactly.
48//
49//      Type half behaves (almost) like the built-in C++ floating point
50//      types.  In arithmetic expressions, half, float and double can be
51//      mixed freely.  Here are a few examples:
52//
53//          half a (3.5);
54//          float b (a + sqrt (a));
55//          a += b;
56//          b += a;
57//          b = a + 7;
58//
59//      Conversions from half to float are lossless; all half numbers
60//      are exactly representable as floats.
61//
62//      Conversions from float to half may not preserve the float's
63//      value exactly.  If a float is not representable as a half, the
64//      float value is rounded to the nearest representable half.  If
65//      a float value is exactly in the middle between the two closest
66//      representable half values, then the float value is rounded to
67//      the half with the greater magnitude.
68//
69//      Overflows during float-to-half conversions cause arithmetic
70//      exceptions.  An overflow occurs when the float value to be
71//      converted is too large to be represented as a half, or if the
72//      float value is an infinity or a NAN.
73//
74//      The implementation of type half makes the following assumptions
75//      about the implementation of the built-in C++ types:
76//
77//          float is an IEEE 754 single-precision number
78//          sizeof (float) == 4
79//          sizeof (unsigned int) == sizeof (float)
80//          alignof (unsigned int) == alignof (float)
81//          sizeof (unsigned short) == 2
82//
83//---------------------------------------------------------------------------
84
85#ifndef _HALF_H_
86#define _HALF_H_
87
88#include <iostream>
89
class half
{
  public:

    //------------------------------------------------------------------
    // Construction
    //
    //  half ()                 leaves the stored value uninitialized
    //  half (const half &h)    copies the bit pattern of h
    //  half (float f)          converts f to the nearest half; the
    //                          conversion is implicit so that half can
    //                          be mixed with float in expressions
    //------------------------------------------------------------------

    half ();
    half (const half &h);
    half (float f);

    //------------------------------------------------------------------
    // Conversion back to float (every half is exactly representable
    // as a float)
    //------------------------------------------------------------------

    operator float () const;

    //------------------------------------------------------------------
    // Negation
    //------------------------------------------------------------------

    half operator - () const;

    //------------------------------------------------------------------
    // Plain and compound assignment.
    //
    // Every operator exists for a half and for a float right-hand
    // side.  Note that the result is returned by value, not by
    // reference.
    //------------------------------------------------------------------

    half operator =  (half  h);
    half operator =  (float f);

    half operator += (half  h);
    half operator += (float f);

    half operator -= (half  h);
    half operator -= (float f);

    half operator *= (half  h);
    half operator *= (float f);

    half operator /= (half  h);
    half operator /= (float f);

    //------------------------------------------------------------------
    // round (n) returns a copy of this number rounded to n bits of
    // significand precision; the 10-n least significant bits of the
    // result's significand are zero.  n is meant to lie between
    // 0 and 10; for n >= 10 the number is returned unchanged.
    //------------------------------------------------------------------

    half round (unsigned int n) const;

    //------------------------------------------------------------------
    // Classification predicates
    //
    //  isFinite()          normalized, denormalized or zero
    //  isNormalized()      normalized number
    //  isDenormalized()    denormalized number
    //  isZero()            positive or negative zero
    //  isNan()             not a number
    //  isInfinity()        positive or negative infinity
    //  isNegative()        sign bit is set
    //------------------------------------------------------------------

    bool isFinite () const;
    bool isNormalized () const;
    bool isDenormalized () const;
    bool isZero () const;
    bool isNan () const;
    bool isInfinity () const;
    bool isNegative () const;

    //------------------------------------------------------------------
    // Special values
    //
    //  posInf()    +infinity
    //  negInf()    -infinity
    //  qNan()      NAN with the bit pattern 0111111111111111
    //  sNan()      NAN with the bit pattern 0111110111111111
    //------------------------------------------------------------------

    static half posInf ();
    static half negInf ();
    static half qNan ();
    static half sNan ();

    //------------------------------------------------------------------
    // Raw access to the 16-bit representation
    //------------------------------------------------------------------

    unsigned short bits () const;
    void setBits (unsigned short bits);

    //------------------------------------------------------------------
    // Helper union that overlays a float with an unsigned int so that
    // the float's bit pattern can be inspected.  It is public because
    // it is the element type of the _toFloat table below.
    //------------------------------------------------------------------

    union uif
    {
        unsigned int i;
        float f;
    };

  private:

    //
    // Out-of-line helpers; their definitions live in the
    // implementation file.  convert() handles the float-to-half
    // cases that the inline constructor cannot do by table lookup.
    //

    static short convert (int i);
    static float overflow ();
    static bool selftest ();

    //
    // The 16 bits of the half: sign, 5-bit exponent, 10-bit significand.
    //

    unsigned short _h;

    //
    // Lookup tables, defined in the implementation file:
    //
    //  _toFloat    indexed by a half's bit pattern; holds the
    //              equivalent float
    //
    //  _eLut       indexed by the 9-bit sign-and-exponent field
    //              of a float
    //

    static const uif _toFloat[1 << 16];
    static const unsigned short _eLut[1 << 9];
    static const bool _itWorks;
};
224
225
226//-----------
227// Stream I/O
228//-----------
229
230std::ostream &          operator << (std::ostream &os, half  h);
231std::istream &          operator >> (std::istream &is, half &h);
232
233
234//----------
235// Debugging
236//----------
237
238void                    printBits   (std::ostream &os, half  h);
239void                    printBits   (std::ostream &os, float f);
240void                    printBits   (char  c[19], half  h);
241void                    printBits   (char  c[35], float f);
242
243
244//-------
245// Limits
246//-------
247
// Smallest positive half
#define HALF_MIN        5.96046448e-08

// Smallest positive normalized half
#define HALF_NRM_MIN    6.10351562e-05

// Largest positive half
#define HALF_MAX        65504.0

// Smallest positive e for which half (1.0 + e) != half (1.0)
#define HALF_EPSILON    0.00097656

// Number of digits in mantissa (significand + hidden leading 1)
#define HALF_MANT_DIG   11

// Number of base 10 digits that can be represented without change
#define HALF_DIG        2

// Base of the exponent
#define HALF_RADIX      2

// Minimum negative integer such that HALF_RADIX raised to the power
// of one less than that integer is a normalized half
#define HALF_MIN_EXP    -13

// Maximum positive integer such that HALF_RADIX raised to the power
// of one less than that integer is a normalized half
#define HALF_MAX_EXP    16

// Minimum negative integer such that 10 raised to that power is
// a normalized half
#define HALF_MIN_10_EXP -4

// Maximum positive integer such that 10 raised to that power is
// a normalized half
#define HALF_MAX_10_EXP 4
282
283
284//---------------------------------------------------------------------------
285//
286// Implementation --
287//
288// Representation of a float:
289//
290//      We assume that a float, f, is an IEEE 754 single-precision
291//      floating point number, whose bits are arranged as follows:
292//
293//          31 (msb)
294//          |
295//          | 30     23
296//          | |      |
297//          | |      | 22                    0 (lsb)
298//          | |      | |                     |
299//          X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
300//
301//          s e        m
302//
303//      S is the sign-bit, e is the exponent and m is the significand.
304//
305//      If e is between 1 and 254, f is a normalized number:
306//
307//                  s    e-127
308//          f = (-1)  * 2      * 1.m
309//
310//      If e is 0, and m is not zero, f is a denormalized number:
311//
312//                  s    -126
313//          f = (-1)  * 2      * 0.m
314//
315//      If e and m are both zero, f is zero:
316//
317//          f = 0.0
318//
319//      If e is 255, f is an "infinity" or "not a number" (NAN),
320//      depending on whether m is zero or not.
321//
322//      Examples:
323//
324//          0 00000000 00000000000000000000000 = 0.0
325//          0 01111110 00000000000000000000000 = 0.5
326//          0 01111111 00000000000000000000000 = 1.0
327//          0 10000000 00000000000000000000000 = 2.0
328//          0 10000000 10000000000000000000000 = 3.0
329//          1 10000101 11110000010000000000000 = -124.0625
330//          0 11111111 00000000000000000000000 = +infinity
331//          1 11111111 00000000000000000000000 = -infinity
332//          0 11111111 10000000000000000000000 = NAN
333//          1 11111111 11111111111111111111111 = NAN
334//
335// Representation of a half:
336//
337//      Here is the bit-layout for a half number, h:
338//
339//          15 (msb)
340//          |
341//          | 14  10
342//          | |   |
343//          | |   | 9        0 (lsb)
344//          | |   | |        |
345//          X XXXXX XXXXXXXXXX
346//
347//          s e     m
348//
349//      S is the sign-bit, e is the exponent and m is the significand.
350//
351//      If e is between 1 and 30, h is a normalized number:
352//
353//                  s    e-15
354//          h = (-1)  * 2     * 1.m
355//
356//      If e is 0, and m is not zero, h is a denormalized number:
357//
358//                  s    -14
359//          h = (-1)  * 2     * 0.m
360//
361//      If e and m are both zero, h is zero:
362//
363//          h = 0.0
364//
365//      If e is 31, h is an "infinity" or "not a number" (NAN),
366//      depending on whether m is zero or not.
367//
368//      Examples:
369//
370//          0 00000 0000000000 = 0.0
371//          0 01110 0000000000 = 0.5
372//          0 01111 0000000000 = 1.0
373//          0 10000 0000000000 = 2.0
374//          0 10000 1000000000 = 3.0
375//          1 10101 1111000001 = -124.0625
376//          0 11111 0000000000 = +infinity
377//          1 11111 0000000000 = -infinity
378//          0 11111 1000000000 = NAN
379//          1 11111 1111111111 = NAN
380//
381// Conversion:
382//
383//      Converting from a float to a half requires some non-trivial bit
384//      manipulations.  In some cases, this makes conversion relatively
385//      slow, but the most common case is accelerated via table lookups.
386//
387//      Converting back from a half to a float is easier because we don't
388//      have to do any rounding.  In addition, there are only 65536
389//      different half numbers; we can convert each of those numbers once
390//      and store the results in a table.  Later, all conversions can be
391//      done using only simple table lookups.
392//
393//---------------------------------------------------------------------------
394
395
396//--------------------
397// Simple constructors
398//--------------------
399
400inline
401half::half ()
402{
403    // no initialization
404}
405
406
407inline
408half::half (const half &h)
409{
410    _h = h._h;
411}
412
413
414//----------------------------
415// Half-from-float constructor
416//----------------------------
417
418inline
419half::half (float f)
420{
421    if (f == 0)
422    {
423        //
424        // Common special case - zero.
425        // For speed, we don't preserve the zero's sign.
426        //
427
428        _h = 0;
429    }
430    else
431    {
432        //
433        // We extract the combined sign and exponent, e, from our
434        // floating-point number, f.  Then we convert e to the sign
435        // and exponent of the half number via a table lookup.
436        //
437        // For the most common case, where a normalized half is produced,
438        // the table lookup returns a non-zero value; in this case, all
439        // we have to do, is round f's significand to 10 bits and combine
440        // the result with e.
441        //
442        // For all other cases (overflow, zeroes, denormalized numbers
443        // resulting from underflow, infinities and NANs), the table
444        // lookup returns zero, and we call a longer, non-inline function
445        // to do the float-to-half conversion.
446        //
447
448        uif x;
449
450        x.f = f;
451
452        register int e = (x.i >> 23) & 0x000001ff;
453
454        e = _eLut[e];
455
456        if (e)
457        {
458            //
459            // Simple case - round the significand and
460            // combine it with the sign and exponent.
461            //
462
463            _h = e + (((x.i & 0x007fffff) + 0x00001000) >> 13);
464        }
465        else
466        {
467            //
468            // Difficult case - call a function.
469            //
470
471            _h = convert (x.i);
472        }
473    }
474}
475
476
477//------------------------------------------
478// Half-to-float conversion via table lookup
479//------------------------------------------
480
481inline
482half::operator float () const
483{
484    return _toFloat[_h].f;
485}
486
487
488//-------------------------
489// Round to n-bit precision
490//-------------------------
491
492inline half
493half::round (unsigned int n) const
494{
495    //
496    // Parameter check.
497    //
498
499    if (n >= 10)
500        return *this;
501
502    //
503    // Disassemble h into the sign, s,
504    // and the combined exponent and significand, e.
505    //
506
507    unsigned short s = _h & 0x8000;
508    unsigned short e = _h & 0x7fff;
509
510    //
511    // Round the exponent and significand to the nearest value
512    // where ones occur only in the (10-n) most significant bits.
513    // Note that the exponent adjusts automatically if rounding
514    // up causes the significand to overflow.
515    //
516
517    e >>= 9 - n;
518    e  += e & 1;
519    e <<= 9 - n;
520
521    //
522    // Check for exponent overflow.
523    //
524
525    if (e >= 0x7c00)
526    {
527        //
528        // Overflow occurred -- truncate instead of rounding.
529        //
530
531        e = _h;
532        e >>= 10 - n;
533        e <<= 10 - n;
534    }
535
536    //
537    // Put the original sign bit back.
538    //
539
540    half h;
541    h._h = s | e;
542
543    return h;
544}
545
546
547//-----------------------
548// Other inline functions
549//-----------------------
550
551inline half     
552half::operator - () const
553{
554    half h;
555    h._h = _h ^ 0x8000;
556    return h;
557}
558
559
560inline half     
561half::operator = (half h)
562{
563    _h = h._h;
564    return *this;
565}
566
567
568inline half     
569half::operator = (float f)
570{
571    *this = half (f);
572    return *this;
573}
574
575
576inline half     
577half::operator += (half h)
578{
579    *this = half (float (*this) + float (h));
580    return *this;
581}
582
583
584inline half     
585half::operator += (float f)
586{
587    *this = half (float (*this) + f);
588    return *this;
589}
590
591
592inline half     
593half::operator -= (half h)
594{
595    *this = half (float (*this) - float (h));
596    return *this;
597}
598
599
600inline half     
601half::operator -= (float f)
602{
603    *this = half (float (*this) - f);
604    return *this;
605}
606
607
608inline half     
609half::operator *= (half h)
610{
611    *this = half (float (*this) * float (h));
612    return *this;
613}
614
615
616inline half     
617half::operator *= (float f)
618{
619    *this = half (float (*this) * f);
620    return *this;
621}
622
623
624inline half     
625half::operator /= (half h)
626{
627    *this = half (float (*this) / float (h));
628    return *this;
629}
630
631
632inline half     
633half::operator /= (float f)
634{
635    *this = half (float (*this) / f);
636    return *this;
637}
638
639
640inline bool     
641half::isFinite () const
642{
643    unsigned short e = (_h >> 10) & 0x001f;
644    return e < 31;
645}
646
647
648inline bool
649half::isNormalized () const
650{
651    unsigned short e = (_h >> 10) & 0x001f;
652    return e > 0 && e < 31;
653}
654
655
656inline bool
657half::isDenormalized () const
658{
659    unsigned short e = (_h >> 10) & 0x001f;
660    unsigned short m =  _h & 0x3ff;
661    return e == 0 && m != 0;
662}
663
664
665inline bool
666half::isZero () const
667{
668    return (_h & 0x7fff) == 0;
669}
670
671
672inline bool
673half::isNan () const
674{
675    unsigned short e = (_h >> 10) & 0x001f;
676    unsigned short m =  _h & 0x3ff;
677    return e == 31 && m != 0;
678}
679
680
681inline bool
682half::isInfinity () const
683{
684    unsigned short e = (_h >> 10) & 0x001f;
685    unsigned short m =  _h & 0x3ff;
686    return e == 31 && m == 0;
687}
688
689
690inline bool     
691half::isNegative () const
692{
693    return (_h & 0x8000) != 0;
694}
695
696
697inline half
698half::posInf ()
699{
700    half h;
701    h._h = 0x7c00;
702    return h;
703}
704
705
706inline half
707half::negInf ()
708{
709    half h;
710    h._h = 0xfc00;
711    return h;
712}
713
714
715inline half
716half::qNan ()
717{
718    half h;
719    h._h = 0x7fff;
720    return h;
721}
722
723
724inline half
725half::sNan ()
726{
727    half h;
728    h._h = 0x7dff;
729    return h;
730}
731
732
733inline unsigned short
734half::bits () const
735{
736    return _h;
737}
738
739
740inline void
741half::setBits (unsigned short bits)
742{
743    _h = bits;
744}
745
746
747#endif
Note: See TracBrowser for help on using the repository browser.