[657] | 1 | /*
|
---|
| 2 | -----------------------------------------------------------------------------
|
---|
| 3 | This source file is part of OGRE
|
---|
| 4 | (Object-oriented Graphics Rendering Engine)
|
---|
| 5 | For the latest info, see http://www.ogre3d.org/
|
---|
| 6 |
|
---|
| 7 | Copyright (c) 2000-2005 The OGRE Team
|
---|
| 8 | Also see acknowledgements in Readme.html
|
---|
| 9 |
|
---|
| 10 | This program is free software; you can redistribute it and/or modify it under
|
---|
| 11 | the terms of the GNU Lesser General Public License as published by the Free Software
|
---|
| 12 | Foundation; either version 2 of the License, or (at your option) any later
|
---|
| 13 | version.
|
---|
| 14 |
|
---|
| 15 | This program is distributed in the hope that it will be useful, but WITHOUT
|
---|
| 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
---|
| 17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
|
---|
| 18 |
|
---|
| 19 | You should have received a copy of the GNU Lesser General Public License along with
|
---|
| 20 | this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
---|
| 21 | Place - Suite 330, Boston, MA 02111-1307, USA, or go to
|
---|
| 22 | http://www.gnu.org/copyleft/lesser.txt.
|
---|
| 23 | -----------------------------------------------------------------------------
|
---|
| 24 | */
|
---|
| 25 | #ifndef OGREIMAGERESAMPLER_H
|
---|
| 26 | #define OGREIMAGERESAMPLER_H
|
---|
| 27 |
|
---|
| 28 | #include <algorithm>
|
---|
| 29 |
|
---|
| 30 | // this file is inlined into OgreImage.cpp!
|
---|
| 31 | // do not include anywhere else.
|
---|
| 32 | namespace Ogre {
|
---|
| 33 |
|
---|
| 34 | // define uint64 type
|
---|
| 35 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC
|
---|
| 36 | typedef unsigned __int64 uint64;
|
---|
| 37 | #else
|
---|
| 38 | typedef unsigned long long uint64;
|
---|
| 39 | #endif
|
---|
| 40 |
|
---|
| 41 | // variable name hints:
|
---|
| 42 | // sx_48 = 16/48-bit fixed-point x-position in source
|
---|
| 43 | // stepx = difference between adjacent sx_48 values
|
---|
| 44 | // sx1 = lower-bound integer x-position in source
|
---|
| 45 | // sx2 = upper-bound integer x-position in source
|
---|
| 46 | // sxf = fractional weight between sx1 and sx2
|
---|
| 47 | // x,y,z = location of output pixel in destination
|
---|
| 48 |
|
---|
| 49 | // nearest-neighbor resampler, does not convert formats.
|
---|
| 50 | // templated on bytes-per-pixel to allow compiler optimizations, such
|
---|
| 51 | // as simplifying memcpy() and replacing multiplies with bitshifts
|
---|
| 52 | template<unsigned int elemsize> struct NearestResampler {
|
---|
| 53 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
| 54 | // assert(src.format == dst.format);
|
---|
| 55 |
|
---|
| 56 | // srcdata stays at beginning, pdst is a moving pointer
|
---|
| 57 | uchar* srcdata = (uchar*)src.data;
|
---|
| 58 | uchar* pdst = (uchar*)dst.data;
|
---|
| 59 |
|
---|
| 60 | // sx_48,sy_48,sz_48 represent current position in source
|
---|
| 61 | // using 16/48-bit fixed precision, incremented by steps
|
---|
| 62 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
| 63 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
| 64 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
|
---|
| 65 |
|
---|
| 66 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
|
---|
| 67 | // for the center of the destination pixel, not the top-left corner
|
---|
| 68 | uint64 sz_48 = (stepz >> 1) - 1;
|
---|
| 69 | for (size_t z = dst.front; z < dst.back; z++, sz_48 += stepz) {
|
---|
| 70 | size_t srczoff = (size_t)(sz_48 >> 48) * src.slicePitch;
|
---|
| 71 |
|
---|
| 72 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
| 73 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48 += stepy) {
|
---|
| 74 | size_t srcyoff = (size_t)(sy_48 >> 48) * src.rowPitch;
|
---|
| 75 |
|
---|
| 76 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
| 77 | for (size_t x = dst.left; x < dst.right; x++, sx_48 += stepx) {
|
---|
| 78 | uchar* psrc = srcdata +
|
---|
| 79 | elemsize*((size_t)(sx_48 >> 48) + srcyoff + srczoff);
|
---|
| 80 | memcpy(pdst, psrc, elemsize);
|
---|
| 81 | pdst += elemsize;
|
---|
| 82 | }
|
---|
| 83 | pdst += elemsize*dst.getRowSkip();
|
---|
| 84 | }
|
---|
| 85 | pdst += elemsize*dst.getSliceSkip();
|
---|
| 86 | }
|
---|
| 87 | }
|
---|
| 88 | };
|
---|
| 89 |
|
---|
| 90 |
|
---|
| 91 | // default floating-point linear resampler, does format conversion
|
---|
| 92 | struct LinearResampler {
|
---|
| 93 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
| 94 | size_t srcelemsize = PixelUtil::getNumElemBytes(src.format);
|
---|
| 95 | size_t dstelemsize = PixelUtil::getNumElemBytes(dst.format);
|
---|
| 96 |
|
---|
| 97 | // srcdata stays at beginning, pdst is a moving pointer
|
---|
| 98 | uchar* srcdata = (uchar*)src.data;
|
---|
| 99 | uchar* pdst = (uchar*)dst.data;
|
---|
| 100 |
|
---|
| 101 | // sx_48,sy_48,sz_48 represent current position in source
|
---|
| 102 | // using 16/48-bit fixed precision, incremented by steps
|
---|
| 103 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
| 104 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
| 105 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
|
---|
| 106 |
|
---|
| 107 | // temp is 16/16 bit fixed precision, used to adjust a source
|
---|
| 108 | // coordinate (x, y, or z) backwards by half a pixel so that the
|
---|
| 109 | // integer bits represent the first sample (eg, sx1) and the
|
---|
| 110 | // fractional bits are the blend weight of the second sample
|
---|
| 111 | unsigned int temp;
|
---|
| 112 |
|
---|
| 113 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
|
---|
| 114 | // for the center of the destination pixel, not the top-left corner
|
---|
| 115 | uint64 sz_48 = (stepz >> 1) - 1;
|
---|
| 116 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
|
---|
| 117 | temp = sz_48 >> 32;
|
---|
| 118 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
| 119 | size_t sz1 = temp >> 16; // src z, sample #1
|
---|
| 120 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
|
---|
| 121 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
|
---|
| 122 |
|
---|
| 123 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
| 124 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
|
---|
| 125 | temp = sy_48 >> 32;
|
---|
| 126 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
| 127 | size_t sy1 = temp >> 16; // src y #1
|
---|
| 128 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
|
---|
| 129 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
| 130 |
|
---|
| 131 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
| 132 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
|
---|
| 133 | temp = sx_48 >> 32;
|
---|
| 134 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
| 135 | size_t sx1 = temp >> 16; // src x #1
|
---|
| 136 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
|
---|
| 137 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
| 138 |
|
---|
| 139 | ColourValue x1y1z1, x2y1z1, x1y2z1, x2y2z1;
|
---|
| 140 | ColourValue x1y1z2, x2y1z2, x1y2z2, x2y2z2;
|
---|
| 141 |
|
---|
| 142 | #define UNPACK(dst,x,y,z) PixelUtil::unpackColour(&dst, src.format, \
|
---|
| 143 | srcdata + srcelemsize*((x)+(y)*src.rowPitch+(z)*src.slicePitch))
|
---|
| 144 |
|
---|
| 145 | UNPACK(x1y1z1,sx1,sy1,sz1); UNPACK(x2y1z1,sx2,sy1,sz1);
|
---|
| 146 | UNPACK(x1y2z1,sx1,sy2,sz1); UNPACK(x2y2z1,sx2,sy2,sz1);
|
---|
| 147 | UNPACK(x1y1z2,sx1,sy1,sz2); UNPACK(x2y1z2,sx2,sy1,sz2);
|
---|
| 148 | UNPACK(x1y2z2,sx1,sy2,sz2); UNPACK(x2y2z2,sx2,sy2,sz2);
|
---|
| 149 | #undef UNPACK
|
---|
| 150 |
|
---|
| 151 | ColourValue accum =
|
---|
| 152 | x1y1z1 * ((1.0f - sxf)*(1.0f - syf)*(1.0f - szf)) +
|
---|
| 153 | x2y1z1 * ( sxf *(1.0f - syf)*(1.0f - szf)) +
|
---|
| 154 | x1y2z1 * ((1.0f - sxf)* syf *(1.0f - szf)) +
|
---|
| 155 | x2y2z1 * ( sxf * syf *(1.0f - szf)) +
|
---|
| 156 | x1y1z2 * ((1.0f - sxf)*(1.0f - syf)* szf ) +
|
---|
| 157 | x2y1z2 * ( sxf *(1.0f - syf)* szf ) +
|
---|
| 158 | x1y2z2 * ((1.0f - sxf)* syf * szf ) +
|
---|
| 159 | x2y2z2 * ( sxf * syf * szf );
|
---|
| 160 |
|
---|
| 161 | PixelUtil::packColour(accum, dst.format, pdst);
|
---|
| 162 |
|
---|
| 163 | pdst += dstelemsize;
|
---|
| 164 | }
|
---|
| 165 | pdst += dstelemsize*dst.getRowSkip();
|
---|
| 166 | }
|
---|
| 167 | pdst += dstelemsize*dst.getSliceSkip();
|
---|
| 168 | }
|
---|
| 169 | }
|
---|
| 170 | };
|
---|
| 171 |
|
---|
| 172 |
|
---|
| 173 | // float32 linear resampler, converts FLOAT32_RGB/FLOAT32_RGBA only.
|
---|
| 174 | // avoids overhead of pixel unpack/repack function calls
|
---|
| 175 | struct LinearResampler_Float32 {
|
---|
| 176 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
| 177 | size_t srcchannels = PixelUtil::getNumElemBytes(src.format) / sizeof(float);
|
---|
| 178 | size_t dstchannels = PixelUtil::getNumElemBytes(dst.format) / sizeof(float);
|
---|
| 179 | // assert(srcchannels == 3 || srcchannels == 4);
|
---|
| 180 | // assert(dstchannels == 3 || dstchannels == 4);
|
---|
| 181 |
|
---|
| 182 | // srcdata stays at beginning, pdst is a moving pointer
|
---|
| 183 | float* srcdata = (float*)src.data;
|
---|
| 184 | float* pdst = (float*)dst.data;
|
---|
| 185 |
|
---|
| 186 | // sx_48,sy_48,sz_48 represent current position in source
|
---|
| 187 | // using 16/48-bit fixed precision, incremented by steps
|
---|
| 188 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
| 189 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
| 190 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
|
---|
| 191 |
|
---|
| 192 | // temp is 16/16 bit fixed precision, used to adjust a source
|
---|
| 193 | // coordinate (x, y, or z) backwards by half a pixel so that the
|
---|
| 194 | // integer bits represent the first sample (eg, sx1) and the
|
---|
| 195 | // fractional bits are the blend weight of the second sample
|
---|
| 196 | unsigned int temp;
|
---|
| 197 |
|
---|
| 198 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
|
---|
| 199 | // for the center of the destination pixel, not the top-left corner
|
---|
| 200 | uint64 sz_48 = (stepz >> 1) - 1;
|
---|
| 201 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
|
---|
| 202 | temp = sz_48 >> 32;
|
---|
| 203 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
| 204 | size_t sz1 = temp >> 16; // src z, sample #1
|
---|
| 205 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
|
---|
| 206 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
|
---|
| 207 |
|
---|
| 208 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
| 209 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
|
---|
| 210 | temp = sy_48 >> 32;
|
---|
| 211 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
| 212 | size_t sy1 = temp >> 16; // src y #1
|
---|
| 213 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
|
---|
| 214 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
| 215 |
|
---|
| 216 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
| 217 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
|
---|
| 218 | temp = sx_48 >> 32;
|
---|
| 219 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
| 220 | size_t sx1 = temp >> 16; // src x #1
|
---|
| 221 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
|
---|
| 222 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
| 223 |
|
---|
| 224 | // process R,G,B,A simultaneously for cache coherence?
|
---|
| 225 | float accum[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
---|
| 226 |
|
---|
| 227 | #define ACCUM3(x,y,z,factor) \
|
---|
| 228 | { float f = factor; \
|
---|
| 229 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
|
---|
| 230 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
|
---|
| 231 | accum[2]+=srcdata[off+2]*f; }
|
---|
| 232 |
|
---|
| 233 | #define ACCUM4(x,y,z,factor) \
|
---|
| 234 | { float f = factor; \
|
---|
| 235 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
|
---|
| 236 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
|
---|
| 237 | accum[2]+=srcdata[off+2]*f; accum[3]+=srcdata[off+3]*f; }
|
---|
| 238 |
|
---|
| 239 | if (srcchannels == 3 || dstchannels == 3) {
|
---|
| 240 | // RGB, no alpha
|
---|
| 241 | ACCUM3(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
|
---|
| 242 | ACCUM3(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf));
|
---|
| 243 | ACCUM3(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf));
|
---|
| 244 | ACCUM3(sx2,sy2,sz1, sxf * syf *(1.0f-szf));
|
---|
| 245 | ACCUM3(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf );
|
---|
| 246 | ACCUM3(sx2,sy1,sz2, sxf *(1.0f-syf)* szf );
|
---|
| 247 | ACCUM3(sx1,sy2,sz2,(1.0f-sxf)* syf * szf );
|
---|
| 248 | ACCUM3(sx2,sy2,sz2, sxf * syf * szf );
|
---|
| 249 | accum[3] = 1.0f;
|
---|
| 250 | } else {
|
---|
| 251 | // RGBA
|
---|
| 252 | ACCUM4(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
|
---|
| 253 | ACCUM4(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf));
|
---|
| 254 | ACCUM4(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf));
|
---|
| 255 | ACCUM4(sx2,sy2,sz1, sxf * syf *(1.0f-szf));
|
---|
| 256 | ACCUM4(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf );
|
---|
| 257 | ACCUM4(sx2,sy1,sz2, sxf *(1.0f-syf)* szf );
|
---|
| 258 | ACCUM4(sx1,sy2,sz2,(1.0f-sxf)* syf * szf );
|
---|
| 259 | ACCUM4(sx2,sy2,sz2, sxf * syf * szf );
|
---|
| 260 | }
|
---|
| 261 |
|
---|
| 262 | memcpy(pdst, accum, sizeof(float)*dstchannels);
|
---|
| 263 |
|
---|
| 264 | #undef ACCUM3
|
---|
| 265 | #undef ACCUM4
|
---|
| 266 |
|
---|
| 267 | pdst += dstchannels;
|
---|
| 268 | }
|
---|
| 269 | pdst += dstchannels*dst.getRowSkip();
|
---|
| 270 | }
|
---|
| 271 | pdst += dstchannels*dst.getSliceSkip();
|
---|
| 272 | }
|
---|
| 273 | }
|
---|
| 274 | };
|
---|
| 275 |
|
---|
| 276 |
|
---|
| 277 |
|
---|
| 278 | // byte linear resampler, does not do any format conversions.
|
---|
| 279 | // only handles pixel formats that use 1 byte per color channel.
|
---|
| 280 | // 2D only; punts 3D pixelboxes to default LinearResampler (slow).
|
---|
| 281 | // templated on bytes-per-pixel to allow compiler optimizations, such
|
---|
| 282 | // as unrolling loops and replacing multiplies with bitshifts
|
---|
| 283 | template<unsigned int channels> struct LinearResampler_Byte {
|
---|
| 284 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
| 285 | // assert(src.format == dst.format);
|
---|
| 286 |
|
---|
| 287 | // only optimized for 2D
|
---|
| 288 | if (src.getDepth() > 1 || dst.getDepth() > 1) {
|
---|
| 289 | LinearResampler::scale(src, dst);
|
---|
| 290 | return;
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | // srcdata stays at beginning of slice, pdst is a moving pointer
|
---|
| 294 | uchar* srcdata = (uchar*)src.data;
|
---|
| 295 | uchar* pdst = (uchar*)dst.data;
|
---|
| 296 |
|
---|
| 297 | // sx_48,sy_48 represent current position in source
|
---|
| 298 | // using 16/48-bit fixed precision, incremented by steps
|
---|
| 299 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
| 300 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
| 301 |
|
---|
| 302 | // bottom 28 bits of temp are 16/12 bit fixed precision, used to
|
---|
| 303 | // adjust a source coordinate backwards by half a pixel so that the
|
---|
| 304 | // integer bits represent the first sample (eg, sx1) and the
|
---|
| 305 | // fractional bits are the blend weight of the second sample
|
---|
| 306 | unsigned int temp;
|
---|
| 307 |
|
---|
| 308 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
| 309 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
|
---|
| 310 | temp = sy_48 >> 36;
|
---|
| 311 | temp = (temp > 0x800)? temp - 0x800: 0;
|
---|
| 312 | unsigned int syf = temp & 0xFFF;
|
---|
| 313 | size_t sy1 = temp >> 12;
|
---|
| 314 | size_t sy2 = std::min(sy1+1, src.bottom-src.top-1);
|
---|
| 315 | size_t syoff1 = sy1 * src.rowPitch;
|
---|
| 316 | size_t syoff2 = sy2 * src.rowPitch;
|
---|
| 317 |
|
---|
| 318 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
| 319 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
|
---|
| 320 | temp = sx_48 >> 36;
|
---|
| 321 | temp = (temp > 0x800)? temp - 0x800 : 0;
|
---|
| 322 | unsigned int sxf = temp & 0xFFF;
|
---|
| 323 | size_t sx1 = temp >> 12;
|
---|
| 324 | size_t sx2 = std::min(sx1+1, src.right-src.left-1);
|
---|
| 325 |
|
---|
| 326 | unsigned int sxfsyf = sxf*syf;
|
---|
| 327 | for (unsigned int k = 0; k < channels; k++) {
|
---|
| 328 | unsigned int accum =
|
---|
| 329 | srcdata[(sx1 + syoff1)*channels+k]*(0x1000000-(sxf<<12)-(syf<<12)+sxfsyf) +
|
---|
| 330 | srcdata[(sx2 + syoff1)*channels+k]*((sxf<<12)-sxfsyf) +
|
---|
| 331 | srcdata[(sx1 + syoff2)*channels+k]*((syf<<12)-sxfsyf) +
|
---|
| 332 | srcdata[(sx2 + syoff2)*channels+k]*sxfsyf;
|
---|
| 333 | // accum is computed using 8/24-bit fixed-point math
|
---|
| 334 | // (maximum is 0xFF000000; rounding will not cause overflow)
|
---|
| 335 | *pdst++ = (accum + 0x800000) >> 24;
|
---|
| 336 | }
|
---|
| 337 | }
|
---|
| 338 | pdst += channels*dst.getRowSkip();
|
---|
| 339 | }
|
---|
| 340 | }
|
---|
| 341 | };
|
---|
| 342 |
|
---|
| 343 | }
|
---|
| 344 |
|
---|
| 345 | #endif
|
---|