/*
-----------------------------------------------------------------------------
This source file is part of OGRE
(Object-oriented Graphics Rendering Engine)
For the latest info, see http://www.ogre3d.org/

Copyright (c) 2000-2005 The OGRE Team
Also see acknowledgements in Readme.html

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
http://www.gnu.org/copyleft/lesser.txt.
-----------------------------------------------------------------------------
*/
25 | #ifndef OGREIMAGERESAMPLER_H
|
---|
26 | #define OGREIMAGERESAMPLER_H
|
---|
27 |
|
---|
28 | #include <algorithm>
|
---|
29 |
|
---|
30 | // this file is inlined into OgreImage.cpp!
|
---|
31 | // do not include anywhere else.
|
---|
32 | namespace Ogre {
|
---|
33 |
|
---|
34 | // define uint64 type
|
---|
35 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC
|
---|
36 | typedef unsigned __int64 uint64;
|
---|
37 | #else
|
---|
38 | typedef unsigned long long uint64;
|
---|
39 | #endif
|
---|
40 |
|
---|
// variable name hints:
// sx_48 = 16/48-bit fixed-point x-position in source
// stepx = difference between adjacent sx_48 values
// sx1 = lower-bound integer x-position in source
// sx2 = upper-bound integer x-position in source
// sxf = fractional weight between sx1 and sx2
// x,y,z = location of output pixel in destination
48 |
|
---|
49 | // nearest-neighbor resampler, does not convert formats.
|
---|
50 | // templated on bytes-per-pixel to allow compiler optimizations, such
|
---|
51 | // as simplifying memcpy() and replacing multiplies with bitshifts
|
---|
52 | template<unsigned int elemsize> struct NearestResampler {
|
---|
53 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
54 | // assert(src.format == dst.format);
|
---|
55 |
|
---|
56 | // srcdata stays at beginning, pdst is a moving pointer
|
---|
57 | uchar* srcdata = (uchar*)src.data;
|
---|
58 | uchar* pdst = (uchar*)dst.data;
|
---|
59 |
|
---|
60 | // sx_48,sy_48,sz_48 represent current position in source
|
---|
61 | // using 16/48-bit fixed precision, incremented by steps
|
---|
62 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
63 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
64 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
|
---|
65 |
|
---|
66 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
|
---|
67 | // for the center of the destination pixel, not the top-left corner
|
---|
68 | uint64 sz_48 = (stepz >> 1) - 1;
|
---|
69 | for (size_t z = dst.front; z < dst.back; z++, sz_48 += stepz) {
|
---|
70 | size_t srczoff = (size_t)(sz_48 >> 48) * src.slicePitch;
|
---|
71 |
|
---|
72 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
73 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48 += stepy) {
|
---|
74 | size_t srcyoff = (size_t)(sy_48 >> 48) * src.rowPitch;
|
---|
75 |
|
---|
76 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
77 | for (size_t x = dst.left; x < dst.right; x++, sx_48 += stepx) {
|
---|
78 | uchar* psrc = srcdata +
|
---|
79 | elemsize*((size_t)(sx_48 >> 48) + srcyoff + srczoff);
|
---|
80 | memcpy(pdst, psrc, elemsize);
|
---|
81 | pdst += elemsize;
|
---|
82 | }
|
---|
83 | pdst += elemsize*dst.getRowSkip();
|
---|
84 | }
|
---|
85 | pdst += elemsize*dst.getSliceSkip();
|
---|
86 | }
|
---|
87 | }
|
---|
88 | };
|
---|
89 |
|
---|
90 |
|
---|
91 | // default floating-point linear resampler, does format conversion
|
---|
92 | struct LinearResampler {
|
---|
93 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
94 | size_t srcelemsize = PixelUtil::getNumElemBytes(src.format);
|
---|
95 | size_t dstelemsize = PixelUtil::getNumElemBytes(dst.format);
|
---|
96 |
|
---|
97 | // srcdata stays at beginning, pdst is a moving pointer
|
---|
98 | uchar* srcdata = (uchar*)src.data;
|
---|
99 | uchar* pdst = (uchar*)dst.data;
|
---|
100 |
|
---|
101 | // sx_48,sy_48,sz_48 represent current position in source
|
---|
102 | // using 16/48-bit fixed precision, incremented by steps
|
---|
103 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
104 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
105 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
|
---|
106 |
|
---|
107 | // temp is 16/16 bit fixed precision, used to adjust a source
|
---|
108 | // coordinate (x, y, or z) backwards by half a pixel so that the
|
---|
109 | // integer bits represent the first sample (eg, sx1) and the
|
---|
110 | // fractional bits are the blend weight of the second sample
|
---|
111 | unsigned int temp;
|
---|
112 |
|
---|
113 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
|
---|
114 | // for the center of the destination pixel, not the top-left corner
|
---|
115 | uint64 sz_48 = (stepz >> 1) - 1;
|
---|
116 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
|
---|
117 | temp = sz_48 >> 32;
|
---|
118 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
119 | size_t sz1 = temp >> 16; // src z, sample #1
|
---|
120 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
|
---|
121 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
|
---|
122 |
|
---|
123 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
124 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
|
---|
125 | temp = sy_48 >> 32;
|
---|
126 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
127 | size_t sy1 = temp >> 16; // src y #1
|
---|
128 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
|
---|
129 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
130 |
|
---|
131 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
132 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
|
---|
133 | temp = sx_48 >> 32;
|
---|
134 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
135 | size_t sx1 = temp >> 16; // src x #1
|
---|
136 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
|
---|
137 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
138 |
|
---|
139 | ColourValue x1y1z1, x2y1z1, x1y2z1, x2y2z1;
|
---|
140 | ColourValue x1y1z2, x2y1z2, x1y2z2, x2y2z2;
|
---|
141 |
|
---|
142 | #define UNPACK(dst,x,y,z) PixelUtil::unpackColour(&dst, src.format, \
|
---|
143 | srcdata + srcelemsize*((x)+(y)*src.rowPitch+(z)*src.slicePitch))
|
---|
144 |
|
---|
145 | UNPACK(x1y1z1,sx1,sy1,sz1); UNPACK(x2y1z1,sx2,sy1,sz1);
|
---|
146 | UNPACK(x1y2z1,sx1,sy2,sz1); UNPACK(x2y2z1,sx2,sy2,sz1);
|
---|
147 | UNPACK(x1y1z2,sx1,sy1,sz2); UNPACK(x2y1z2,sx2,sy1,sz2);
|
---|
148 | UNPACK(x1y2z2,sx1,sy2,sz2); UNPACK(x2y2z2,sx2,sy2,sz2);
|
---|
149 | #undef UNPACK
|
---|
150 |
|
---|
151 | ColourValue accum =
|
---|
152 | x1y1z1 * ((1.0f - sxf)*(1.0f - syf)*(1.0f - szf)) +
|
---|
153 | x2y1z1 * ( sxf *(1.0f - syf)*(1.0f - szf)) +
|
---|
154 | x1y2z1 * ((1.0f - sxf)* syf *(1.0f - szf)) +
|
---|
155 | x2y2z1 * ( sxf * syf *(1.0f - szf)) +
|
---|
156 | x1y1z2 * ((1.0f - sxf)*(1.0f - syf)* szf ) +
|
---|
157 | x2y1z2 * ( sxf *(1.0f - syf)* szf ) +
|
---|
158 | x1y2z2 * ((1.0f - sxf)* syf * szf ) +
|
---|
159 | x2y2z2 * ( sxf * syf * szf );
|
---|
160 |
|
---|
161 | PixelUtil::packColour(accum, dst.format, pdst);
|
---|
162 |
|
---|
163 | pdst += dstelemsize;
|
---|
164 | }
|
---|
165 | pdst += dstelemsize*dst.getRowSkip();
|
---|
166 | }
|
---|
167 | pdst += dstelemsize*dst.getSliceSkip();
|
---|
168 | }
|
---|
169 | }
|
---|
170 | };
|
---|
171 |
|
---|
172 |
|
---|
173 | // float32 linear resampler, converts FLOAT32_RGB/FLOAT32_RGBA only.
|
---|
174 | // avoids overhead of pixel unpack/repack function calls
|
---|
175 | struct LinearResampler_Float32 {
|
---|
176 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
177 | size_t srcchannels = PixelUtil::getNumElemBytes(src.format) / sizeof(float);
|
---|
178 | size_t dstchannels = PixelUtil::getNumElemBytes(dst.format) / sizeof(float);
|
---|
179 | // assert(srcchannels == 3 || srcchannels == 4);
|
---|
180 | // assert(dstchannels == 3 || dstchannels == 4);
|
---|
181 |
|
---|
182 | // srcdata stays at beginning, pdst is a moving pointer
|
---|
183 | float* srcdata = (float*)src.data;
|
---|
184 | float* pdst = (float*)dst.data;
|
---|
185 |
|
---|
186 | // sx_48,sy_48,sz_48 represent current position in source
|
---|
187 | // using 16/48-bit fixed precision, incremented by steps
|
---|
188 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
189 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
190 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth();
|
---|
191 |
|
---|
192 | // temp is 16/16 bit fixed precision, used to adjust a source
|
---|
193 | // coordinate (x, y, or z) backwards by half a pixel so that the
|
---|
194 | // integer bits represent the first sample (eg, sx1) and the
|
---|
195 | // fractional bits are the blend weight of the second sample
|
---|
196 | unsigned int temp;
|
---|
197 |
|
---|
198 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust
|
---|
199 | // for the center of the destination pixel, not the top-left corner
|
---|
200 | uint64 sz_48 = (stepz >> 1) - 1;
|
---|
201 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) {
|
---|
202 | temp = sz_48 >> 32;
|
---|
203 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
204 | size_t sz1 = temp >> 16; // src z, sample #1
|
---|
205 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2
|
---|
206 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2
|
---|
207 |
|
---|
208 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
209 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
|
---|
210 | temp = sy_48 >> 32;
|
---|
211 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
212 | size_t sy1 = temp >> 16; // src y #1
|
---|
213 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2
|
---|
214 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
215 |
|
---|
216 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
217 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
|
---|
218 | temp = sx_48 >> 32;
|
---|
219 | temp = (temp > 0x8000)? temp - 0x8000 : 0;
|
---|
220 | size_t sx1 = temp >> 16; // src x #1
|
---|
221 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2
|
---|
222 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2
|
---|
223 |
|
---|
224 | // process R,G,B,A simultaneously for cache coherence?
|
---|
225 | float accum[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
---|
226 |
|
---|
227 | #define ACCUM3(x,y,z,factor) \
|
---|
228 | { float f = factor; \
|
---|
229 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
|
---|
230 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
|
---|
231 | accum[2]+=srcdata[off+2]*f; }
|
---|
232 |
|
---|
233 | #define ACCUM4(x,y,z,factor) \
|
---|
234 | { float f = factor; \
|
---|
235 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \
|
---|
236 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \
|
---|
237 | accum[2]+=srcdata[off+2]*f; accum[3]+=srcdata[off+3]*f; }
|
---|
238 |
|
---|
239 | if (srcchannels == 3 || dstchannels == 3) {
|
---|
240 | // RGB, no alpha
|
---|
241 | ACCUM3(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
|
---|
242 | ACCUM3(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf));
|
---|
243 | ACCUM3(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf));
|
---|
244 | ACCUM3(sx2,sy2,sz1, sxf * syf *(1.0f-szf));
|
---|
245 | ACCUM3(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf );
|
---|
246 | ACCUM3(sx2,sy1,sz2, sxf *(1.0f-syf)* szf );
|
---|
247 | ACCUM3(sx1,sy2,sz2,(1.0f-sxf)* syf * szf );
|
---|
248 | ACCUM3(sx2,sy2,sz2, sxf * syf * szf );
|
---|
249 | accum[3] = 1.0f;
|
---|
250 | } else {
|
---|
251 | // RGBA
|
---|
252 | ACCUM4(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf));
|
---|
253 | ACCUM4(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf));
|
---|
254 | ACCUM4(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf));
|
---|
255 | ACCUM4(sx2,sy2,sz1, sxf * syf *(1.0f-szf));
|
---|
256 | ACCUM4(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf );
|
---|
257 | ACCUM4(sx2,sy1,sz2, sxf *(1.0f-syf)* szf );
|
---|
258 | ACCUM4(sx1,sy2,sz2,(1.0f-sxf)* syf * szf );
|
---|
259 | ACCUM4(sx2,sy2,sz2, sxf * syf * szf );
|
---|
260 | }
|
---|
261 |
|
---|
262 | memcpy(pdst, accum, sizeof(float)*dstchannels);
|
---|
263 |
|
---|
264 | #undef ACCUM3
|
---|
265 | #undef ACCUM4
|
---|
266 |
|
---|
267 | pdst += dstchannels;
|
---|
268 | }
|
---|
269 | pdst += dstchannels*dst.getRowSkip();
|
---|
270 | }
|
---|
271 | pdst += dstchannels*dst.getSliceSkip();
|
---|
272 | }
|
---|
273 | }
|
---|
274 | };
|
---|
275 |
|
---|
276 |
|
---|
277 |
|
---|
278 | // byte linear resampler, does not do any format conversions.
|
---|
279 | // only handles pixel formats that use 1 byte per color channel.
|
---|
280 | // 2D only; punts 3D pixelboxes to default LinearResampler (slow).
|
---|
281 | // templated on bytes-per-pixel to allow compiler optimizations, such
|
---|
282 | // as unrolling loops and replacing multiplies with bitshifts
|
---|
283 | template<unsigned int channels> struct LinearResampler_Byte {
|
---|
284 | static void scale(const PixelBox& src, const PixelBox& dst) {
|
---|
285 | // assert(src.format == dst.format);
|
---|
286 |
|
---|
287 | // only optimized for 2D
|
---|
288 | if (src.getDepth() > 1 || dst.getDepth() > 1) {
|
---|
289 | LinearResampler::scale(src, dst);
|
---|
290 | return;
|
---|
291 | }
|
---|
292 |
|
---|
293 | // srcdata stays at beginning of slice, pdst is a moving pointer
|
---|
294 | uchar* srcdata = (uchar*)src.data;
|
---|
295 | uchar* pdst = (uchar*)dst.data;
|
---|
296 |
|
---|
297 | // sx_48,sy_48 represent current position in source
|
---|
298 | // using 16/48-bit fixed precision, incremented by steps
|
---|
299 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth();
|
---|
300 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight();
|
---|
301 |
|
---|
302 | // bottom 28 bits of temp are 16/12 bit fixed precision, used to
|
---|
303 | // adjust a source coordinate backwards by half a pixel so that the
|
---|
304 | // integer bits represent the first sample (eg, sx1) and the
|
---|
305 | // fractional bits are the blend weight of the second sample
|
---|
306 | unsigned int temp;
|
---|
307 |
|
---|
308 | uint64 sy_48 = (stepy >> 1) - 1;
|
---|
309 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) {
|
---|
310 | temp = sy_48 >> 36;
|
---|
311 | temp = (temp > 0x800)? temp - 0x800: 0;
|
---|
312 | unsigned int syf = temp & 0xFFF;
|
---|
313 | size_t sy1 = temp >> 12;
|
---|
314 | size_t sy2 = std::min(sy1+1, src.bottom-src.top-1);
|
---|
315 | size_t syoff1 = sy1 * src.rowPitch;
|
---|
316 | size_t syoff2 = sy2 * src.rowPitch;
|
---|
317 |
|
---|
318 | uint64 sx_48 = (stepx >> 1) - 1;
|
---|
319 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) {
|
---|
320 | temp = sx_48 >> 36;
|
---|
321 | temp = (temp > 0x800)? temp - 0x800 : 0;
|
---|
322 | unsigned int sxf = temp & 0xFFF;
|
---|
323 | size_t sx1 = temp >> 12;
|
---|
324 | size_t sx2 = std::min(sx1+1, src.right-src.left-1);
|
---|
325 |
|
---|
326 | unsigned int sxfsyf = sxf*syf;
|
---|
327 | for (unsigned int k = 0; k < channels; k++) {
|
---|
328 | unsigned int accum =
|
---|
329 | srcdata[(sx1 + syoff1)*channels+k]*(0x1000000-(sxf<<12)-(syf<<12)+sxfsyf) +
|
---|
330 | srcdata[(sx2 + syoff1)*channels+k]*((sxf<<12)-sxfsyf) +
|
---|
331 | srcdata[(sx1 + syoff2)*channels+k]*((syf<<12)-sxfsyf) +
|
---|
332 | srcdata[(sx2 + syoff2)*channels+k]*sxfsyf;
|
---|
333 | // accum is computed using 8/24-bit fixed-point math
|
---|
334 | // (maximum is 0xFF000000; rounding will not cause overflow)
|
---|
335 | *pdst++ = (accum + 0x800000) >> 24;
|
---|
336 | }
|
---|
337 | }
|
---|
338 | pdst += channels*dst.getRowSkip();
|
---|
339 | }
|
---|
340 | }
|
---|
341 | };
|
---|
342 |
|
---|
343 | }
|
---|
344 |
|
---|
345 | #endif
|
---|