/*
 * Static-analysis note (clang static analyzer report):
 *   File:    jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16nw.c
 *   Warning: line 537, column 26 - Assigned value is garbage or undefined
 */
/*
 * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25 | ||||
26 | ||||
/*
 * FUNCTION
 *      Internal functions for mlib_ImageConv* on U8/S16/U16 types and
 *      MLIB_EDGE_DST_NO_WRITE mask
 */
32 | ||||
#include "mlib_image.h"
#include "mlib_c_ImageConv.h"
35 | ||||
/*
  This define switches between functions of different data types:
    1 - mlib_u8, 2 - mlib_s16, 3 - mlib_u16
*/
#define IMG_TYPE 2

/***************************************************************/
/*
 * Per-type configuration:
 *   DTYPE        pixel type of the source and destination images
 *   CONV_FUNC    name of the floating-point convolution entry point
 *   CONV_FUNC_I  name of the integer convolution entry point
 *   DSCALE       scale applied to the kernel so that the result ends up
 *                in the upper bits of a 32-bit integer
 *   FROM_S32     extracts the pixel value from that 32-bit integer
 *   S64TOS32     masks a 64-bit value before it is OR-ed into the low half
 *                of a 64-bit word (see LOAD_BUFF)
 *   SAT_OFF      text appended to the argument of D2I (an offset for the
 *                unsigned types, empty for mlib_s16)
 */
#if IMG_TYPE == 1

#define DTYPE             mlib_u8
#define CONV_FUNC(KERN)   mlib_c_conv##KERN##nw_u8
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u8
#define DSCALE            (1 << 24)
#define FROM_S32(x)       (((x) >> 24) ^ 128)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#elif IMG_TYPE == 2

#define DTYPE             mlib_s16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_s16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_s16
#define DSCALE            65536.0
#define FROM_S32(x)       ((x) >> 16)
#define S64TOS32(x)       ((x) & 0xffffffff)
#define SAT_OFF

#elif IMG_TYPE == 3

#define DTYPE             mlib_u16
#define CONV_FUNC(KERN)   mlib_conv##KERN##nw_u16
#define CONV_FUNC_I(KERN) mlib_i_conv##KERN##nw_u16
#define DSCALE            65536.0
#define FROM_S32(x)       (((x) >> 16) ^ 0x8000)
#define S64TOS32(x)       (x)
#define SAT_OFF           -(1u << 31)

#endif /* IMG_TYPE == 1 */
73 | ||||
74 | /***************************************************************/ | |||
/* Number of FTYPE elements in the on-stack scratch buffers. */
#define BUFF_SIZE   1600

/* Byte budget used by mlib_ImageConv1xN to size its row tiles. */
#define CACHE_SIZE  (64*1024)

/***************************************************************/
/* Floating-point type used for kernels and accumulators. */
#define FTYPE mlib_d64

#ifndef MLIB_USE_FTOI_CLAMPING

/* Convert x to mlib_s32, saturating at MLIB_S32_MIN / MLIB_S32_MAX. */
#define CLAMP_S32(x)                                            \
  (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX : (mlib_s32)(x)))

#else

/* Plain cast; selected when MLIB_USE_FTOI_CLAMPING is defined. */
#define CLAMP_S32(x) ((mlib_s32)(x))

#endif /* MLIB_USE_FTOI_CLAMPING */

/***************************************************************/
/* Double to clamped 32-bit integer; SAT_OFF is appended textually to x. */
#define D2I(x) CLAMP_S32((x) SAT_OFF)
95 | ||||
96 | /***************************************************************/ | |||
/*
 * STORE2 writes two results to dp[0] and dp[chan1]; the order of the two
 * arguments is swapped on little-endian builds.
 * NOTE: expands to two statements - use it only as a full statement.
 */
#ifdef _LITTLE_ENDIAN

#define STORE2(res0, res1)                                      \
  dp[0    ] = res1;                                             \
  dp[chan1] = res0

#else

#define STORE2(res0, res1)                                      \
  dp[0    ] = res0;                                             \
  dp[chan1] = res1

#endif /* _LITTLE_ENDIAN */

/***************************************************************/
/*
 * LOAD_BUFF copies the pixels sp[0] and sp[chan1] into buff[i] and
 * buff[i + 1].  Without _NO_LONGLONG both values are packed into one
 * 64-bit store, so (buff + i) is written through an mlib_s64 pointer.
 */
#ifdef _NO_LONGLONG

#define LOAD_BUFF(buff)                                         \
  buff[i    ] = sp[0];                                          \
  buff[i + 1] = sp[chan1]

#else /* _NO_LONGLONG */

#ifdef _LITTLE_ENDIAN

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])

#else /* _LITTLE_ENDIAN */

#define LOAD_BUFF(buff)                                         \
  *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])

#endif /* _LITTLE_ENDIAN */
#endif /* _NO_LONGLONG */
132 | ||||
133 | /***************************************************************/ | |||
134 | typedef union { | |||
135 | mlib_d64 d64; | |||
136 | struct { | |||
137 | mlib_s32 i0; | |||
138 | mlib_s32 i1; | |||
139 | } i32s; | |||
140 | struct { | |||
141 | mlib_s32 f0; | |||
142 | mlib_s32 f1; | |||
143 | } f32s; | |||
144 | } d64_2x32; | |||
145 | ||||
146 | /***************************************************************/ | |||
/*
 * Declares the locals shared by the convolution functions.  The expansion
 * refers to a local array named buff, which must be declared beforehand.
 */
#define DEF_VARS(type)                                          \
  type     *adr_src, *sl, *sp = NULL;                           \
  type     *adr_dst, *dl, *dp = NULL;                           \
  FTYPE    *pbuff = buff;                                       \
  mlib_s32 wid, hgt, sll, dll;                                  \
  mlib_s32 nchannel, chan1;                                     \
  mlib_s32 i, j, c

/***************************************************************/
/*
 * Fills the DEF_VARS locals from the images src and dst.  Width, height and
 * channel count are taken from src; strides are converted from bytes to
 * elements of the given type.
 */
#define GET_SRC_DST_PARAMETERS(type)                            \
  hgt = mlib_ImageGetHeight(src);                               \
  wid = mlib_ImageGetWidth(src);                                \
  nchannel = mlib_ImageGetChannels(src);                        \
  sll = mlib_ImageGetStride(src) / sizeof(type);                \
  dll = mlib_ImageGetStride(dst) / sizeof(type);                \
  adr_src = (type *)mlib_ImageGetData(src);                     \
  adr_dst = (type *)mlib_ImageGetData(dst)
164 | ||||
165 | /***************************************************************/ | |||
/*
 * CLAMP_STORE(dst, val): store val into dst, saturated to the range of the
 * pixel type selected by IMG_TYPE.
 * NOTE: expands to an unbraced if/else chain and evaluates val several
 * times - use it only as a full statement with side-effect-free arguments.
 */
#if IMG_TYPE == 1

/* Test for the presence of any "1" bit in bits
   8 to 31 of val. If present, then val is either
   negative or >255. If over/underflows of 8 bits
   are uncommon, then this technique can be a win,
   since only a single test, rather than two, is
   necessary to determine if clamping is needed.
   On the other hand, if over/underflows are common,
   it adds an extra test.
*/
#define CLAMP_STORE(dst, val)                                   \
  if (val & 0xffffff00) {                                       \
    if (val < MLIB_U8_MIN)                                      \
      dst = MLIB_U8_MIN;                                        \
    else                                                        \
      dst = MLIB_U8_MAX;                                        \
  } else {                                                      \
    dst = (mlib_u8)val;                                         \
  }

#elif IMG_TYPE == 2

#define CLAMP_STORE(dst, val)                                   \
  if (val >= MLIB_S16_MAX)                                      \
    dst = MLIB_S16_MAX;                                         \
  else if (val <= MLIB_S16_MIN)                                 \
    dst = MLIB_S16_MIN;                                         \
  else                                                          \
    dst = (mlib_s16)val

#elif IMG_TYPE == 3

#define CLAMP_STORE(dst, val)                                   \
  if (val >= MLIB_U16_MAX)                                      \
    dst = MLIB_U16_MAX;                                         \
  else if (val <= MLIB_U16_MIN)                                 \
    dst = MLIB_U16_MIN;                                         \
  else                                                          \
    dst = (mlib_u16)val

#endif /* IMG_TYPE == 1 */
208 | ||||
209 | /***************************************************************/ | |||
/* Largest number of kernel columns CONV_FUNC(MxN) handles in one pass. */
#define MAX_KER   7
/* Largest kernel height whose row-pointer table fits in the on-stack array. */
#define MAX_N    15
212 | ||||
213 | static mlib_status mlib_ImageConv1xN(mlib_image *dst, | |||
214 | const mlib_image *src, | |||
215 | const mlib_d64 *k, | |||
216 | mlib_s32 n, | |||
217 | mlib_s32 dn, | |||
218 | mlib_s32 cmask) | |||
219 | { | |||
220 | FTYPEmlib_d64 buff[BUFF_SIZE1600]; | |||
221 | mlib_s32 off, kh; | |||
222 | mlib_s32 d0, d1; | |||
223 | const FTYPEmlib_d64 *pk; | |||
224 | FTYPEmlib_d64 k0, k1, k2, k3; | |||
225 | FTYPEmlib_d64 p0, p1, p2, p3, p4; | |||
226 | DEF_VARS(DTYPE)mlib_s16 *adr_src, *sl, *sp = ((void*)0); mlib_s16 *adr_dst, * dl, *dp = ((void*)0); mlib_d64 *pbuff = buff; mlib_s32 wid, hgt , sll, dll; mlib_s32 nchannel, chan1; mlib_s32 i, j, c; | |||
227 | DTYPEmlib_s16 *sl_c, *dl_c, *sl0; | |||
228 | mlib_s32 l, hsize, max_hsize; | |||
229 | GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src) ; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride (src) / sizeof(mlib_s16); dll = mlib_ImageGetStride(dst) / sizeof (mlib_s16); adr_src = (mlib_s16 *)mlib_ImageGetData(src); adr_dst = (mlib_s16 *)mlib_ImageGetData(dst); | |||
230 | ||||
231 | hgt -= (n - 1); | |||
232 | adr_dst += dn*dll; | |||
233 | ||||
234 | max_hsize = (CACHE_SIZE(64*1024)/sizeof(DTYPEmlib_s16))/sll; | |||
235 | ||||
236 | if (!max_hsize) max_hsize = 1; | |||
237 | ||||
238 | if (max_hsize > BUFF_SIZE1600) { | |||
239 | pbuff = mlib_malloc(sizeof(FTYPEmlib_d64)*max_hsize); | |||
240 | } | |||
241 | ||||
242 | chan1 = nchannel; | |||
243 | ||||
244 | sl_c = adr_src; | |||
245 | dl_c = adr_dst; | |||
246 | ||||
247 | for (l = 0; l < hgt; l += hsize) { | |||
248 | hsize = hgt - l; | |||
249 | ||||
250 | if (hsize > max_hsize) hsize = max_hsize; | |||
251 | ||||
252 | for (c = 0; c < nchannel; c++) { | |||
253 | if (!(cmask & (1 << (chan1 - 1 - c)))) continue; | |||
254 | ||||
255 | sl = sl_c + c; | |||
256 | dl = dl_c + c; | |||
257 | ||||
258 | for (j = 0; j < hsize; j++) pbuff[j] = 0.0; | |||
259 | ||||
260 | for (i = 0; i < wid; i++) { | |||
261 | sl0 = sl; | |||
262 | ||||
263 | for (off = 0; off < (n - 4); off += 4) { | |||
264 | pk = k + off; | |||
265 | sp = sl0; | |||
266 | ||||
267 | k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; | |||
268 | p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll]; | |||
269 | sp += 3*sll; | |||
270 | ||||
271 | for (j = 0; j < hsize; j += 2) { | |||
272 | p0 = p2; p1 = p3; p2 = p4; | |||
273 | p3 = sp[0]; | |||
274 | p4 = sp[sll]; | |||
275 | ||||
276 | pbuff[j ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; | |||
277 | pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; | |||
278 | ||||
279 | sp += 2*sll; | |||
280 | } | |||
281 | ||||
282 | sl0 += 4*sll; | |||
283 | } | |||
284 | ||||
285 | pk = k + off; | |||
286 | sp = sl0; | |||
287 | ||||
288 | k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; | |||
289 | p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll]; | |||
290 | ||||
291 | dp = dl; | |||
292 | kh = n - off; | |||
293 | ||||
294 | if (kh == 4) { | |||
295 | sp += 3*sll; | |||
296 | ||||
297 | for (j = 0; j <= (hsize - 2); j += 2) { | |||
298 | p0 = p2; p1 = p3; p2 = p4; | |||
299 | p3 = sp[0]; | |||
300 | p4 = sp[sll]; | |||
301 | ||||
302 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])))); | |||
303 | d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)(( p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1])))); | |||
304 | ||||
305 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
306 | dp[dll] = FROM_S32(d1)((d1) >> 16); | |||
307 | ||||
308 | pbuff[j] = 0; | |||
309 | pbuff[j + 1] = 0; | |||
310 | ||||
311 | sp += 2*sll; | |||
312 | dp += 2*dll; | |||
313 | } | |||
314 | ||||
315 | if (j < hsize) { | |||
316 | p0 = p2; p1 = p3; p2 = p4; | |||
317 | p3 = sp[0]; | |||
318 | ||||
319 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])))); | |||
320 | ||||
321 | pbuff[j] = 0; | |||
322 | ||||
323 | dp[0] = FROM_S32(d0)((d0) >> 16); | |||
324 | } | |||
325 | ||||
326 | } else if (kh == 3) { | |||
327 | sp += 2*sll; | |||
328 | ||||
329 | for (j = 0; j <= (hsize - 2); j += 2) { | |||
330 | p0 = p2; p1 = p3; | |||
331 | p2 = sp[0]; | |||
332 | p3 = sp[sll]; | |||
333 | ||||
334 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + pbuff[j])) <= (-2147483647 -1) ) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + pbuff[j])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + pbuff[j])))); | |||
335 | d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1])((((p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + pbuff[ j + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1])))); | |||
336 | ||||
337 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
338 | dp[dll] = FROM_S32(d1)((d1) >> 16); | |||
339 | ||||
340 | pbuff[j] = 0; | |||
341 | pbuff[j + 1] = 0; | |||
342 | ||||
343 | sp += 2*sll; | |||
344 | dp += 2*dll; | |||
345 | } | |||
346 | ||||
347 | if (j < hsize) { | |||
348 | p0 = p2; p1 = p3; | |||
349 | p2 = sp[0]; | |||
350 | ||||
351 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + pbuff[j])) <= (-2147483647 -1) ) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + pbuff[j])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + pbuff[j])))); | |||
352 | ||||
353 | pbuff[j] = 0; | |||
354 | ||||
355 | dp[0] = FROM_S32(d0)((d0) >> 16); | |||
356 | } | |||
357 | ||||
358 | } else if (kh == 2) { | |||
359 | sp += sll; | |||
360 | ||||
361 | for (j = 0; j <= (hsize - 2); j += 2) { | |||
362 | p0 = p2; | |||
363 | p1 = sp[0]; | |||
364 | p2 = sp[sll]; | |||
365 | ||||
366 | d0 = D2I(p0*k0 + p1*k1 + pbuff[j])((((p0*k0 + p1*k1 + pbuff[j])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + pbuff[j])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + pbuff[j])))); | |||
367 | d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1])((((p1*k0 + p2*k1 + pbuff[j + 1])) <= (-2147483647 -1)) ? ( -2147483647 -1) : ((((p1*k0 + p2*k1 + pbuff[j + 1])) >= 2147483647 ) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + pbuff[j + 1])))); | |||
368 | ||||
369 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
370 | dp[dll] = FROM_S32(d1)((d1) >> 16); | |||
371 | ||||
372 | pbuff[j] = 0; | |||
373 | pbuff[j + 1] = 0; | |||
374 | ||||
375 | sp += 2*sll; | |||
376 | dp += 2*dll; | |||
377 | } | |||
378 | ||||
379 | if (j < hsize) { | |||
380 | p0 = p2; | |||
381 | p1 = sp[0]; | |||
382 | ||||
383 | d0 = D2I(p0*k0 + p1*k1 + pbuff[j])((((p0*k0 + p1*k1 + pbuff[j])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + pbuff[j])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + pbuff[j])))); | |||
384 | ||||
385 | pbuff[j] = 0; | |||
386 | ||||
387 | dp[0] = FROM_S32(d0)((d0) >> 16); | |||
388 | } | |||
389 | ||||
390 | } else /* if (kh == 1) */ { | |||
391 | for (j = 0; j < hsize; j++) { | |||
392 | p0 = sp[0]; | |||
393 | ||||
394 | d0 = D2I(p0*k0 + pbuff[j])((((p0*k0 + pbuff[j])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + pbuff[j])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + pbuff[j])))); | |||
395 | ||||
396 | dp[0] = FROM_S32(d0)((d0) >> 16); | |||
397 | ||||
398 | pbuff[j] = 0; | |||
399 | ||||
400 | sp += sll; | |||
401 | dp += dll; | |||
402 | } | |||
403 | } | |||
404 | ||||
405 | sl += chan1; | |||
406 | dl += chan1; | |||
407 | } | |||
408 | } | |||
409 | ||||
410 | sl_c += max_hsize*sll; | |||
411 | dl_c += max_hsize*dll; | |||
412 | } | |||
413 | ||||
414 | if (pbuff != buff) mlib_free(pbuff); | |||
415 | ||||
416 | return MLIB_SUCCESS; | |||
417 | } | |||
418 | ||||
419 | /***************************************************************/ | |||
420 | mlib_status CONV_FUNC(MxN)mlib_convMxNnw_s16(mlib_image *dst, | |||
421 | const mlib_image *src, | |||
422 | const mlib_s32 *kernel, | |||
423 | mlib_s32 m, | |||
424 | mlib_s32 n, | |||
425 | mlib_s32 dm, | |||
426 | mlib_s32 dn, | |||
427 | mlib_s32 scale, | |||
428 | mlib_s32 cmask) | |||
429 | { | |||
430 | FTYPEmlib_d64 buff[BUFF_SIZE1600], *buffs_arr[2*(MAX_N15 + 1)]; | |||
431 | FTYPEmlib_d64 **buffs = buffs_arr, *buffd; | |||
432 | FTYPEmlib_d64 akernel[256], *k = akernel, fscale = DSCALE65536.0; | |||
433 | mlib_s32 mn, l, off, kw, bsize, buff_ind; | |||
434 | mlib_s32 d0, d1; | |||
435 | FTYPEmlib_d64 k0, k1, k2, k3, k4, k5, k6; | |||
436 | FTYPEmlib_d64 p0, p1, p2, p3, p4, p5, p6, p7; | |||
437 | d64_2x32 dd; | |||
438 | DEF_VARS(DTYPE)mlib_s16 *adr_src, *sl, *sp = ((void*)0); mlib_s16 *adr_dst, * dl, *dp = ((void*)0); mlib_d64 *pbuff = buff; mlib_s32 wid, hgt , sll, dll; mlib_s32 nchannel, chan1; mlib_s32 i, j, c; | |||
439 | mlib_s32 chan2; | |||
440 | mlib_s32 *buffo, *buffi; | |||
441 | mlib_status status = MLIB_SUCCESS; | |||
442 | ||||
443 | GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src) ; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride (src) / sizeof(mlib_s16); dll = mlib_ImageGetStride(dst) / sizeof (mlib_s16); adr_src = (mlib_s16 *)mlib_ImageGetData(src); adr_dst = (mlib_s16 *)mlib_ImageGetData(dst); | |||
444 | ||||
445 | if (scale > 30) { | |||
| ||||
446 | fscale *= 1.0/(1 << 30); | |||
447 | scale -= 30; | |||
448 | } | |||
449 | ||||
450 | fscale /= (1 << scale); | |||
451 | ||||
452 | mn = m*n; | |||
453 | ||||
454 | if (mn > 256) { | |||
455 | k = mlib_malloc(mn*sizeof(mlib_d64)); | |||
456 | ||||
457 | if (k == NULL((void*)0)) return MLIB_FAILURE; | |||
458 | } | |||
459 | ||||
460 | for (i = 0; i < mn; i++) { | |||
461 | k[i] = kernel[i]*fscale; | |||
462 | } | |||
463 | ||||
464 | if (m == 1) { | |||
465 | status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask); | |||
466 | FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free (k); return status; | |||
467 | } | |||
468 | ||||
469 | bsize = (n + 3)*wid; | |||
470 | ||||
471 | if ((bsize > BUFF_SIZE1600) || (n > MAX_N15)) { | |||
472 | pbuff = mlib_malloc(sizeof(FTYPEmlib_d64)*bsize + sizeof(FTYPEmlib_d64 *)*2*(n + 1)); | |||
473 | ||||
474 | if (pbuff == NULL((void*)0)) { | |||
475 | status = MLIB_FAILURE; | |||
476 | FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free (k); return status; | |||
477 | } | |||
478 | buffs = (FTYPEmlib_d64 **)(pbuff + bsize); | |||
479 | } | |||
480 | ||||
481 | for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid; | |||
482 | for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l]; | |||
483 | buffd = buffs[n] + wid; | |||
484 | buffo = (mlib_s32*)(buffd + wid); | |||
485 | buffi = buffo + (wid &~ 1); | |||
486 | ||||
487 | chan1 = nchannel; | |||
488 | chan2 = chan1 + chan1; | |||
489 | ||||
490 | wid -= (m - 1); | |||
491 | hgt -= (n - 1); | |||
492 | adr_dst += dn*dll + dm*nchannel; | |||
493 | ||||
494 | for (c = 0; c < nchannel; c++) { | |||
495 | if (!(cmask & (1 << (chan1 - 1 - c)))) continue; | |||
496 | ||||
497 | sl = adr_src + c; | |||
498 | dl = adr_dst + c; | |||
499 | ||||
500 | for (l = 0; l < n; l++) { | |||
501 | FTYPEmlib_d64 *buff = buffs[l]; | |||
502 | ||||
503 | for (i = 0; i < wid + (m - 1); i++) { | |||
504 | buff[i] = (FTYPEmlib_d64)sl[i*chan1]; | |||
505 | } | |||
506 | ||||
507 | sl += sll; | |||
508 | } | |||
509 | ||||
510 | buff_ind = 0; | |||
511 | ||||
512 | for (i = 0; i < wid; i++) buffd[i] = 0.0; | |||
513 | ||||
514 | for (j = 0; j < hgt; j++) { | |||
515 | FTYPEmlib_d64 **buffc = buffs + buff_ind; | |||
516 | FTYPEmlib_d64 *buffn = buffc[n]; | |||
517 | FTYPEmlib_d64 *pk = k; | |||
518 | ||||
519 | for (l = 0; l < n; l++) { | |||
520 | FTYPEmlib_d64 *buff_l = buffc[l]; | |||
521 | ||||
522 | for (off = 0; off < m;) { | |||
523 | FTYPEmlib_d64 *buff = buff_l + off; | |||
524 | ||||
525 | kw = m - off; | |||
526 | ||||
527 | if (kw > 2*MAX_KER7) kw = MAX_KER7; else | |||
528 | if (kw > MAX_KER7) kw = kw/2; | |||
529 | off += kw; | |||
530 | ||||
531 | sp = sl; | |||
532 | dp = dl; | |||
533 | ||||
534 | p2 = buff[0]; p3 = buff[1]; p4 = buff[2]; | |||
535 | p5 = buff[3]; p6 = buff[4]; p7 = buff[5]; | |||
536 | ||||
537 | k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; | |||
| ||||
538 | k4 = pk[4]; k5 = pk[5]; k6 = pk[6]; | |||
539 | pk += kw; | |||
540 | ||||
541 | if (kw == 7) { | |||
542 | ||||
543 | if (l < (n - 1) || off < m) { | |||
544 | for (i = 0; i <= (wid - 2); i += 2) { | |||
545 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; | |||
546 | ||||
547 | p6 = buff[i + 6]; p7 = buff[i + 7]; | |||
548 | ||||
549 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; | |||
550 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; | |||
551 | } | |||
552 | ||||
553 | } else { | |||
554 | for (i = 0; i <= (wid - 2); i += 2) { | |||
555 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; | |||
556 | ||||
557 | p6 = buff[i + 6]; p7 = buff[i + 7]; | |||
558 | ||||
559 | LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32) | (((mlib_s64)sp[0]) & 0xffffffff); | |||
560 | ||||
561 | dd.d64 = *(FTYPEmlib_d64 *)(buffi + i); | |||
562 | buffn[i ] = (FTYPEmlib_d64)dd.i32s.i0; | |||
563 | buffn[i + 1] = (FTYPEmlib_d64)dd.i32s.i1; | |||
564 | ||||
565 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd [i ])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]) ) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ])))); | |||
566 | d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd [i + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1 *k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2 *k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]))) ); | |||
567 | ||||
568 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
569 | dp[chan1] = FROM_S32(d1)((d1) >> 16); | |||
570 | ||||
571 | buffd[i ] = 0.0; | |||
572 | buffd[i + 1] = 0.0; | |||
573 | ||||
574 | sp += chan2; | |||
575 | dp += chan2; | |||
576 | } | |||
577 | } | |||
578 | ||||
579 | } else if (kw == 6) { | |||
580 | ||||
581 | if (l < (n - 1) || off < m) { | |||
582 | for (i = 0; i <= (wid - 2); i += 2) { | |||
583 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; | |||
584 | ||||
585 | p5 = buff[i + 5]; p6 = buff[i + 6]; | |||
586 | ||||
587 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5; | |||
588 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5; | |||
589 | } | |||
590 | ||||
591 | } else { | |||
592 | for (i = 0; i <= (wid - 2); i += 2) { | |||
593 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; | |||
594 | ||||
595 | p5 = buff[i + 5]; p6 = buff[i + 6]; | |||
596 | ||||
597 | buffn[i ] = (FTYPEmlib_d64)sp[0]; | |||
598 | buffn[i + 1] = (FTYPEmlib_d64)sp[chan1]; | |||
599 | ||||
600 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ] )) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1 *k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ])) >= 2147483647 ) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4 *k4 + p5*k5 + buffd[i ])))); | |||
601 | d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1])))); | |||
602 | ||||
603 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
604 | dp[chan1] = FROM_S32(d1)((d1) >> 16); | |||
605 | ||||
606 | buffd[i ] = 0.0; | |||
607 | buffd[i + 1] = 0.0; | |||
608 | ||||
609 | sp += chan2; | |||
610 | dp += chan2; | |||
611 | } | |||
612 | } | |||
613 | ||||
614 | } else if (kw == 5) { | |||
615 | ||||
616 | if (l < (n - 1) || off < m) { | |||
617 | for (i = 0; i <= (wid - 2); i += 2) { | |||
618 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; | |||
619 | ||||
620 | p4 = buff[i + 4]; p5 = buff[i + 5]; | |||
621 | ||||
622 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4; | |||
623 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4; | |||
624 | } | |||
625 | ||||
626 | } else { | |||
627 | for (i = 0; i <= (wid - 2); i += 2) { | |||
628 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; | |||
629 | ||||
630 | p4 = buff[i + 4]; p5 = buff[i + 5]; | |||
631 | ||||
632 | buffn[i ] = (FTYPEmlib_d64)sp[0]; | |||
633 | buffn[i + 1] = (FTYPEmlib_d64)sp[chan1]; | |||
634 | ||||
635 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2 *k2 + p3*k3 + p4*k4 + buffd[i ])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[ i ])))); | |||
636 | d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3 *k2 + p4*k3 + p5*k4 + buffd[i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[ i + 1])))); | |||
637 | ||||
638 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
639 | dp[chan1] = FROM_S32(d1)((d1) >> 16); | |||
640 | ||||
641 | buffd[i ] = 0.0; | |||
642 | buffd[i + 1] = 0.0; | |||
643 | ||||
644 | sp += chan2; | |||
645 | dp += chan2; | |||
646 | } | |||
647 | } | |||
648 | ||||
649 | } else if (kw == 4) { | |||
650 | ||||
651 | if (l < (n - 1) || off < m) { | |||
652 | for (i = 0; i <= (wid - 2); i += 2) { | |||
653 | p0 = p2; p1 = p3; p2 = p4; | |||
654 | ||||
655 | p3 = buff[i + 3]; p4 = buff[i + 4]; | |||
656 | ||||
657 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; | |||
658 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; | |||
659 | } | |||
660 | ||||
661 | } else { | |||
662 | for (i = 0; i <= (wid - 2); i += 2) { | |||
663 | p0 = p2; p1 = p3; p2 = p4; | |||
664 | ||||
665 | p3 = buff[i + 3]; p4 = buff[i + 4]; | |||
666 | ||||
667 | buffn[i ] = (FTYPEmlib_d64)sp[0]; | |||
668 | buffn[i + 1] = (FTYPEmlib_d64)sp[chan1]; | |||
669 | ||||
670 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0* k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])))); | |||
671 | d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)(( p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])))); | |||
672 | ||||
673 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
674 | dp[chan1] = FROM_S32(d1)((d1) >> 16); | |||
675 | ||||
676 | buffd[i ] = 0.0; | |||
677 | buffd[i + 1] = 0.0; | |||
678 | ||||
679 | sp += chan2; | |||
680 | dp += chan2; | |||
681 | } | |||
682 | } | |||
683 | ||||
684 | } else if (kw == 3) { | |||
685 | ||||
686 | if (l < (n - 1) || off < m) { | |||
687 | for (i = 0; i <= (wid - 2); i += 2) { | |||
688 | p0 = p2; p1 = p3; | |||
689 | ||||
690 | p2 = buff[i + 2]; p3 = buff[i + 3]; | |||
691 | ||||
692 | buffd[i ] += p0*k0 + p1*k1 + p2*k2; | |||
693 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2; | |||
694 | } | |||
695 | ||||
696 | } else { | |||
697 | for (i = 0; i <= (wid - 2); i += 2) { | |||
698 | p0 = p2; p1 = p3; | |||
699 | ||||
700 | p2 = buff[i + 2]; p3 = buff[i + 3]; | |||
701 | ||||
702 | buffn[i ] = (FTYPEmlib_d64)sp[0]; | |||
703 | buffn[i + 1] = (FTYPEmlib_d64)sp[chan1]; | |||
704 | ||||
705 | d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + buffd[i ])) <= (-2147483647 -1 )) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + buffd[i ] )) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + buffd[i ])))); | |||
706 | d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + buffd[ i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])))); | |||
707 | ||||
708 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
709 | dp[chan1] = FROM_S32(d1)((d1) >> 16); | |||
710 | ||||
711 | buffd[i ] = 0.0; | |||
712 | buffd[i + 1] = 0.0; | |||
713 | ||||
714 | sp += chan2; | |||
715 | dp += chan2; | |||
716 | } | |||
717 | } | |||
718 | ||||
719 | } else /*if (kw == 2)*/ { | |||
720 | ||||
721 | if (l < (n - 1) || off < m) { | |||
722 | for (i = 0; i <= (wid - 2); i += 2) { | |||
723 | p0 = p2; | |||
724 | ||||
725 | p1 = buff[i + 1]; p2 = buff[i + 2]; | |||
726 | ||||
727 | buffd[i ] += p0*k0 + p1*k1; | |||
728 | buffd[i + 1] += p1*k0 + p2*k1; | |||
729 | } | |||
730 | ||||
731 | } else { | |||
732 | for (i = 0; i <= (wid - 2); i += 2) { | |||
733 | p0 = p2; | |||
734 | ||||
735 | p1 = buff[i + 1]; p2 = buff[i + 2]; | |||
736 | ||||
737 | buffn[i ] = (FTYPEmlib_d64)sp[0]; | |||
738 | buffn[i + 1] = (FTYPEmlib_d64)sp[chan1]; | |||
739 | ||||
740 | d0 = D2I(p0*k0 + p1*k1 + buffd[i ])((((p0*k0 + p1*k1 + buffd[i ])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + buffd[i ])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + buffd[i ])))); | |||
741 | d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1])((((p1*k0 + p2*k1 + buffd[i + 1])) <= (-2147483647 -1)) ? ( -2147483647 -1) : ((((p1*k0 + p2*k1 + buffd[i + 1])) >= 2147483647 ) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + buffd[i + 1])))); | |||
742 | ||||
743 | dp[0 ] = FROM_S32(d0)((d0) >> 16); | |||
744 | dp[chan1] = FROM_S32(d1)((d1) >> 16); | |||
745 | ||||
746 | buffd[i ] = 0.0; | |||
747 | buffd[i + 1] = 0.0; | |||
748 | ||||
749 | sp += chan2; | |||
750 | dp += chan2; | |||
751 | } | |||
752 | } | |||
753 | } | |||
754 | } | |||
755 | } | |||
756 | ||||
757 | /* last pixels */ | |||
758 | for (; i < wid; i++) { | |||
759 | FTYPEmlib_d64 *pk = k, s = 0; | |||
760 | mlib_s32 x, d0; | |||
761 | ||||
762 | for (l = 0; l < n; l++) { | |||
763 | FTYPEmlib_d64 *buff = buffc[l] + i; | |||
764 | ||||
765 | for (x = 0; x < m; x++) s += buff[x] * (*pk++); | |||
766 | } | |||
767 | ||||
768 | d0 = D2I(s)((((s)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((s)) >= 2147483647) ? 2147483647 : (mlib_s32)((s)))); | |||
769 | dp[0] = FROM_S32(d0)((d0) >> 16); | |||
770 | ||||
771 | buffn[i] = (FTYPEmlib_d64)sp[0]; | |||
772 | ||||
773 | sp += chan1; | |||
774 | dp += chan1; | |||
775 | } | |||
776 | ||||
777 | for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1]; | |||
778 | ||||
779 | /* next line */ | |||
780 | sl += sll; | |||
781 | dl += dll; | |||
782 | ||||
783 | buff_ind++; | |||
784 | ||||
785 | if (buff_ind >= n + 1) buff_ind = 0; | |||
786 | } | |||
787 | } | |||
788 | ||||
789 | FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free (k); return status; | |||
790 | } | |||
791 | ||||
792 | /***************************************************************/ | |||
793 | /* for x86, using integer multiplies is faster */ | |||
794 | ||||
/*
 * STORE_RES(res, x): shift the accumulated sum x right by shift2 (a variable
 * that must be in scope at the point of use) and store it into res through
 * CLAMP_STORE, which saturates the value to the destination range.
 *
 * Note that x itself is modified by the shift.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: with the bare two-statement form, writing
 * "if (cond) STORE_RES(r, x);" would guard only the shift and always
 * execute the clamp/store.
 */
#define STORE_RES(res, x)                                       \
  do {                                                          \
    x >>= shift2;                                               \
    CLAMP_STORE(res, x);                                        \
  } while (0)
798 | ||||
799 | mlib_status CONV_FUNC_I(MxN)mlib_i_convMxNnw_s16(mlib_image *dst, | |||
800 | const mlib_image *src, | |||
801 | const mlib_s32 *kernel, | |||
802 | mlib_s32 m, | |||
803 | mlib_s32 n, | |||
804 | mlib_s32 dm, | |||
805 | mlib_s32 dn, | |||
806 | mlib_s32 scale, | |||
807 | mlib_s32 cmask) | |||
808 | { | |||
809 | mlib_s32 buff[BUFF_SIZE1600], *buffd = buff; | |||
810 | mlib_s32 l, off, kw; | |||
811 | mlib_s32 d0, d1, shift1, shift2; | |||
812 | mlib_s32 k0, k1, k2, k3, k4, k5, k6; | |||
813 | mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7; | |||
814 | DTYPEmlib_s16 *adr_src, *sl, *sp = NULL((void*)0); | |||
815 | DTYPEmlib_s16 *adr_dst, *dl, *dp = NULL((void*)0); | |||
816 | mlib_s32 wid, hgt, sll, dll; | |||
817 | mlib_s32 nchannel, chan1; | |||
818 | mlib_s32 i, j, c; | |||
819 | mlib_s32 chan2; | |||
820 | mlib_s32 k_locl[MAX_N15*MAX_N15], *k = k_locl; | |||
821 | GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src) ; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride (src) / sizeof(mlib_s16); dll = mlib_ImageGetStride(dst) / sizeof (mlib_s16); adr_src = (mlib_s16 *)mlib_ImageGetData(src); adr_dst = (mlib_s16 *)mlib_ImageGetData(dst); | |||
822 | ||||
823 | #if IMG_TYPE2 != 1 | |||
824 | shift1 = 16; | |||
825 | #else | |||
826 | shift1 = 8; | |||
827 | #endif /* IMG_TYPE != 1 */ | |||
828 | shift2 = scale - shift1; | |||
829 | ||||
830 | chan1 = nchannel; | |||
831 | chan2 = chan1 + chan1; | |||
832 | ||||
833 | wid -= (m - 1); | |||
834 | hgt -= (n - 1); | |||
835 | adr_dst += dn*dll + dm*nchannel; | |||
836 | ||||
837 | if (wid > BUFF_SIZE1600) { | |||
838 | buffd = mlib_malloc(sizeof(mlib_s32)*wid); | |||
839 | ||||
840 | if (buffd == NULL((void*)0)) return MLIB_FAILURE; | |||
841 | } | |||
842 | ||||
843 | if (m*n > MAX_N15*MAX_N15) { | |||
844 | k = mlib_malloc(sizeof(mlib_s32)*(m*n)); | |||
845 | ||||
846 | if (k == NULL((void*)0)) { | |||
847 | if (buffd != buff) mlib_free(buffd); | |||
848 | return MLIB_FAILURE; | |||
849 | } | |||
850 | } | |||
851 | ||||
852 | for (i = 0; i < m*n; i++) { | |||
853 | k[i] = kernel[i] >> shift1; | |||
854 | } | |||
855 | ||||
856 | for (c = 0; c < nchannel; c++) { | |||
857 | if (!(cmask & (1 << (nchannel - 1 - c)))) continue; | |||
858 | ||||
859 | sl = adr_src + c; | |||
860 | dl = adr_dst + c; | |||
861 | ||||
862 | for (i = 0; i < wid; i++) buffd[i] = 0; | |||
863 | ||||
864 | for (j = 0; j < hgt; j++) { | |||
865 | mlib_s32 *pk = k; | |||
866 | ||||
867 | for (l = 0; l < n; l++) { | |||
868 | DTYPEmlib_s16 *sp0 = sl + l*sll; | |||
869 | ||||
870 | for (off = 0; off < m;) { | |||
871 | sp = sp0 + off*chan1; | |||
872 | dp = dl; | |||
873 | ||||
874 | kw = m - off; | |||
875 | ||||
876 | if (kw > 2*MAX_KER7) kw = MAX_KER7; else | |||
877 | if (kw > MAX_KER7) kw = kw/2; | |||
878 | off += kw; | |||
879 | ||||
880 | p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2]; | |||
881 | p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1]; | |||
882 | ||||
883 | k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3]; | |||
884 | k4 = pk[4]; k5 = pk[5]; k6 = pk[6]; | |||
885 | pk += kw; | |||
886 | ||||
887 | sp += (kw - 1)*chan1; | |||
888 | ||||
889 | if (kw == 7) { | |||
890 | ||||
891 | if (l < (n - 1) || off < m) { | |||
892 | for (i = 0; i <= (wid - 2); i += 2) { | |||
893 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; | |||
894 | p6 = sp[0]; | |||
895 | p7 = sp[chan1]; | |||
896 | ||||
897 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6; | |||
898 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6; | |||
899 | ||||
900 | sp += chan2; | |||
901 | } | |||
902 | ||||
903 | } else { | |||
904 | for (i = 0; i <= (wid - 2); i += 2) { | |||
905 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7; | |||
906 | p6 = sp[0]; | |||
907 | p7 = sp[chan1]; | |||
908 | ||||
909 | d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]); | |||
910 | d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]); | |||
911 | ||||
912 | STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] = (mlib_s16)d0; | |||
913 | STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1 ] = (mlib_s16)d1; | |||
914 | ||||
915 | buffd[i ] = 0; | |||
916 | buffd[i + 1] = 0; | |||
917 | ||||
918 | sp += chan2; | |||
919 | dp += chan2; | |||
920 | } | |||
921 | } | |||
922 | ||||
923 | } else if (kw == 6) { | |||
924 | ||||
925 | if (l < (n - 1) || off < m) { | |||
926 | for (i = 0; i <= (wid - 2); i += 2) { | |||
927 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; | |||
928 | p5 = sp[0]; | |||
929 | p6 = sp[chan1]; | |||
930 | ||||
931 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5; | |||
932 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5; | |||
933 | ||||
934 | sp += chan2; | |||
935 | } | |||
936 | ||||
937 | } else { | |||
938 | for (i = 0; i <= (wid - 2); i += 2) { | |||
939 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; | |||
940 | p5 = sp[0]; | |||
941 | p6 = sp[chan1]; | |||
942 | ||||
943 | d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]); | |||
944 | d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]); | |||
945 | ||||
946 | STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] = (mlib_s16)d0; | |||
947 | STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1 ] = (mlib_s16)d1; | |||
948 | ||||
949 | buffd[i ] = 0; | |||
950 | buffd[i + 1] = 0; | |||
951 | ||||
952 | sp += chan2; | |||
953 | dp += chan2; | |||
954 | } | |||
955 | } | |||
956 | ||||
957 | } else if (kw == 5) { | |||
958 | ||||
959 | if (l < (n - 1) || off < m) { | |||
960 | for (i = 0; i <= (wid - 2); i += 2) { | |||
961 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; | |||
962 | p4 = sp[0]; | |||
963 | p5 = sp[chan1]; | |||
964 | ||||
965 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4; | |||
966 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4; | |||
967 | ||||
968 | sp += chan2; | |||
969 | } | |||
970 | ||||
971 | } else { | |||
972 | for (i = 0; i <= (wid - 2); i += 2) { | |||
973 | p0 = p2; p1 = p3; p2 = p4; p3 = p5; | |||
974 | p4 = sp[0]; | |||
975 | p5 = sp[chan1]; | |||
976 | ||||
977 | d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]); | |||
978 | d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]); | |||
979 | ||||
980 | STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] = (mlib_s16)d0; | |||
981 | STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1 ] = (mlib_s16)d1; | |||
982 | ||||
983 | buffd[i ] = 0; | |||
984 | buffd[i + 1] = 0; | |||
985 | ||||
986 | sp += chan2; | |||
987 | dp += chan2; | |||
988 | } | |||
989 | } | |||
990 | ||||
991 | } else if (kw == 4) { | |||
992 | ||||
993 | if (l < (n - 1) || off < m) { | |||
994 | for (i = 0; i <= (wid - 2); i += 2) { | |||
995 | p0 = p2; p1 = p3; p2 = p4; | |||
996 | p3 = sp[0]; | |||
997 | p4 = sp[chan1]; | |||
998 | ||||
999 | buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3; | |||
1000 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3; | |||
1001 | ||||
1002 | sp += chan2; | |||
1003 | } | |||
1004 | ||||
1005 | } else { | |||
1006 | for (i = 0; i <= (wid - 2); i += 2) { | |||
1007 | p0 = p2; p1 = p3; p2 = p4; | |||
1008 | p3 = sp[0]; | |||
1009 | p4 = sp[chan1]; | |||
1010 | ||||
1011 | d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]); | |||
1012 | d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]); | |||
1013 | ||||
1014 | STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] = (mlib_s16)d0; | |||
1015 | STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1 ] = (mlib_s16)d1; | |||
1016 | ||||
1017 | buffd[i ] = 0; | |||
1018 | buffd[i + 1] = 0; | |||
1019 | ||||
1020 | sp += chan2; | |||
1021 | dp += chan2; | |||
1022 | } | |||
1023 | } | |||
1024 | ||||
1025 | } else if (kw == 3) { | |||
1026 | ||||
1027 | if (l < (n - 1) || off < m) { | |||
1028 | for (i = 0; i <= (wid - 2); i += 2) { | |||
1029 | p0 = p2; p1 = p3; | |||
1030 | p2 = sp[0]; | |||
1031 | p3 = sp[chan1]; | |||
1032 | ||||
1033 | buffd[i ] += p0*k0 + p1*k1 + p2*k2; | |||
1034 | buffd[i + 1] += p1*k0 + p2*k1 + p3*k2; | |||
1035 | ||||
1036 | sp += chan2; | |||
1037 | } | |||
1038 | ||||
1039 | } else { | |||
1040 | for (i = 0; i <= (wid - 2); i += 2) { | |||
1041 | p0 = p2; p1 = p3; | |||
1042 | p2 = sp[0]; | |||
1043 | p3 = sp[chan1]; | |||
1044 | ||||
1045 | d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i ]); | |||
1046 | d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]); | |||
1047 | ||||
1048 | STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] = (mlib_s16)d0; | |||
1049 | STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1 ] = (mlib_s16)d1; | |||
1050 | ||||
1051 | buffd[i ] = 0; | |||
1052 | buffd[i + 1] = 0; | |||
1053 | ||||
1054 | sp += chan2; | |||
1055 | dp += chan2; | |||
1056 | } | |||
1057 | } | |||
1058 | ||||
1059 | } else if (kw == 2) { | |||
1060 | ||||
1061 | if (l < (n - 1) || off < m) { | |||
1062 | for (i = 0; i <= (wid - 2); i += 2) { | |||
1063 | p0 = p2; | |||
1064 | p1 = sp[0]; | |||
1065 | p2 = sp[chan1]; | |||
1066 | ||||
1067 | buffd[i ] += p0*k0 + p1*k1; | |||
1068 | buffd[i + 1] += p1*k0 + p2*k1; | |||
1069 | ||||
1070 | sp += chan2; | |||
1071 | } | |||
1072 | ||||
1073 | } else { | |||
1074 | for (i = 0; i <= (wid - 2); i += 2) { | |||
1075 | p0 = p2; | |||
1076 | p1 = sp[0]; | |||
1077 | p2 = sp[chan1]; | |||
1078 | ||||
1079 | d0 = (p0*k0 + p1*k1 + buffd[i ]); | |||
1080 | d1 = (p1*k0 + p2*k1 + buffd[i + 1]); | |||
1081 | ||||
1082 | STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] = (mlib_s16)d0; | |||
1083 | STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1 ] = (mlib_s16)d1; | |||
1084 | ||||
1085 | buffd[i ] = 0; | |||
1086 | buffd[i + 1] = 0; | |||
1087 | ||||
1088 | sp += chan2; | |||
1089 | dp += chan2; | |||
1090 | } | |||
1091 | } | |||
1092 | ||||
1093 | } else /*if (kw == 1)*/ { | |||
1094 | ||||
1095 | if (l < (n - 1) || off < m) { | |||
1096 | for (i = 0; i <= (wid - 2); i += 2) { | |||
1097 | p0 = sp[0]; | |||
1098 | p1 = sp[chan1]; | |||
1099 | ||||
1100 | buffd[i ] += p0*k0; | |||
1101 | buffd[i + 1] += p1*k0; | |||
1102 | ||||
1103 | sp += chan2; | |||
1104 | } | |||
1105 | ||||
1106 | } else { | |||
1107 | for (i = 0; i <= (wid - 2); i += 2) { | |||
1108 | p0 = sp[0]; | |||
1109 | p1 = sp[chan1]; | |||
1110 | ||||
1111 | d0 = (p0*k0 + buffd[i ]); | |||
1112 | d1 = (p1*k0 + buffd[i + 1]); | |||
1113 | ||||
1114 | STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] = (mlib_s16)d0; | |||
1115 | STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1 ] = (mlib_s16)d1; | |||
1116 | ||||
1117 | buffd[i ] = 0; | |||
1118 | buffd[i + 1] = 0; | |||
1119 | ||||
1120 | sp += chan2; | |||
1121 | dp += chan2; | |||
1122 | } | |||
1123 | } | |||
1124 | } | |||
1125 | } | |||
1126 | } | |||
1127 | ||||
1128 | /* last pixels */ | |||
1129 | for (; i < wid; i++) { | |||
1130 | mlib_s32 *pk = k, s = 0; | |||
1131 | mlib_s32 x; | |||
1132 | ||||
1133 | for (l = 0; l < n; l++) { | |||
1134 | sp = sl + l*sll + i*chan1; | |||
1135 | ||||
1136 | for (x = 0; x < m; x++) { | |||
1137 | s += sp[0] * pk[0]; | |||
1138 | sp += chan1; | |||
1139 | pk ++; | |||
1140 | } | |||
1141 | } | |||
1142 | ||||
1143 | STORE_RES(dp[0], s)s >>= shift2; if (s >= 32767) dp[0] = 32767; else if (s <= (-32767 -1)) dp[0] = (-32767 -1); else dp[0] = (mlib_s16 )s; | |||
1144 | ||||
1145 | sp += chan1; | |||
1146 | dp += chan1; | |||
1147 | } | |||
1148 | ||||
1149 | sl += sll; | |||
1150 | dl += dll; | |||
1151 | } | |||
1152 | } | |||
1153 | ||||
1154 | if (buffd != buff) mlib_free(buffd); | |||
1155 | if (k != k_locl) mlib_free(k); | |||
1156 | ||||
1157 | return MLIB_SUCCESS; | |||
1158 | } | |||
1159 | ||||
1160 | /***************************************************************/ |