Bug Summary

File: jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c
Warning: line 352, column 18
The right operand of '+' is a garbage value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name mlib_ImageConv_8nw.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjava -I /home/daniel/Projects/java/jdk/src/java.base/unix/native/libjava -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -D LIBC=gnu -D _GNU_SOURCE -D _REENTRANT -D _LARGEFILE64_SOURCE -D LINUX -D DEBUG -D _LITTLE_ENDIAN -D ARCH="amd64" -D amd64 -D _LP64=1 -D __USE_J2D_NAMES -D __MEDIALIB_OLD_NAMES -D MLIB_NO_LIBSUNMATH -D MLIB_OS64BIT -I /home/daniel/Projects/java/jdk/src/java.desktop/share/native/libmlib_image -I /home/daniel/Projects/java/jdk/src/java.desktop/share/native/common/awt/medialib -I 
/home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/headers/java.desktop -D _FORTIFY_SOURCE=2 -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-unused -Wno-unused-function -std=c99 -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c /home/daniel/Projects/java/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_8nw.c
1/*
2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTION
29 * Internal functions for mlib_ImageConv* on U8/S16/U16 types and
30 * MLIB_EDGE_DST_NO_WRITE mask
31 */
32
33#include "mlib_image.h"
34#include "mlib_ImageConv.h"
35#include "mlib_c_ImageConv.h"
36
37/*
38 This define switches between functions of different data types
39*/
40#define IMG_TYPE1 1
41
42/***************************************************************/
43#if IMG_TYPE1 == 1
44
45#define DTYPEmlib_u8 mlib_u8
46#define CONV_FUNC(KERN)mlib_c_convKERNnw_u8 mlib_c_conv##KERN##nw_u8
47#define CONV_FUNC_I(KERN)mlib_i_convKERNnw_u8 mlib_i_conv##KERN##nw_u8
48#define DSCALE(1 << 24) (1 << 24)
49#define FROM_S32(x)(((x) >> 24) ^ 128) (((x) >> 24) ^ 128)
50#define S64TOS32(x)(x) (x)
51#define SAT_OFF-(1u << 31) -(1u << 31)
52
53#elif IMG_TYPE1 == 2
54
55#define DTYPEmlib_u8 mlib_s16
56#define CONV_FUNC(KERN)mlib_c_convKERNnw_u8 mlib_conv##KERN##nw_s16
57#define CONV_FUNC_I(KERN)mlib_i_convKERNnw_u8 mlib_i_conv##KERN##nw_s16
58#define DSCALE(1 << 24) 65536.0
59#define FROM_S32(x)(((x) >> 24) ^ 128) ((x) >> 16)
60#define S64TOS32(x)(x) ((x) & 0xffffffff)
61#define SAT_OFF-(1u << 31)
62
63#elif IMG_TYPE1 == 3
64
65#define DTYPEmlib_u8 mlib_u16
66#define CONV_FUNC(KERN)mlib_c_convKERNnw_u8 mlib_conv##KERN##nw_u16
67#define CONV_FUNC_I(KERN)mlib_i_convKERNnw_u8 mlib_i_conv##KERN##nw_u16
68#define DSCALE(1 << 24) 65536.0
69#define FROM_S32(x)(((x) >> 24) ^ 128) (((x) >> 16) ^ 0x8000)
70#define S64TOS32(x)(x) (x)
71#define SAT_OFF-(1u << 31) -(1u << 31)
72
73#endif /* IMG_TYPE == 1 */
74
75/***************************************************************/
76#define BUFF_SIZE1600 1600
77
78#define CACHE_SIZE(64*1024) (64*1024)
79
80/***************************************************************/
81#define FTYPEmlib_d64 mlib_d64
82
83#ifndef MLIB_USE_FTOI_CLAMPING
84
85#define CLAMP_S32(x)(((x) <= (-2147483647 -1)) ? (-2147483647 -1) : (((x) >=
2147483647) ? 2147483647 : (mlib_s32)(x)))
\
86 (((x) <= MLIB_S32_MIN(-2147483647 -1)) ? MLIB_S32_MIN(-2147483647 -1) : (((x) >= MLIB_S32_MAX2147483647) ? MLIB_S32_MAX2147483647 : (mlib_s32)(x)))
87
88#else
89
90#define CLAMP_S32(x)(((x) <= (-2147483647 -1)) ? (-2147483647 -1) : (((x) >=
2147483647) ? 2147483647 : (mlib_s32)(x)))
((mlib_s32)(x))
91
92#endif /* MLIB_USE_FTOI_CLAMPING */
93
94/***************************************************************/
95#define D2I(x)((((x) -(1u << 31)) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((x) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((x) -(1u << 31))))
CLAMP_S32((x) SAT_OFF)((((x) -(1u << 31)) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((x) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((x) -(1u << 31))))
96
97/***************************************************************/
98#ifdef _LITTLE_ENDIAN1
99
100#define STORE2(res0, res1)dp[0 ] = res1; dp[chan1] = res0 \
101 dp[0 ] = res1; \
102 dp[chan1] = res0
103
104#else
105
106#define STORE2(res0, res1)dp[0 ] = res1; dp[chan1] = res0 \
107 dp[0 ] = res0; \
108 dp[chan1] = res1
109
110#endif /* _LITTLE_ENDIAN */
111
112/***************************************************************/
113#ifdef _NO_LONGLONG
114
115#define LOAD_BUFF(buff)*(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) |
((mlib_s64)sp[0])
\
116 buff[i ] = sp[0]; \
117 buff[i + 1] = sp[chan1]
118
119#else /* _NO_LONGLONG */
120
121#ifdef _LITTLE_ENDIAN1
122
123#define LOAD_BUFF(buff)*(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) |
((mlib_s64)sp[0])
\
124 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])((mlib_s64)sp[0])
125
126#else /* _LITTLE_ENDIAN */
127
128#define LOAD_BUFF(buff)*(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) |
((mlib_s64)sp[0])
\
129 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])((mlib_s64)sp[chan1])
130
131#endif /* _LITTLE_ENDIAN */
132#endif /* _NO_LONGLONG */
133
134/***************************************************************/
135typedef union {
136 mlib_d64 d64;
137 struct {
138 mlib_s32 i0;
139 mlib_s32 i1;
140 } i32s;
141 struct {
142 mlib_s32 f0;
143 mlib_s32 f1;
144 } f32s;
145} d64_2x32;
146
147/***************************************************************/
148#define DEF_VARS(type)type *adr_src, *sl, *sp = ((void*)0); type *adr_dst, *dl, *dp
= ((void*)0); mlib_d64 *pbuff = buff; mlib_s32 wid, hgt, sll
, dll; mlib_s32 nchannel, chan1; mlib_s32 i, j, c
\
149 type *adr_src, *sl, *sp = NULL((void*)0); \
150 type *adr_dst, *dl, *dp = NULL((void*)0); \
151 FTYPEmlib_d64 *pbuff = buff; \
152 mlib_s32 wid, hgt, sll, dll; \
153 mlib_s32 nchannel, chan1; \
154 mlib_s32 i, j, c
155
156/***************************************************************/
157#define GET_SRC_DST_PARAMETERS(type)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(type); dll = mlib_ImageGetStride(dst) / sizeof
(type); adr_src = (type *)mlib_ImageGetData(src); adr_dst = (
type *)mlib_ImageGetData(dst)
\
158 hgt = mlib_ImageGetHeight(src); \
159 wid = mlib_ImageGetWidth(src); \
160 nchannel = mlib_ImageGetChannels(src); \
161 sll = mlib_ImageGetStride(src) / sizeof(type); \
162 dll = mlib_ImageGetStride(dst) / sizeof(type); \
163 adr_src = (type *)mlib_ImageGetData(src); \
164 adr_dst = (type *)mlib_ImageGetData(dst)
165
166/***************************************************************/
167#if IMG_TYPE1 == 1
168
169/* Test for the presence of any "1" bit in bits
170 8 to 31 of val. If present, then val is either
171 negative or >255. If over/underflows of 8 bits
172 are uncommon, then this technique can be a win,
173 since only a single test, rather than two, is
174 necessary to determine if clamping is needed.
175 On the other hand, if over/underflows are common,
176 it adds an extra test.
177*/
178#define CLAMP_STORE(dst, val)if (val & 0xffffff00) { if (val < 0) dst = 0; else dst
= (127*2 +1); } else { dst = (mlib_u8)val; }
\
179 if (val & 0xffffff00) { \
180 if (val < MLIB_U8_MIN0) \
181 dst = MLIB_U8_MIN0; \
182 else \
183 dst = MLIB_U8_MAX(127*2 +1); \
184 } else { \
185 dst = (mlib_u8)val; \
186 }
187
188#elif IMG_TYPE1 == 2
189
190#define CLAMP_STORE(dst, val)if (val & 0xffffff00) { if (val < 0) dst = 0; else dst
= (127*2 +1); } else { dst = (mlib_u8)val; }
\
191 if (val >= MLIB_S16_MAX32767) \
192 dst = MLIB_S16_MAX32767; \
193 else if (val <= MLIB_S16_MIN(-32767 -1)) \
194 dst = MLIB_S16_MIN(-32767 -1); \
195 else \
196 dst = (mlib_s16)val
197
198#elif IMG_TYPE1 == 3
199
200#define CLAMP_STORE(dst, val)if (val & 0xffffff00) { if (val < 0) dst = 0; else dst
= (127*2 +1); } else { dst = (mlib_u8)val; }
\
201 if (val >= MLIB_U16_MAX(32767 *2 +1)) \
202 dst = MLIB_U16_MAX(32767 *2 +1); \
203 else if (val <= MLIB_U16_MIN0) \
204 dst = MLIB_U16_MIN0; \
205 else \
206 dst = (mlib_u16)val
207
208#endif /* IMG_TYPE == 1 */
209
210/***************************************************************/
211#define MAX_KER7 7
212#define MAX_N15 15
213
214static mlib_status mlib_ImageConv1xN(mlib_image *dst,
215 const mlib_image *src,
216 const mlib_d64 *k,
217 mlib_s32 n,
218 mlib_s32 dn,
219 mlib_s32 cmask)
220{
221 FTYPEmlib_d64 buff[BUFF_SIZE1600];
222 mlib_s32 off, kh;
223 mlib_s32 d0, d1;
224 const FTYPEmlib_d64 *pk;
225 FTYPEmlib_d64 k0, k1, k2, k3;
226 FTYPEmlib_d64 p0, p1, p2, p3, p4;
227 DEF_VARS(DTYPE)mlib_u8 *adr_src, *sl, *sp = ((void*)0); mlib_u8 *adr_dst, *dl
, *dp = ((void*)0); mlib_d64 *pbuff = buff; mlib_s32 wid, hgt
, sll, dll; mlib_s32 nchannel, chan1; mlib_s32 i, j, c
;
228 DTYPEmlib_u8 *sl_c, *dl_c, *sl0;
229 mlib_s32 l, hsize, max_hsize;
230 GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(mlib_u8); dll = mlib_ImageGetStride(dst) / sizeof
(mlib_u8); adr_src = (mlib_u8 *)mlib_ImageGetData(src); adr_dst
= (mlib_u8 *)mlib_ImageGetData(dst)
;
231
232 hgt -= (n - 1);
233 adr_dst += dn*dll;
234
235 max_hsize = (CACHE_SIZE(64*1024)/sizeof(DTYPEmlib_u8))/sll;
236
237 if (!max_hsize) max_hsize = 1;
1
Assuming 'max_hsize' is not equal to 0
2
Taking false branch
238
239 if (max_hsize > BUFF_SIZE1600) {
3
Assuming 'max_hsize' is <= BUFF_SIZE
4
Taking false branch
240 pbuff = mlib_malloc(sizeof(FTYPEmlib_d64)*max_hsize);
241 }
242
243 chan1 = nchannel;
244
245 sl_c = adr_src;
246 dl_c = adr_dst;
247
248 for (l = 0; l < hgt; l += hsize) {
5
Assuming 'l' is < 'hgt'
6
Loop condition is true. Entering loop body
16
Assuming 'l' is < 'hgt'
17
Loop condition is true. Entering loop body
249 hsize = hgt - l;
250
251 if (hsize > max_hsize) hsize = max_hsize;
7
Assuming 'hsize' is > 'max_hsize'
8
Taking true branch
18
Assuming 'hsize' is <= 'max_hsize'
19
Taking false branch
252
253 for (c = 0; c < nchannel; c++) {
9
Assuming 'c' is < 'nchannel'
10
Loop condition is true. Entering loop body
14
Assuming 'c' is >= 'nchannel'
15
Loop condition is false. Execution continues on line 411
20
Loop condition is true. Entering loop body
254 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
11
Assuming the condition is true
12
Taking true branch
13
Execution continues on line 253
21
Assuming the condition is false
22
Taking false branch
255
256 sl = sl_c + c;
257 dl = dl_c + c;
258
259 for (j = 0; j < hsize; j++) pbuff[j] = 0.0;
23
Assuming 'j' is >= 'hsize'
24
Loop condition is false. Execution continues on line 261
260
261 for (i = 0; i < wid; i++) {
25
Assuming 'i' is < 'wid'
26
Loop condition is true. Entering loop body
262 sl0 = sl;
263
264 for (off = 0; off < (n - 4); off += 4) {
27
Assuming the condition is false
28
Loop condition is false. Execution continues on line 286
265 pk = k + off;
266 sp = sl0;
267
268 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
269 p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
270 sp += 3*sll;
271
272 for (j = 0; j < hsize; j += 2) {
273 p0 = p2; p1 = p3; p2 = p4;
274 p3 = sp[0];
275 p4 = sp[sll];
276
277 pbuff[j ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
278 pbuff[j + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
279
280 sp += 2*sll;
281 }
282
283 sl0 += 4*sll;
284 }
285
286 pk = k + off;
287 sp = sl0;
288
289 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
290 p2 = sp[0]; p3 = sp[sll]; p4 = sp[2*sll];
291
292 dp = dl;
293 kh = n - off;
294
295 if (kh == 4) {
29
Assuming 'kh' is not equal to 4
30
Taking false branch
296 sp += 3*sll;
297
298 for (j = 0; j <= (hsize - 2); j += 2) {
299 p0 = p2; p1 = p3; p2 = p4;
300 p3 = sp[0];
301 p4 = sp[sll];
302
303 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]) -(1u << 31
)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1
*k1 + p2*k2 + p3*k3 + pbuff[j]) -(1u << 31)) >= 2147483647
) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff
[j]) -(1u << 31))))
;
304 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]) -(1u <<
31)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 +
p2*k1 + p3*k2 + p4*k3 + pbuff[j + 1]) -(1u << 31)) >=
2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + p3*k2
+ p4*k3 + pbuff[j + 1]) -(1u << 31))))
;
305
306 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
307 dp[dll] = FROM_S32(d1)(((d1) >> 24) ^ 128);
308
309 pbuff[j] = 0;
310 pbuff[j + 1] = 0;
311
312 sp += 2*sll;
313 dp += 2*dll;
314 }
315
316 if (j < hsize) {
317 p0 = p2; p1 = p3; p2 = p4;
318 p3 = sp[0];
319
320 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff[j]) -(1u << 31
)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1
*k1 + p2*k2 + p3*k3 + pbuff[j]) -(1u << 31)) >= 2147483647
) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + pbuff
[j]) -(1u << 31))))
;
321
322 pbuff[j] = 0;
323
324 dp[0] = FROM_S32(d0)(((d0) >> 24) ^ 128);
325 }
326
327 } else if (kh == 3) {
31
Assuming 'kh' is equal to 3
32
Taking true branch
328 sp += 2*sll;
329
330 for (j = 0; j <= (hsize - 2); j += 2) {
33
The value 0 is assigned to 'j'
34
Assuming the condition is false
35
Loop condition is false. Execution continues on line 348
331 p0 = p2; p1 = p3;
332 p2 = sp[0];
333 p3 = sp[sll];
334
335 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + pbuff[j]) -(1u << 31)) <=
(-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2
*k2 + pbuff[j]) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + pbuff[j]) -(1u <<
31))))
;
336 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1])((((p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]) -(1u << 31)) <=
(-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3
*k2 + pbuff[j + 1]) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + pbuff[j + 1]) -(1u <<
31))))
;
337
338 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
339 dp[dll] = FROM_S32(d1)(((d1) >> 24) ^ 128);
340
341 pbuff[j] = 0;
342 pbuff[j + 1] = 0;
343
344 sp += 2*sll;
345 dp += 2*dll;
346 }
347
348 if (j < hsize) {
36
Assuming 'j' is < 'hsize'
37
Taking true branch
349 p0 = p2; p1 = p3;
350 p2 = sp[0];
351
352 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + pbuff[j])((((p0*k0 + p1*k1 + p2*k2 + pbuff[j]) -(1u << 31)) <=
(-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2
*k2 + pbuff[j]) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + pbuff[j]) -(1u <<
31))))
;
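38
The right operand of '+' is a garbage value
(Reviewer note: the operand in question is pbuff[j] with j == 0. On this path pbuff still points at the uninitialised local array buff, because the branch at source line 239 was not taken, and the zero-fill loop at source line 259 is assumed to execute zero times (events 23-24), which requires hsize <= 0. Event 36, however, assumes j < hsize with j == 0, i.e. hsize > 0, and hsize is not modified between those two points. The two assumptions contradict each other, so this path looks infeasible and the warning is probably a false positive; confirm against the original source file before changing any code.)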
353
354 pbuff[j] = 0;
355
356 dp[0] = FROM_S32(d0)(((d0) >> 24) ^ 128);
357 }
358
359 } else if (kh == 2) {
360 sp += sll;
361
362 for (j = 0; j <= (hsize - 2); j += 2) {
363 p0 = p2;
364 p1 = sp[0];
365 p2 = sp[sll];
366
367 d0 = D2I(p0*k0 + p1*k1 + pbuff[j])((((p0*k0 + p1*k1 + pbuff[j]) -(1u << 31)) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + pbuff[j]) -(1u
<< 31)) >= 2147483647) ? 2147483647 : (mlib_s32)((p0
*k0 + p1*k1 + pbuff[j]) -(1u << 31))))
;
368 d1 = D2I(p1*k0 + p2*k1 + pbuff[j + 1])((((p1*k0 + p2*k1 + pbuff[j + 1]) -(1u << 31)) <= (-
2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + pbuff
[j + 1]) -(1u << 31)) >= 2147483647) ? 2147483647 : (
mlib_s32)((p1*k0 + p2*k1 + pbuff[j + 1]) -(1u << 31))))
;
369
370 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
371 dp[dll] = FROM_S32(d1)(((d1) >> 24) ^ 128);
372
373 pbuff[j] = 0;
374 pbuff[j + 1] = 0;
375
376 sp += 2*sll;
377 dp += 2*dll;
378 }
379
380 if (j < hsize) {
381 p0 = p2;
382 p1 = sp[0];
383
384 d0 = D2I(p0*k0 + p1*k1 + pbuff[j])((((p0*k0 + p1*k1 + pbuff[j]) -(1u << 31)) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + pbuff[j]) -(1u
<< 31)) >= 2147483647) ? 2147483647 : (mlib_s32)((p0
*k0 + p1*k1 + pbuff[j]) -(1u << 31))))
;
385
386 pbuff[j] = 0;
387
388 dp[0] = FROM_S32(d0)(((d0) >> 24) ^ 128);
389 }
390
391 } else /* if (kh == 1) */ {
392 for (j = 0; j < hsize; j++) {
393 p0 = sp[0];
394
395 d0 = D2I(p0*k0 + pbuff[j])((((p0*k0 + pbuff[j]) -(1u << 31)) <= (-2147483647 -
1)) ? (-2147483647 -1) : ((((p0*k0 + pbuff[j]) -(1u << 31
)) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + pbuff
[j]) -(1u << 31))))
;
396
397 dp[0] = FROM_S32(d0)(((d0) >> 24) ^ 128);
398
399 pbuff[j] = 0;
400
401 sp += sll;
402 dp += dll;
403 }
404 }
405
406 sl += chan1;
407 dl += chan1;
408 }
409 }
410
411 sl_c += max_hsize*sll;
412 dl_c += max_hsize*dll;
413 }
414
415 if (pbuff != buff) mlib_free(pbuff);
416
417 return MLIB_SUCCESS;
418}
419
420/***************************************************************/
421mlib_status CONV_FUNC(MxN)mlib_c_convMxNnw_u8(mlib_image *dst,
422 const mlib_image *src,
423 const mlib_s32 *kernel,
424 mlib_s32 m,
425 mlib_s32 n,
426 mlib_s32 dm,
427 mlib_s32 dn,
428 mlib_s32 scale,
429 mlib_s32 cmask)
430{
431 FTYPEmlib_d64 buff[BUFF_SIZE1600], *buffs_arr[2*(MAX_N15 + 1)];
432 FTYPEmlib_d64 **buffs = buffs_arr, *buffd;
433 FTYPEmlib_d64 akernel[256], *k = akernel, fscale = DSCALE(1 << 24);
434 mlib_s32 mn, l, off, kw, bsize, buff_ind;
435 mlib_s32 d0, d1;
436 FTYPEmlib_d64 k0, k1, k2, k3, k4, k5, k6;
437 FTYPEmlib_d64 p0, p1, p2, p3, p4, p5, p6, p7;
438 d64_2x32 dd;
439 DEF_VARS(DTYPE)mlib_u8 *adr_src, *sl, *sp = ((void*)0); mlib_u8 *adr_dst, *dl
, *dp = ((void*)0); mlib_d64 *pbuff = buff; mlib_s32 wid, hgt
, sll, dll; mlib_s32 nchannel, chan1; mlib_s32 i, j, c
;
440 mlib_s32 chan2;
441 mlib_s32 *buffo, *buffi;
442 mlib_status status = MLIB_SUCCESS;
443
444 GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(mlib_u8); dll = mlib_ImageGetStride(dst) / sizeof
(mlib_u8); adr_src = (mlib_u8 *)mlib_ImageGetData(src); adr_dst
= (mlib_u8 *)mlib_ImageGetData(dst)
;
445
446 if (scale > 30) {
447 fscale *= 1.0/(1 << 30);
448 scale -= 30;
449 }
450
451 fscale /= (1 << scale);
452
453 mn = m*n;
454
455 if (mn > 256) {
456 k = mlib_malloc(mn*sizeof(mlib_d64));
457
458 if (k == NULL((void*)0)) return MLIB_FAILURE;
459 }
460
461 for (i = 0; i < mn; i++) {
462 k[i] = kernel[i]*fscale;
463 }
464
465 if (m == 1) {
466 status = mlib_ImageConv1xN(dst, src, k, n, dn, cmask);
467 FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free
(k); return status
;
468 }
469
470 bsize = (n + 3)*wid;
471
472 if ((bsize > BUFF_SIZE1600) || (n > MAX_N15)) {
473 pbuff = mlib_malloc(sizeof(FTYPEmlib_d64)*bsize + sizeof(FTYPEmlib_d64 *)*2*(n + 1));
474
475 if (pbuff == NULL((void*)0)) {
476 status = MLIB_FAILURE;
477 FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free
(k); return status
;
478 }
479 buffs = (FTYPEmlib_d64 **)(pbuff + bsize);
480 }
481
482 for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*wid;
483 for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
484 buffd = buffs[n] + wid;
485 buffo = (mlib_s32*)(buffd + wid);
486 buffi = buffo + (wid &~ 1);
487
488 chan1 = nchannel;
489 chan2 = chan1 + chan1;
490
491 wid -= (m - 1);
492 hgt -= (n - 1);
493 adr_dst += dn*dll + dm*nchannel;
494
495 for (c = 0; c < nchannel; c++) {
496 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
497
498 sl = adr_src + c;
499 dl = adr_dst + c;
500
501 for (l = 0; l < n; l++) {
502 FTYPEmlib_d64 *buff = buffs[l];
503
504 for (i = 0; i < wid + (m - 1); i++) {
505 buff[i] = (FTYPEmlib_d64)sl[i*chan1];
506 }
507
508 sl += sll;
509 }
510
511 buff_ind = 0;
512
513 for (i = 0; i < wid; i++) buffd[i] = 0.0;
514
515 for (j = 0; j < hgt; j++) {
516 FTYPEmlib_d64 **buffc = buffs + buff_ind;
517 FTYPEmlib_d64 *buffn = buffc[n];
518 FTYPEmlib_d64 *pk = k;
519
520 for (l = 0; l < n; l++) {
521 FTYPEmlib_d64 *buff_l = buffc[l];
522
523 for (off = 0; off < m;) {
524 FTYPEmlib_d64 *buff = buff_l + off;
525
526 kw = m - off;
527
528 if (kw > 2*MAX_KER7) kw = MAX_KER7; else
529 if (kw > MAX_KER7) kw = kw/2;
530 off += kw;
531
532 sp = sl;
533 dp = dl;
534
535 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
536 p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
537
538 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
539 k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
540 pk += kw;
541
542 if (kw == 7) {
543
544 if (l < (n - 1) || off < m) {
545 for (i = 0; i <= (wid - 2); i += 2) {
546 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
547
548 p6 = buff[i + 6]; p7 = buff[i + 7];
549
550 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
551 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
552 }
553
554 } else {
555 for (i = 0; i <= (wid - 2); i += 2) {
556 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
557
558 p6 = buff[i + 6]; p7 = buff[i + 7];
559
560 LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32)
| ((mlib_s64)sp[0])
;
561
562 dd.d64 = *(FTYPEmlib_d64 *)(buffi + i);
563 buffn[i ] = (FTYPEmlib_d64)dd.i32s.i0;
564 buffn[i + 1] = (FTYPEmlib_d64)dd.i32s.i1;
565
566 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd
[i ]) -(1u << 31)) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6
*k6 + buffd[i ]) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 +
p6*k6 + buffd[i ]) -(1u << 31))))
;
567 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd
[i + 1]) -(1u << 31)) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7
*k6 + buffd[i + 1]) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 +
p7*k6 + buffd[i + 1]) -(1u << 31))))
;
568
569 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
570 dp[chan1] = FROM_S32(d1)(((d1) >> 24) ^ 128);
571
572 buffd[i ] = 0.0;
573 buffd[i + 1] = 0.0;
574
575 sp += chan2;
576 dp += chan2;
577 }
578 }
579
580 } else if (kw == 6) {
581
582 if (l < (n - 1) || off < m) {
583 for (i = 0; i <= (wid - 2); i += 2) {
584 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
585
586 p5 = buff[i + 5]; p6 = buff[i + 6];
587
588 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
589 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
590 }
591
592 } else {
593 for (i = 0; i <= (wid - 2); i += 2) {
594 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
595
596 p5 = buff[i + 5]; p6 = buff[i + 6];
597
598 buffn[i ] = (FTYPEmlib_d64)sp[0];
599 buffn[i + 1] = (FTYPEmlib_d64)sp[chan1];
600
601 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]
) -(1u << 31)) <= (-2147483647 -1)) ? (-2147483647 -
1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd
[i ]) -(1u << 31)) >= 2147483647) ? 2147483647 : (mlib_s32
)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ])
-(1u << 31))))
;
602 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i +
1]) -(1u << 31)) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd
[i + 1]) -(1u << 31)) >= 2147483647) ? 2147483647 : (
mlib_s32)((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd
[i + 1]) -(1u << 31))))
;
603
604 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
605 dp[chan1] = FROM_S32(d1)(((d1) >> 24) ^ 128);
606
607 buffd[i ] = 0.0;
608 buffd[i + 1] = 0.0;
609
610 sp += chan2;
611 dp += chan2;
612 }
613 }
614
615 } else if (kw == 5) {
616
617 if (l < (n - 1) || off < m) {
618 for (i = 0; i <= (wid - 2); i += 2) {
619 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
620
621 p4 = buff[i + 4]; p5 = buff[i + 5];
622
623 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
624 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
625 }
626
627 } else {
628 for (i = 0; i <= (wid - 2); i += 2) {
629 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
630
631 p4 = buff[i + 4]; p5 = buff[i + 5];
632
633 buffn[i ] = (FTYPEmlib_d64)sp[0];
634 buffn[i + 1] = (FTYPEmlib_d64)sp[chan1];
635
636 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]) -(1u <<
31)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 +
p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]) -(1u << 31)
) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 +
p2*k2 + p3*k3 + p4*k4 + buffd[i ]) -(1u << 31))))
;
637 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]) -(1u
<< 31)) <= (-2147483647 -1)) ? (-2147483647 -1) : (
(((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]) -(1u
<< 31)) >= 2147483647) ? 2147483647 : (mlib_s32)((p1
*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]) -(1u <<
31))))
;
638
639 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
640 dp[chan1] = FROM_S32(d1)(((d1) >> 24) ^ 128);
641
642 buffd[i ] = 0.0;
643 buffd[i + 1] = 0.0;
644
645 sp += chan2;
646 dp += chan2;
647 }
648 }
649
650 } else if (kw == 4) {
651
652 if (l < (n - 1) || off < m) {
653 for (i = 0; i <= (wid - 2); i += 2) {
654 p0 = p2; p1 = p3; p2 = p4;
655
656 p3 = buff[i + 3]; p4 = buff[i + 4];
657
658 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
659 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
660 }
661
662 } else {
663 for (i = 0; i <= (wid - 2); i += 2) {
664 p0 = p2; p1 = p3; p2 = p4;
665
666 p3 = buff[i + 3]; p4 = buff[i + 4];
667
668 buffn[i ] = (FTYPEmlib_d64)sp[0];
669 buffn[i + 1] = (FTYPEmlib_d64)sp[chan1];
670
671 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]) -(1u << 31
)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1
*k1 + p2*k2 + p3*k3 + buffd[i ]) -(1u << 31)) >= 2147483647
) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd
[i ]) -(1u << 31))))
;
672 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]) -(1u <<
31)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 +
p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]) -(1u << 31)) >=
2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + p3*k2
+ p4*k3 + buffd[i + 1]) -(1u << 31))))
;
673
674 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
675 dp[chan1] = FROM_S32(d1)(((d1) >> 24) ^ 128);
676
677 buffd[i ] = 0.0;
678 buffd[i + 1] = 0.0;
679
680 sp += chan2;
681 dp += chan2;
682 }
683 }
684
685 } else if (kw == 3) {
686
687 if (l < (n - 1) || off < m) {
688 for (i = 0; i <= (wid - 2); i += 2) {
689 p0 = p2; p1 = p3;
690
691 p2 = buff[i + 2]; p3 = buff[i + 3];
692
693 buffd[i ] += p0*k0 + p1*k1 + p2*k2;
694 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
695 }
696
697 } else {
698 for (i = 0; i <= (wid - 2); i += 2) {
699 p0 = p2; p1 = p3;
700
701 p2 = buff[i + 2]; p3 = buff[i + 3];
702
703 buffn[i ] = (FTYPEmlib_d64)sp[0];
704 buffn[i + 1] = (FTYPEmlib_d64)sp[chan1];
705
706 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + buffd[i ]) -(1u << 31)) <=
(-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2
*k2 + buffd[i ]) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + buffd[i ]) -(1u <<
31))))
;
707 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]) -(1u << 31)) <=
(-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3
*k2 + buffd[i + 1]) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]) -(1u <<
31))))
;
708
709 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
710 dp[chan1] = FROM_S32(d1)(((d1) >> 24) ^ 128);
711
712 buffd[i ] = 0.0;
713 buffd[i + 1] = 0.0;
714
715 sp += chan2;
716 dp += chan2;
717 }
718 }
719
720 } else /*if (kw == 2)*/ {
721
722 if (l < (n - 1) || off < m) {
723 for (i = 0; i <= (wid - 2); i += 2) {
724 p0 = p2;
725
726 p1 = buff[i + 1]; p2 = buff[i + 2];
727
728 buffd[i ] += p0*k0 + p1*k1;
729 buffd[i + 1] += p1*k0 + p2*k1;
730 }
731
732 } else {
733 for (i = 0; i <= (wid - 2); i += 2) {
734 p0 = p2;
735
736 p1 = buff[i + 1]; p2 = buff[i + 2];
737
738 buffn[i ] = (FTYPEmlib_d64)sp[0];
739 buffn[i + 1] = (FTYPEmlib_d64)sp[chan1];
740
741 d0 = D2I(p0*k0 + p1*k1 + buffd[i ])((((p0*k0 + p1*k1 + buffd[i ]) -(1u << 31)) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + buffd[i ]) -(1u
<< 31)) >= 2147483647) ? 2147483647 : (mlib_s32)((p0
*k0 + p1*k1 + buffd[i ]) -(1u << 31))))
;
742 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1])((((p1*k0 + p2*k1 + buffd[i + 1]) -(1u << 31)) <= (-
2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + buffd
[i + 1]) -(1u << 31)) >= 2147483647) ? 2147483647 : (
mlib_s32)((p1*k0 + p2*k1 + buffd[i + 1]) -(1u << 31))))
;
743
744 dp[0 ] = FROM_S32(d0)(((d0) >> 24) ^ 128);
745 dp[chan1] = FROM_S32(d1)(((d1) >> 24) ^ 128);
746
747 buffd[i ] = 0.0;
748 buffd[i + 1] = 0.0;
749
750 sp += chan2;
751 dp += chan2;
752 }
753 }
754 }
755 }
756 }
757
758 /* last pixels */
759 for (; i < wid; i++) {
760 FTYPEmlib_d64 *pk = k, s = 0;
761 mlib_s32 x, d0;
762
763 for (l = 0; l < n; l++) {
764 FTYPEmlib_d64 *buff = buffc[l] + i;
765
766 for (x = 0; x < m; x++) s += buff[x] * (*pk++);
767 }
768
769 d0 = D2I(s)((((s) -(1u << 31)) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((s) -(1u << 31)) >= 2147483647) ? 2147483647
: (mlib_s32)((s) -(1u << 31))))
;
770 dp[0] = FROM_S32(d0)(((d0) >> 24) ^ 128);
771
772 buffn[i] = (FTYPEmlib_d64)sp[0];
773
774 sp += chan1;
775 dp += chan1;
776 }
777
778 for (l = 0; l < (m - 1); l++) buffn[wid + l] = sp[l*chan1];
779
780 /* next line */
781 sl += sll;
782 dl += dll;
783
784 buff_ind++;
785
786 if (buff_ind >= n + 1) buff_ind = 0;
787 }
788 }
789
790 FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free
(k); return status
;
791}
792
793/***************************************************************/
794/* for x86, using integer multiplies is faster */
795
/*
 * STORE_RES(res, x): scale an accumulated integer sum and store it as a
 * saturated pixel.
 *
 *   res - lvalue receiving the pixel value
 *   x   - mlib_s32 lvalue holding the accumulated sum; it is MODIFIED
 *         (shifted right in place by shift2)
 *
 * Relies on a variable named `shift2` being in scope at the point of use.
 * The clamping itself is delegated to CLAMP_STORE.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: both the shift and the clamped store stay together when
 * the macro is used as the body of an unbraced if/else or loop.
 */
#define STORE_RES(res, x)                                       \
  do {                                                          \
    (x) >>= shift2;                                             \
    CLAMP_STORE(res, x);                                        \
  } while (0)
799
800mlib_status CONV_FUNC_I(MxN)mlib_i_convMxNnw_u8(mlib_image *dst,
801 const mlib_image *src,
802 const mlib_s32 *kernel,
803 mlib_s32 m,
804 mlib_s32 n,
805 mlib_s32 dm,
806 mlib_s32 dn,
807 mlib_s32 scale,
808 mlib_s32 cmask)
809{
810 mlib_s32 buff[BUFF_SIZE1600], *buffd = buff;
811 mlib_s32 l, off, kw;
812 mlib_s32 d0, d1, shift1, shift2;
813 mlib_s32 k0, k1, k2, k3, k4, k5, k6;
814 mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;
815 DTYPEmlib_u8 *adr_src, *sl, *sp = NULL((void*)0);
816 DTYPEmlib_u8 *adr_dst, *dl, *dp = NULL((void*)0);
817 mlib_s32 wid, hgt, sll, dll;
818 mlib_s32 nchannel, chan1;
819 mlib_s32 i, j, c;
820 mlib_s32 chan2;
821 mlib_s32 k_locl[MAX_N15*MAX_N15], *k = k_locl;
822 GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(mlib_u8); dll = mlib_ImageGetStride(dst) / sizeof
(mlib_u8); adr_src = (mlib_u8 *)mlib_ImageGetData(src); adr_dst
= (mlib_u8 *)mlib_ImageGetData(dst)
;
823
824#if IMG_TYPE1 != 1
825 shift1 = 16;
826#else
827 shift1 = 8;
828#endif /* IMG_TYPE != 1 */
829 shift2 = scale - shift1;
830
831 chan1 = nchannel;
832 chan2 = chan1 + chan1;
833
834 wid -= (m - 1);
835 hgt -= (n - 1);
836 adr_dst += dn*dll + dm*nchannel;
837
838 if (wid > BUFF_SIZE1600) {
839 buffd = mlib_malloc(sizeof(mlib_s32)*wid);
840
841 if (buffd == NULL((void*)0)) return MLIB_FAILURE;
842 }
843
844 if (m*n > MAX_N15*MAX_N15) {
845 k = mlib_malloc(sizeof(mlib_s32)*(m*n));
846
847 if (k == NULL((void*)0)) {
848 if (buffd != buff) mlib_free(buffd);
849 return MLIB_FAILURE;
850 }
851 }
852
853 for (i = 0; i < m*n; i++) {
854 k[i] = kernel[i] >> shift1;
855 }
856
857 for (c = 0; c < nchannel; c++) {
858 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
859
860 sl = adr_src + c;
861 dl = adr_dst + c;
862
863 for (i = 0; i < wid; i++) buffd[i] = 0;
864
865 for (j = 0; j < hgt; j++) {
866 mlib_s32 *pk = k;
867
868 for (l = 0; l < n; l++) {
869 DTYPEmlib_u8 *sp0 = sl + l*sll;
870
871 for (off = 0; off < m;) {
872 sp = sp0 + off*chan1;
873 dp = dl;
874
875 kw = m - off;
876
877 if (kw > 2*MAX_KER7) kw = MAX_KER7; else
878 if (kw > MAX_KER7) kw = kw/2;
879 off += kw;
880
881 p2 = sp[0]; p3 = sp[chan1]; p4 = sp[chan2];
882 p5 = sp[chan2 + chan1]; p6 = sp[chan2 + chan2]; p7 = sp[5*chan1];
883
884 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
885 k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
886 pk += kw;
887
888 sp += (kw - 1)*chan1;
889
890 if (kw == 7) {
891
892 if (l < (n - 1) || off < m) {
893 for (i = 0; i <= (wid - 2); i += 2) {
894 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
895 p6 = sp[0];
896 p7 = sp[chan1];
897
898 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
899 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
900
901 sp += chan2;
902 }
903
904 } else {
905 for (i = 0; i <= (wid - 2); i += 2) {
906 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
907 p6 = sp[0];
908 p7 = sp[chan1];
909
910 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]);
911 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
912
913 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 & 0xffffff00) { if (d0 < 0
) dp[0 ] = 0; else dp[0 ] = (127*2 +1); } else { dp[0 ] = (mlib_u8
)d0; }
;
914 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 & 0xffffff00) { if (d1 < 0
) dp[chan1] = 0; else dp[chan1] = (127*2 +1); } else { dp[chan1
] = (mlib_u8)d1; }
;
915
916 buffd[i ] = 0;
917 buffd[i + 1] = 0;
918
919 sp += chan2;
920 dp += chan2;
921 }
922 }
923
924 } else if (kw == 6) {
925
926 if (l < (n - 1) || off < m) {
927 for (i = 0; i <= (wid - 2); i += 2) {
928 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
929 p5 = sp[0];
930 p6 = sp[chan1];
931
932 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
933 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
934
935 sp += chan2;
936 }
937
938 } else {
939 for (i = 0; i <= (wid - 2); i += 2) {
940 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
941 p5 = sp[0];
942 p6 = sp[chan1];
943
944 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]);
945 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
946
947 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 & 0xffffff00) { if (d0 < 0
) dp[0 ] = 0; else dp[0 ] = (127*2 +1); } else { dp[0 ] = (mlib_u8
)d0; }
;
948 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 & 0xffffff00) { if (d1 < 0
) dp[chan1] = 0; else dp[chan1] = (127*2 +1); } else { dp[chan1
] = (mlib_u8)d1; }
;
949
950 buffd[i ] = 0;
951 buffd[i + 1] = 0;
952
953 sp += chan2;
954 dp += chan2;
955 }
956 }
957
958 } else if (kw == 5) {
959
960 if (l < (n - 1) || off < m) {
961 for (i = 0; i <= (wid - 2); i += 2) {
962 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
963 p4 = sp[0];
964 p5 = sp[chan1];
965
966 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
967 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
968
969 sp += chan2;
970 }
971
972 } else {
973 for (i = 0; i <= (wid - 2); i += 2) {
974 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
975 p4 = sp[0];
976 p5 = sp[chan1];
977
978 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]);
979 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
980
981 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 & 0xffffff00) { if (d0 < 0
) dp[0 ] = 0; else dp[0 ] = (127*2 +1); } else { dp[0 ] = (mlib_u8
)d0; }
;
982 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 & 0xffffff00) { if (d1 < 0
) dp[chan1] = 0; else dp[chan1] = (127*2 +1); } else { dp[chan1
] = (mlib_u8)d1; }
;
983
984 buffd[i ] = 0;
985 buffd[i + 1] = 0;
986
987 sp += chan2;
988 dp += chan2;
989 }
990 }
991
992 } else if (kw == 4) {
993
994 if (l < (n - 1) || off < m) {
995 for (i = 0; i <= (wid - 2); i += 2) {
996 p0 = p2; p1 = p3; p2 = p4;
997 p3 = sp[0];
998 p4 = sp[chan1];
999
1000 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1001 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1002
1003 sp += chan2;
1004 }
1005
1006 } else {
1007 for (i = 0; i <= (wid - 2); i += 2) {
1008 p0 = p2; p1 = p3; p2 = p4;
1009 p3 = sp[0];
1010 p4 = sp[chan1];
1011
1012 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]);
1013 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1014
1015 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 & 0xffffff00) { if (d0 < 0
) dp[0 ] = 0; else dp[0 ] = (127*2 +1); } else { dp[0 ] = (mlib_u8
)d0; }
;
1016 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 & 0xffffff00) { if (d1 < 0
) dp[chan1] = 0; else dp[chan1] = (127*2 +1); } else { dp[chan1
] = (mlib_u8)d1; }
;
1017
1018 buffd[i ] = 0;
1019 buffd[i + 1] = 0;
1020
1021 sp += chan2;
1022 dp += chan2;
1023 }
1024 }
1025
1026 } else if (kw == 3) {
1027
1028 if (l < (n - 1) || off < m) {
1029 for (i = 0; i <= (wid - 2); i += 2) {
1030 p0 = p2; p1 = p3;
1031 p2 = sp[0];
1032 p3 = sp[chan1];
1033
1034 buffd[i ] += p0*k0 + p1*k1 + p2*k2;
1035 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1036
1037 sp += chan2;
1038 }
1039
1040 } else {
1041 for (i = 0; i <= (wid - 2); i += 2) {
1042 p0 = p2; p1 = p3;
1043 p2 = sp[0];
1044 p3 = sp[chan1];
1045
1046 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i ]);
1047 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1048
1049 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 & 0xffffff00) { if (d0 < 0
) dp[0 ] = 0; else dp[0 ] = (127*2 +1); } else { dp[0 ] = (mlib_u8
)d0; }
;
1050 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 & 0xffffff00) { if (d1 < 0
) dp[chan1] = 0; else dp[chan1] = (127*2 +1); } else { dp[chan1
] = (mlib_u8)d1; }
;
1051
1052 buffd[i ] = 0;
1053 buffd[i + 1] = 0;
1054
1055 sp += chan2;
1056 dp += chan2;
1057 }
1058 }
1059
1060 } else if (kw == 2) {
1061
1062 if (l < (n - 1) || off < m) {
1063 for (i = 0; i <= (wid - 2); i += 2) {
1064 p0 = p2;
1065 p1 = sp[0];
1066 p2 = sp[chan1];
1067
1068 buffd[i ] += p0*k0 + p1*k1;
1069 buffd[i + 1] += p1*k0 + p2*k1;
1070
1071 sp += chan2;
1072 }
1073
1074 } else {
1075 for (i = 0; i <= (wid - 2); i += 2) {
1076 p0 = p2;
1077 p1 = sp[0];
1078 p2 = sp[chan1];
1079
1080 d0 = (p0*k0 + p1*k1 + buffd[i ]);
1081 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1082
1083 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 & 0xffffff00) { if (d0 < 0
) dp[0 ] = 0; else dp[0 ] = (127*2 +1); } else { dp[0 ] = (mlib_u8
)d0; }
;
1084 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 & 0xffffff00) { if (d1 < 0
) dp[chan1] = 0; else dp[chan1] = (127*2 +1); } else { dp[chan1
] = (mlib_u8)d1; }
;
1085
1086 buffd[i ] = 0;
1087 buffd[i + 1] = 0;
1088
1089 sp += chan2;
1090 dp += chan2;
1091 }
1092 }
1093
1094 } else /*if (kw == 1)*/ {
1095
1096 if (l < (n - 1) || off < m) {
1097 for (i = 0; i <= (wid - 2); i += 2) {
1098 p0 = sp[0];
1099 p1 = sp[chan1];
1100
1101 buffd[i ] += p0*k0;
1102 buffd[i + 1] += p1*k0;
1103
1104 sp += chan2;
1105 }
1106
1107 } else {
1108 for (i = 0; i <= (wid - 2); i += 2) {
1109 p0 = sp[0];
1110 p1 = sp[chan1];
1111
1112 d0 = (p0*k0 + buffd[i ]);
1113 d1 = (p1*k0 + buffd[i + 1]);
1114
1115 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 & 0xffffff00) { if (d0 < 0
) dp[0 ] = 0; else dp[0 ] = (127*2 +1); } else { dp[0 ] = (mlib_u8
)d0; }
;
1116 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 & 0xffffff00) { if (d1 < 0
) dp[chan1] = 0; else dp[chan1] = (127*2 +1); } else { dp[chan1
] = (mlib_u8)d1; }
;
1117
1118 buffd[i ] = 0;
1119 buffd[i + 1] = 0;
1120
1121 sp += chan2;
1122 dp += chan2;
1123 }
1124 }
1125 }
1126 }
1127 }
1128
1129 /* last pixels */
1130 for (; i < wid; i++) {
1131 mlib_s32 *pk = k, s = 0;
1132 mlib_s32 x;
1133
1134 for (l = 0; l < n; l++) {
1135 sp = sl + l*sll + i*chan1;
1136
1137 for (x = 0; x < m; x++) {
1138 s += sp[0] * pk[0];
1139 sp += chan1;
1140 pk ++;
1141 }
1142 }
1143
1144 STORE_RES(dp[0], s)s >>= shift2; if (s & 0xffffff00) { if (s < 0) dp
[0] = 0; else dp[0] = (127*2 +1); } else { dp[0] = (mlib_u8)s
; }
;
1145
1146 sp += chan1;
1147 dp += chan1;
1148 }
1149
1150 sl += sll;
1151 dl += dll;
1152 }
1153 }
1154
1155 if (buffd != buff) mlib_free(buffd);
1156 if (k != k_locl) mlib_free(k);
1157
1158 return MLIB_SUCCESS;
1159}
1160
1161/***************************************************************/