Bug Summary

File: jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c
Warning: line 313, column 28
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name mlib_ImageConv_16ext.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjava -I /home/daniel/Projects/java/jdk/src/java.base/unix/native/libjava -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -D LIBC=gnu -D _GNU_SOURCE -D _REENTRANT -D _LARGEFILE64_SOURCE -D LINUX -D DEBUG -D _LITTLE_ENDIAN -D ARCH="amd64" -D amd64 -D _LP64=1 -D __USE_J2D_NAMES -D __MEDIALIB_OLD_NAMES -D MLIB_NO_LIBSUNMATH -D MLIB_OS64BIT -I /home/daniel/Projects/java/jdk/src/java.desktop/share/native/libmlib_image -I /home/daniel/Projects/java/jdk/src/java.desktop/share/native/common/awt/medialib -I 
/home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/headers/java.desktop -D _FORTIFY_SOURCE=2 -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-unused -Wno-unused-function -std=c99 -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c /home/daniel/Projects/java/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConv_16ext.c
1/*
2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26
27/*
28 * FUNCTION
29 * Internal functions for mlib_ImageConv* on U8/S16/U16 type and
30 * MLIB_EDGE_SRC_EXTEND mask
31 */
32
33#include "mlib_image.h"
34#include "mlib_ImageConv.h"
35#include "mlib_c_ImageConv.h"
36
37/*
38 * This define switches between functions of different data types
39 */
40
41#define IMG_TYPE2 2
42
43/***************************************************************/
44#if IMG_TYPE2 == 1
45
46#define DTYPEmlib_s16 mlib_u8
47#define CONV_FUNC(KERN)mlib_convKERNext_s16(mlib_image *dst, const mlib_image *src, mlib_s32
dx_l, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32
*kern, mlib_s32 scalef_expon, mlib_s32 cmask)
mlib_c_conv##KERN##ext_u8(PARAMmlib_image *dst, const mlib_image *src, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32 *kern, mlib_s32
scalef_expon, mlib_s32 cmask
)
48#define CONV_FUNC_MxNmlib_convMxNext_s16(mlib_image *dst, const mlib_image *src, const
mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32
cmask)
mlib_c_convMxNext_u8(PARAM_MxNmlib_image *dst, const mlib_image *src, const mlib_s32 *kernel
, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32
dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32 cmask
)
49#define CONV_FUNC_I(KERN)mlib_i_convKERNext_s16(mlib_image *dst, const mlib_image *src
, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b,
const mlib_s32 *kern, mlib_s32 scalef_expon, mlib_s32 cmask)
mlib_i_conv##KERN##ext_u8(PARAMmlib_image *dst, const mlib_image *src, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32 *kern, mlib_s32
scalef_expon, mlib_s32 cmask
)
50#define CONV_FUNC_MxN_Imlib_i_convMxNext_s16(mlib_image *dst, const mlib_image *src,
const mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l
, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale
, mlib_s32 cmask)
mlib_i_convMxNext_u8(PARAM_MxNmlib_image *dst, const mlib_image *src, const mlib_s32 *kernel
, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32
dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32 cmask
)
51#define DSCALE65536.0 (1 << 24)
52#define FROM_S32(x)((x) >> 16) (((x) >> 24) ^ 128)
53#define S64TOS32(x)((x) & 0xffffffff) (x)
54#define SAT_OFF -(1u << 31)
55
56#elif IMG_TYPE2 == 2
57
58#define DTYPEmlib_s16 mlib_s16
59#define CONV_FUNC(KERN)mlib_convKERNext_s16(mlib_image *dst, const mlib_image *src, mlib_s32
dx_l, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32
*kern, mlib_s32 scalef_expon, mlib_s32 cmask)
mlib_conv##KERN##ext_s16(PARAMmlib_image *dst, const mlib_image *src, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32 *kern, mlib_s32
scalef_expon, mlib_s32 cmask
)
60#define CONV_FUNC_MxNmlib_convMxNext_s16(mlib_image *dst, const mlib_image *src, const
mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32
cmask)
mlib_convMxNext_s16(PARAM_MxNmlib_image *dst, const mlib_image *src, const mlib_s32 *kernel
, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32
dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32 cmask
)
61#define CONV_FUNC_I(KERN)mlib_i_convKERNext_s16(mlib_image *dst, const mlib_image *src
, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b,
const mlib_s32 *kern, mlib_s32 scalef_expon, mlib_s32 cmask)
mlib_i_conv##KERN##ext_s16(PARAMmlib_image *dst, const mlib_image *src, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32 *kern, mlib_s32
scalef_expon, mlib_s32 cmask
)
62#define CONV_FUNC_MxN_Imlib_i_convMxNext_s16(mlib_image *dst, const mlib_image *src,
const mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l
, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale
, mlib_s32 cmask)
mlib_i_convMxNext_s16(PARAM_MxNmlib_image *dst, const mlib_image *src, const mlib_s32 *kernel
, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32
dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32 cmask
)
63#define DSCALE65536.0 65536.0
64#define FROM_S32(x)((x) >> 16) ((x) >> 16)
65#define S64TOS32(x)((x) & 0xffffffff) ((x) & 0xffffffff)
66#define SAT_OFF
67
68#elif IMG_TYPE2 == 3
69
70#define DTYPEmlib_s16 mlib_u16
71#define CONV_FUNC(KERN)mlib_convKERNext_s16(mlib_image *dst, const mlib_image *src, mlib_s32
dx_l, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32
*kern, mlib_s32 scalef_expon, mlib_s32 cmask)
mlib_conv##KERN##ext_u16(PARAMmlib_image *dst, const mlib_image *src, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32 *kern, mlib_s32
scalef_expon, mlib_s32 cmask
)
72#define CONV_FUNC_MxNmlib_convMxNext_s16(mlib_image *dst, const mlib_image *src, const
mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32
cmask)
mlib_convMxNext_u16(PARAM_MxNmlib_image *dst, const mlib_image *src, const mlib_s32 *kernel
, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32
dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32 cmask
)
73#define CONV_FUNC_I(KERN)mlib_i_convKERNext_s16(mlib_image *dst, const mlib_image *src
, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b,
const mlib_s32 *kern, mlib_s32 scalef_expon, mlib_s32 cmask)
mlib_i_conv##KERN##ext_u16(PARAMmlib_image *dst, const mlib_image *src, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32 *kern, mlib_s32
scalef_expon, mlib_s32 cmask
)
74#define CONV_FUNC_MxN_Imlib_i_convMxNext_s16(mlib_image *dst, const mlib_image *src,
const mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l
, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale
, mlib_s32 cmask)
mlib_i_convMxNext_u16(PARAM_MxNmlib_image *dst, const mlib_image *src, const mlib_s32 *kernel
, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32
dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32 cmask
)
75#define DSCALE65536.0 65536.0
76#define FROM_S32(x)((x) >> 16) (((x) >> 16) ^ 0x8000)
77#define S64TOS32(x)((x) & 0xffffffff) (x)
78#define SAT_OFF -(1u << 31)
79
80#endif /* IMG_TYPE == 1 */
81
82/***************************************************************/
83#define PARAMmlib_image *dst, const mlib_image *src, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, const mlib_s32 *kern, mlib_s32
scalef_expon, mlib_s32 cmask
\
84 mlib_image *dst, \
85 const mlib_image *src, \
86 mlib_s32 dx_l, \
87 mlib_s32 dx_r, \
88 mlib_s32 dy_t, \
89 mlib_s32 dy_b, \
90 const mlib_s32 *kern, \
91 mlib_s32 scalef_expon, \
92 mlib_s32 cmask
93
94/***************************************************************/
95#define PARAM_MxNmlib_image *dst, const mlib_image *src, const mlib_s32 *kernel
, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32 dx_r, mlib_s32
dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32 cmask
\
96 mlib_image *dst, \
97 const mlib_image *src, \
98 const mlib_s32 *kernel, \
99 mlib_s32 m, \
100 mlib_s32 n, \
101 mlib_s32 dx_l, \
102 mlib_s32 dx_r, \
103 mlib_s32 dy_t, \
104 mlib_s32 dy_b, \
105 mlib_s32 scale, \
106 mlib_s32 cmask
107
108/***************************************************************/
109#define FTYPEmlib_d64 mlib_d64
110
111#ifndef MLIB_USE_FTOI_CLAMPING
112
113#define CLAMP_S32(x)(((x) <= (-2147483647 -1)) ? (-2147483647 -1) : (((x) >=
2147483647) ? 2147483647 : (mlib_s32)(x)))
\
114 (((x) <= MLIB_S32_MIN(-2147483647 -1)) ? MLIB_S32_MIN(-2147483647 -1) : (((x) >= MLIB_S32_MAX2147483647) ? MLIB_S32_MAX2147483647 : (mlib_s32)(x)))
115
116#else
117
118#define CLAMP_S32(x)(((x) <= (-2147483647 -1)) ? (-2147483647 -1) : (((x) >=
2147483647) ? 2147483647 : (mlib_s32)(x)))
((mlib_s32)(x))
119
120#endif /* MLIB_USE_FTOI_CLAMPING */
121
122/***************************************************************/
123#define D2I(x)((((x)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((x)) >=
2147483647) ? 2147483647 : (mlib_s32)((x))))
CLAMP_S32((x) SAT_OFF)((((x)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((x)) >=
2147483647) ? 2147483647 : (mlib_s32)((x))))
124
125/***************************************************************/
126#ifdef _LITTLE_ENDIAN1
127
128#define STORE2(res0, res1)dp[0 ] = res1; dp[chan1] = res0 \
129 dp[0 ] = res1; \
130 dp[chan1] = res0
131
132#else
133
134#define STORE2(res0, res1)dp[0 ] = res1; dp[chan1] = res0 \
135 dp[0 ] = res0; \
136 dp[chan1] = res1
137
138#endif /* _LITTLE_ENDIAN */
139
140/***************************************************************/
141#ifdef _NO_LONGLONG
142
143#define LOAD_BUFF(buff)*(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) |
(((mlib_s64)sp[0]) & 0xffffffff)
\
144 buff[i ] = sp[0]; \
145 buff[i + 1] = sp[chan1]
146
147#else /* _NO_LONGLONG */
148
149#ifdef _LITTLE_ENDIAN1
150
151#define LOAD_BUFF(buff)*(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) |
(((mlib_s64)sp[0]) & 0xffffffff)
\
152 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) | S64TOS32((mlib_s64)sp[0])(((mlib_s64)sp[0]) & 0xffffffff)
153
154#else /* _LITTLE_ENDIAN */
155
156#define LOAD_BUFF(buff)*(mlib_s64*)(buff + i) = (((mlib_s64)sp[chan1]) << 32) |
(((mlib_s64)sp[0]) & 0xffffffff)
\
157 *(mlib_s64*)(buff + i) = (((mlib_s64)sp[0]) << 32) | S64TOS32((mlib_s64)sp[chan1])(((mlib_s64)sp[chan1]) & 0xffffffff)
158
159#endif /* _LITTLE_ENDIAN */
160#endif /* _NO_LONGLONG */
161
162/***************************************************************/
163typedef union {
164 mlib_d64 d64;
165 struct {
166 mlib_s32 i0;
167 mlib_s32 i1;
168 } i32s;
169} d64_2x32;
170
171/***************************************************************/
172#define GET_SRC_DST_PARAMETERS(type)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(type); dll = mlib_ImageGetStride(dst) / sizeof
(type); adr_src = (type *)mlib_ImageGetData(src); adr_dst = (
type *)mlib_ImageGetData(dst)
\
173 hgt = mlib_ImageGetHeight(src); \
174 wid = mlib_ImageGetWidth(src); \
175 nchannel = mlib_ImageGetChannels(src); \
176 sll = mlib_ImageGetStride(src) / sizeof(type); \
177 dll = mlib_ImageGetStride(dst) / sizeof(type); \
178 adr_src = (type *)mlib_ImageGetData(src); \
179 adr_dst = (type *)mlib_ImageGetData(dst)
180
181/***************************************************************/
182#if IMG_TYPE2 == 1
183
184/*
185 * Test for the presence of any "1" bit in bits
186 8 to 31 of val. If present, then val is either
187 negative or >255. If over/underflows of 8 bits
188 are uncommon, then this technique can be a win,
189 since only a single test, rather than two, is
190 necessary to determine if clamping is needed.
191 On the other hand, if over/underflows are common,
192 it adds an extra test.
193*/
194#define CLAMP_STORE(dst, val)if (val >= 32767) dst = 32767; else if (val <= (-32767 -
1)) dst = (-32767 -1); else dst = (mlib_s16)val
\
195 if (val & 0xffffff00) { \
196 if (val < MLIB_U8_MIN0) \
197 dst = MLIB_U8_MIN0; \
198 else \
199 dst = MLIB_U8_MAX(127*2 +1); \
200 } else { \
201 dst = (mlib_u8)val; \
202 }
203
204#elif IMG_TYPE2 == 2
205
206#define CLAMP_STORE(dst, val)if (val >= 32767) dst = 32767; else if (val <= (-32767 -
1)) dst = (-32767 -1); else dst = (mlib_s16)val
\
207 if (val >= MLIB_S16_MAX32767) \
208 dst = MLIB_S16_MAX32767; \
209 else if (val <= MLIB_S16_MIN(-32767 -1)) \
210 dst = MLIB_S16_MIN(-32767 -1); \
211 else \
212 dst = (mlib_s16)val
213
214#elif IMG_TYPE2 == 3
215
216#define CLAMP_STORE(dst, val)if (val >= 32767) dst = 32767; else if (val <= (-32767 -
1)) dst = (-32767 -1); else dst = (mlib_s16)val
\
217 if (val >= MLIB_U16_MAX(32767 *2 +1)) \
218 dst = MLIB_U16_MAX(32767 *2 +1); \
219 else if (val <= MLIB_U16_MIN0) \
220 dst = MLIB_U16_MIN0; \
221 else \
222 dst = (mlib_u16)val
223
224#endif /* IMG_TYPE == 1 */
225
226/***************************************************************/
227#define MAX_KER7 7
228#define MAX_N15 15
229#define BUFF_SIZE1600 1600
230#define CACHE_SIZE(64*1024) (64*1024)
231
232static mlib_status mlib_ImageConv1xN_ext(mlib_image *dst,
233 const mlib_image *src,
234 const mlib_d64 *k,
235 mlib_s32 n,
236 mlib_s32 dy_t,
237 mlib_s32 dy_b,
238 mlib_s32 cmask)
239{
240 DTYPEmlib_s16 *adr_src, *sl;
241 DTYPEmlib_s16 *adr_dst, *dl, *dp;
242 FTYPEmlib_d64 buff[BUFF_SIZE1600];
243 FTYPEmlib_d64 *buffd;
244 FTYPEmlib_d64 *pbuff = buff;
245 const FTYPEmlib_d64 *pk;
246 FTYPEmlib_d64 k0, k1, k2, k3;
247 FTYPEmlib_d64 p0, p1, p2, p3, p4;
248 FTYPEmlib_d64 *sbuff;
249 mlib_s32 l, k_off, off, bsize;
250 mlib_s32 max_hsize, smax_hsize, shgt, hsize, kh;
251 mlib_s32 d0, d1, ii;
252 mlib_s32 wid, hgt, sll, dll;
253 mlib_s32 nchannel;
254 mlib_s32 i, j, c;
255 GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(mlib_s16); dll = mlib_ImageGetStride(dst) / sizeof
(mlib_s16); adr_src = (mlib_s16 *)mlib_ImageGetData(src); adr_dst
= (mlib_s16 *)mlib_ImageGetData(dst)
;
256
257 max_hsize = ((CACHE_SIZE(64*1024)/sizeof(DTYPEmlib_s16))/sll) - (n - 1);
258
259 if (max_hsize < 1) max_hsize = 1;
1
Assuming 'max_hsize' is >= 1
2
Taking false branch
260 if (max_hsize > hgt) max_hsize = hgt;
3
Assuming 'max_hsize' is <= 'hgt'
4
Taking false branch
261
262 shgt = hgt + (n - 1);
263 smax_hsize = max_hsize + (n - 1);
264
265 bsize = 2 * (smax_hsize + 1);
266
267 if (bsize > BUFF_SIZE1600) {
5
Assuming 'bsize' is <= BUFF_SIZE
6
Taking false branch
268 pbuff = mlib_malloc(sizeof(FTYPEmlib_d64)*bsize);
269
270 if (pbuff == NULL((void*)0)) return MLIB_FAILURE;
271 }
272
273 sbuff = pbuff;
274 buffd = sbuff + smax_hsize;
275
276 shgt -= (dy_t + dy_b);
277 k_off = 0;
278
279 for (l = 0; l < hgt; l += hsize) {
7
Assuming 'l' is < 'hgt'
8
Loop condition is true. Entering loop body
280 hsize = hgt - l;
281
282 if (hsize > max_hsize) hsize = max_hsize;
9
Assuming 'hsize' is <= 'max_hsize'
10
Taking false branch
283
284 smax_hsize = hsize + (n - 1);
285
286 for (c = 0; c < nchannel; c++) {
11
Assuming 'c' is < 'nchannel'
12
Loop condition is true. Entering loop body
287 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
13
Assuming the condition is false
14
Taking false branch
288
289 sl = adr_src + c;
290 dl = adr_dst + c;
291
292 for (i = 0; i
14.1
'i' is < 'hsize'
< hsize
; i++) buffd[i] = 0.0;
15
Loop condition is true. Entering loop body
16
Assuming 'i' is >= 'hsize'
17
Loop condition is false. Execution continues on line 294
293
294 for (j = 0; j < wid; j++) {
18
Assuming 'j' is < 'wid'
19
Loop condition is true. Entering loop body
295 FTYPEmlib_d64 *buff = sbuff;
296
297 for (i = k_off, ii = 0; (i < dy_t) && (ii < smax_hsize); i++, ii++) {
20
Assuming 'i' is < 'dy_t'
21
Assuming 'ii' is < 'smax_hsize'
22
Loop condition is true. Entering loop body
23
Assuming 'i' is >= 'dy_t'
298 sbuff[i - k_off] = (FTYPEmlib_d64)sl[0];
299 }
300
301 for (; (i < shgt + dy_t) && (ii < smax_hsize); i++, ii++) {
24
Assuming the condition is false
302 sbuff[i - k_off] = (FTYPEmlib_d64)sl[(i - dy_t)*sll];
303 }
304
305 for (; (i < shgt + dy_t + dy_b) && (ii < smax_hsize); i++, ii++) {
25
Assuming the condition is false
306 sbuff[i - k_off] = (FTYPEmlib_d64)sl[(shgt - 1)*sll];
307 }
308
309 pk = k;
310
311 for (off = 0; off < (n - 4); off += 4) {
26
Assuming the condition is true
27
Loop condition is true. Entering loop body
312
313 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
28
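Assigned value is garbage or undefined (on this path only sbuff[0] was written, at step 22; the fill loops at lines 301 and 305 were skipped at steps 24 and 25, so buff[1] and buff[2], which alias sbuff via line 295, are read before being initialized)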
314 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
315
316 for (i = 0; i < hsize; i += 2) {
317 p0 = p2; p1 = p3; p2 = p4;
318
319 p3 = buff[i + 3]; p4 = buff[i + 4];
320
321 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
322 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
323 }
324
325 pk += 4;
326 buff += 4;
327 }
328
329 dp = dl;
330 kh = n - off;
331
332 if (kh == 4) {
333 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
334 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
335
336 for (i = 0; i <= (hsize - 2); i += 2) {
337 p0 = p2; p1 = p3; p2 = p4;
338
339 p3 = buff[i + 3]; p4 = buff[i + 4];
340
341 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 +
buffd[i ])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*
k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]))))
;
342 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + p4*k3 +
buffd[i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((
p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]))))
;
343
344 dp[0 ] = FROM_S32(d0)((d0) >> 16);
345 dp[dll] = FROM_S32(d1)((d1) >> 16);
346
347 buffd[i ] = 0.0;
348 buffd[i + 1] = 0.0;
349
350 dp += 2*dll;
351 }
352
353 if (i < hsize) {
354 p0 = p2; p1 = p3; p2 = p4;
355 p3 = buff[i + 3];
356 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i])) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 +
buffd[i])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0
+ p1*k1 + p2*k2 + p3*k3 + buffd[i]))))
;
357 dp[0] = FROM_S32(d0)((d0) >> 16);
358 buffd[i] = 0.0;
359 }
360
361 } else if (kh == 3) {
362
363 p2 = buff[0]; p3 = buff[1];
364 k0 = pk[0]; k1 = pk[1]; k2 = pk[2];
365
366 for (i = 0; i <= (hsize - 2); i += 2) {
367 p0 = p2; p1 = p3;
368
369 p2 = buff[i + 2]; p3 = buff[i + 3];
370
371 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + buffd[i ])) <= (-2147483647 -1
)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + buffd[i ]
)) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1
+ p2*k2 + buffd[i ]))))
;
372 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + buffd[
i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 +
p2*k1 + p3*k2 + buffd[i + 1]))))
;
373
374 dp[0 ] = FROM_S32(d0)((d0) >> 16);
375 dp[dll] = FROM_S32(d1)((d1) >> 16);
376
377 buffd[i ] = 0.0;
378 buffd[i + 1] = 0.0;
379
380 dp += 2*dll;
381 }
382
383 if (i < hsize) {
384 p0 = p2; p1 = p3;
385 p2 = buff[i + 2];
386 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i])((((p0*k0 + p1*k1 + p2*k2 + buffd[i])) <= (-2147483647 -1)
) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + buffd[i]))
>= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 +
p2*k2 + buffd[i]))))
;
387 dp[0] = FROM_S32(d0)((d0) >> 16);
388
389 buffd[i] = 0.0;
390 }
391
392 } else if (kh == 2) {
393
394 p2 = buff[0];
395 k0 = pk[0]; k1 = pk[1];
396
397 for (i = 0; i <= (hsize - 2); i += 2) {
398 p0 = p2;
399
400 p1 = buff[i + 1]; p2 = buff[i + 2];
401
402 d0 = D2I(p0*k0 + p1*k1 + buffd[i ])((((p0*k0 + p1*k1 + buffd[i ])) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p0*k0 + p1*k1 + buffd[i ])) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + buffd[i ]))))
;
403 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1])((((p1*k0 + p2*k1 + buffd[i + 1])) <= (-2147483647 -1)) ? (
-2147483647 -1) : ((((p1*k0 + p2*k1 + buffd[i + 1])) >= 2147483647
) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + buffd[i + 1]))))
;
404
405 dp[0 ] = FROM_S32(d0)((d0) >> 16);
406 dp[dll] = FROM_S32(d1)((d1) >> 16);
407
408 buffd[i ] = 0.0;
409 buffd[i + 1] = 0.0;
410
411 dp += 2*dll;
412 }
413
414 if (i < hsize) {
415 p0 = p2;
416 p1 = buff[i + 1];
417 d0 = D2I(p0*k0 + p1*k1 + buffd[i])((((p0*k0 + p1*k1 + buffd[i])) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p0*k0 + p1*k1 + buffd[i])) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + buffd[i]))))
;
418 dp[0] = FROM_S32(d0)((d0) >> 16);
419
420 buffd[i] = 0.0;
421 }
422
423 } else /* kh == 1 */{
424
425 k0 = pk[0];
426
427 for (i = 0; i <= (hsize - 2); i += 2) {
428 p0 = buff[i]; p1 = buff[i + 1];
429
430 d0 = D2I(p0*k0 + buffd[i ])((((p0*k0 + buffd[i ])) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p0*k0 + buffd[i ])) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + buffd[i ]))))
;
431 d1 = D2I(p1*k0 + buffd[i + 1])((((p1*k0 + buffd[i + 1])) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p1*k0 + buffd[i + 1])) >= 2147483647) ? 2147483647
: (mlib_s32)((p1*k0 + buffd[i + 1]))))
;
432
433 dp[0 ] = FROM_S32(d0)((d0) >> 16);
434 dp[dll] = FROM_S32(d1)((d1) >> 16);
435
436 buffd[i ] = 0.0;
437 buffd[i + 1] = 0.0;
438
439 dp += 2*dll;
440 }
441
442 if (i < hsize) {
443 p0 = buff[i];
444 d0 = D2I(p0*k0 + buffd[i])((((p0*k0 + buffd[i])) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p0*k0 + buffd[i])) >= 2147483647) ? 2147483647 :
(mlib_s32)((p0*k0 + buffd[i]))))
;
445 dp[0] = FROM_S32(d0)((d0) >> 16);
446
447 buffd[i] = 0.0;
448 }
449 }
450
451 /* next line */
452 sl += nchannel;
453 dl += nchannel;
454 }
455 }
456
457 k_off += max_hsize;
458 adr_dst += max_hsize*dll;
459 }
460
461 if (pbuff != buff) mlib_free(pbuff);
462
463 return MLIB_SUCCESS;
464}
465
466/***************************************************************/
467mlib_status CONV_FUNC_MxNmlib_convMxNext_s16(mlib_image *dst, const mlib_image *src, const
mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l, mlib_s32
dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale, mlib_s32
cmask)
468{
469 DTYPEmlib_s16 *adr_src, *sl, *sp = NULL((void*)0);
470 DTYPEmlib_s16 *adr_dst, *dl, *dp = NULL((void*)0);
471 FTYPEmlib_d64 buff[BUFF_SIZE1600], *buffs_arr[2*(MAX_N15 + 1)];
472 FTYPEmlib_d64 **buffs = buffs_arr, *buffd;
473 FTYPEmlib_d64 akernel[256], *k = akernel, fscale = DSCALE65536.0;
474 FTYPEmlib_d64 *pbuff = buff;
475 FTYPEmlib_d64 k0, k1, k2, k3, k4, k5, k6;
476 FTYPEmlib_d64 p0, p1, p2, p3, p4, p5, p6, p7;
477 mlib_s32 *buffi;
478 mlib_s32 mn, l, off, kw, bsize, buff_ind;
479 mlib_s32 d0, d1;
480 mlib_s32 wid, hgt, sll, dll;
481 mlib_s32 nchannel, chan1, chan2;
482 mlib_s32 i, j, c, swid;
483 d64_2x32 dd;
484 mlib_status status = MLIB_SUCCESS;
485
486 GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(mlib_s16); dll = mlib_ImageGetStride(dst) / sizeof
(mlib_s16); adr_src = (mlib_s16 *)mlib_ImageGetData(src); adr_dst
= (mlib_s16 *)mlib_ImageGetData(dst)
;
487
488 if (scale > 30) {
489 fscale *= 1.0/(1 << 30);
490 scale -= 30;
491 }
492
493 fscale /= (1 << scale);
494
495 mn = m*n;
496
497 if (mn > 256) {
498 k = mlib_malloc(mn*sizeof(mlib_d64));
499
500 if (k == NULL((void*)0)) return MLIB_FAILURE;
501 }
502
503 for (i = 0; i < mn; i++) {
504 k[i] = kernel[i]*fscale;
505 }
506
507 if (m == 1) {
508 status = mlib_ImageConv1xN_ext(dst, src, k, n, dy_t, dy_b, cmask);
509 FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free
(k); return status
;
510 }
511
512 swid = wid + (m - 1);
513
514 bsize = (n + 3)*swid;
515
516 if ((bsize > BUFF_SIZE1600) || (n > MAX_N15)) {
517 pbuff = mlib_malloc(sizeof(FTYPEmlib_d64)*bsize + sizeof(FTYPEmlib_d64 *)*2*(n + 1));
518
519 if (pbuff == NULL((void*)0)) {
520 status = MLIB_FAILURE;
521 FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free
(k); return status
;
522 }
523 buffs = (FTYPEmlib_d64 **)(pbuff + bsize);
524 }
525
526 for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*swid;
527 for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
528 buffd = buffs[n] + swid;
529 buffi = (mlib_s32*)(buffd + swid);
530
531 chan1 = nchannel;
532 chan2 = chan1 + chan1;
533
534 swid -= (dx_l + dx_r);
535
536 for (c = 0; c < nchannel; c++) {
537 if (!(cmask & (1 << (chan1 - 1 - c)))) continue;
538
539 sl = adr_src + c;
540 dl = adr_dst + c;
541
542 for (l = 0; l < n; l++) {
543 FTYPEmlib_d64 *buff = buffs[l];
544
545 for (i = 0; i < dx_l; i++) {
546 buff[i] = (FTYPEmlib_d64)sl[0];
547 }
548
549 for (i = 0; i < swid; i++) {
550 buff[i + dx_l] = (FTYPEmlib_d64)sl[i*chan1];
551 }
552
553 for (i = 0; i < dx_r; i++) {
554 buff[swid + dx_l + i] = buff[swid + dx_l - 1];
555 }
556
557 if ((l >= dy_t) && (l < hgt + n - dy_b - 2)) sl += sll;
558 }
559
560 buff_ind = 0;
561
562 for (i = 0; i < wid; i++) buffd[i] = 0.0;
563
564 for (j = 0; j < hgt; j++) {
565 FTYPEmlib_d64 **buffc = buffs + buff_ind;
566 FTYPEmlib_d64 *buffn = buffc[n];
567 FTYPEmlib_d64 *pk = k;
568
569 for (l = 0; l < n; l++) {
570 FTYPEmlib_d64 *buff_l = buffc[l];
571
572 for (off = 0; off < m;) {
573 FTYPEmlib_d64 *buff = buff_l + off;
574
575 kw = m - off;
576
577 if (kw > 2*MAX_KER7) kw = MAX_KER7; else
578 if (kw > MAX_KER7) kw = kw/2;
579 off += kw;
580
581 sp = sl;
582 dp = dl;
583
584 if (kw == 7) {
585
586 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
587 p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
588
589 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
590 k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
591
592 if (l < (n - 1) || off < m) {
593 for (i = 0; i <= (wid - 2); i += 2) {
594 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
595
596 p6 = buff[i + 6]; p7 = buff[i + 7];
597
598 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
599 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
600 }
601
602 } else {
603 for (i = 0; i <= (wid - 2); i += 2) {
604 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
605
606 p6 = buff[i + 6]; p7 = buff[i + 7];
607
608 LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32)
| (((mlib_s64)sp[0]) & 0xffffffff)
;
609
610 dd.d64 = *(FTYPEmlib_d64 *)(buffi + i);
611 buffn[i + dx_l ] = (FTYPEmlib_d64)dd.i32s.i0;
612 buffn[i + dx_l + 1] = (FTYPEmlib_d64)dd.i32s.i1;
613
614 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd
[i ])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0
+ p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ])
) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 +
p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]))))
;
615 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd
[i + 1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1
*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i
+ 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2
*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1])))
)
;
616
617 dp[0 ] = FROM_S32(d0)((d0) >> 16);
618 dp[chan1] = FROM_S32(d1)((d1) >> 16);
619
620 buffd[i ] = 0.0;
621 buffd[i + 1] = 0.0;
622
623 sp += chan2;
624 dp += chan2;
625 }
626 }
627
628 } else if (kw == 6) {
629
630 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
631 p5 = buff[3]; p6 = buff[4];
632
633 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
634 k4 = pk[4]; k5 = pk[5];
635
636 if (l < (n - 1) || off < m) {
637 for (i = 0; i <= (wid - 2); i += 2) {
638 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
639
640 p5 = buff[i + 5]; p6 = buff[i + 6];
641
642 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
643 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
644 }
645
646 } else {
647 for (i = 0; i <= (wid - 2); i += 2) {
648 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
649
650 p5 = buff[i + 5]; p6 = buff[i + 6];
651
652 LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32)
| (((mlib_s64)sp[0]) & 0xffffffff)
;
653
654 dd.d64 = *(FTYPEmlib_d64 *)(buffi + i);
655 buffn[i + dx_l ] = (FTYPEmlib_d64)dd.i32s.i0;
656 buffn[i + dx_l + 1] = (FTYPEmlib_d64)dd.i32s.i1;
657
658 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]
)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1
*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ])) >= 2147483647
) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4
*k4 + p5*k5 + buffd[i ]))))
;
659 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i +
1])) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 +
p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1])) >=
2147483647) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + p3*k2
+ p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]))))
;
660
661 dp[0 ] = FROM_S32(d0)((d0) >> 16);
662 dp[chan1] = FROM_S32(d1)((d1) >> 16);
663
664 buffd[i ] = 0.0;
665 buffd[i + 1] = 0.0;
666
667 sp += chan2;
668 dp += chan2;
669 }
670 }
671
672 } else if (kw == 5) {
673
674 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
675 p5 = buff[3];
676
677 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
678 k4 = pk[4];
679
680 if (l < (n - 1) || off < m) {
681 for (i = 0; i <= (wid - 2); i += 2) {
682 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
683
684 p4 = buff[i + 4]; p5 = buff[i + 5];
685
686 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
687 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
688 }
689
690 } else {
691 for (i = 0; i <= (wid - 2); i += 2) {
692 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
693
694 p4 = buff[i + 4]; p5 = buff[i + 5];
695
696 LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32)
| (((mlib_s64)sp[0]) & 0xffffffff)
;
697
698 dd.d64 = *(FTYPEmlib_d64 *)(buffi + i);
699 buffn[i + dx_l ] = (FTYPEmlib_d64)dd.i32s.i0;
700 buffn[i + dx_l + 1] = (FTYPEmlib_d64)dd.i32s.i1;
701
702 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ])) <=
(-2147483647 -1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2
*k2 + p3*k3 + p4*k4 + buffd[i ])) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[
i ]))))
;
703 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1])) <=
(-2147483647 -1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3
*k2 + p4*k3 + p5*k4 + buffd[i + 1])) >= 2147483647) ? 2147483647
: (mlib_s32)((p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[
i + 1]))))
;
704
705 dp[0 ] = FROM_S32(d0)((d0) >> 16);
706 dp[chan1] = FROM_S32(d1)((d1) >> 16);
707
708 buffd[i ] = 0.0;
709 buffd[i + 1] = 0.0;
710
711 sp += chan2;
712 dp += chan2;
713 }
714 }
715
716 } else if (kw == 4) {
717
718 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
719
720 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
721
722 if (l < (n - 1) || off < m) {
723 for (i = 0; i <= (wid - 2); i += 2) {
724 p0 = p2; p1 = p3; p2 = p4;
725
726 p3 = buff[i + 3]; p4 = buff[i + 4];
727
728 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
729 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
730 }
731
732 } else {
733 for (i = 0; i <= (wid - 2); i += 2) {
734 p0 = p2; p1 = p3; p2 = p4;
735
736 p3 = buff[i + 3]; p4 = buff[i + 4];
737
738 LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32)
| (((mlib_s64)sp[0]) & 0xffffffff)
;
739
740 dd.d64 = *(FTYPEmlib_d64 *)(buffi + i);
741 buffn[i + dx_l ] = (FTYPEmlib_d64)dd.i32s.i0;
742 buffn[i + dx_l + 1] = (FTYPEmlib_d64)dd.i32s.i1;
743
744 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ])) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + p3*k3 +
buffd[i ])) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*
k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]))))
;
745 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1])) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + p4*k3 +
buffd[i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((
p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]))))
;
746
747 dp[0 ] = FROM_S32(d0)((d0) >> 16);
748 dp[chan1] = FROM_S32(d1)((d1) >> 16);
749
750 buffd[i ] = 0.0;
751 buffd[i + 1] = 0.0;
752
753 sp += chan2;
754 dp += chan2;
755 }
756 }
757
758 } else if (kw == 3) {
759
760 p2 = buff[0]; p3 = buff[1];
761 k0 = pk[0]; k1 = pk[1]; k2 = pk[2];
762
763 if (l < (n - 1) || off < m) {
764 for (i = 0; i <= (wid - 2); i += 2) {
765 p0 = p2; p1 = p3;
766
767 p2 = buff[i + 2]; p3 = buff[i + 3];
768
769 buffd[i ] += p0*k0 + p1*k1 + p2*k2;
770 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
771 }
772
773 } else {
774 for (i = 0; i <= (wid - 2); i += 2) {
775 p0 = p2; p1 = p3;
776
777 p2 = buff[i + 2]; p3 = buff[i + 3];
778
779 LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32)
| (((mlib_s64)sp[0]) & 0xffffffff)
;
780
781 dd.d64 = *(FTYPEmlib_d64 *)(buffi + i);
782 buffn[i + dx_l ] = (FTYPEmlib_d64)dd.i32s.i0;
783 buffn[i + dx_l + 1] = (FTYPEmlib_d64)dd.i32s.i1;
784
785 d0 = D2I(p0*k0 + p1*k1 + p2*k2 + buffd[i ])((((p0*k0 + p1*k1 + p2*k2 + buffd[i ])) <= (-2147483647 -1
)) ? (-2147483647 -1) : ((((p0*k0 + p1*k1 + p2*k2 + buffd[i ]
)) >= 2147483647) ? 2147483647 : (mlib_s32)((p0*k0 + p1*k1
+ p2*k2 + buffd[i ]))))
;
786 d1 = D2I(p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])((((p1*k0 + p2*k1 + p3*k2 + buffd[i + 1])) <= (-2147483647
-1)) ? (-2147483647 -1) : ((((p1*k0 + p2*k1 + p3*k2 + buffd[
i + 1])) >= 2147483647) ? 2147483647 : (mlib_s32)((p1*k0 +
p2*k1 + p3*k2 + buffd[i + 1]))))
;
787
788 dp[0 ] = FROM_S32(d0)((d0) >> 16);
789 dp[chan1] = FROM_S32(d1)((d1) >> 16);
790
791 buffd[i ] = 0.0;
792 buffd[i + 1] = 0.0;
793
794 sp += chan2;
795 dp += chan2;
796 }
797 }
798
799 } else /* if (kw == 2) */ {
800
801 p2 = buff[0];
802 k0 = pk[0]; k1 = pk[1];
803
804 if (l < (n - 1) || off < m) {
805 for (i = 0; i <= (wid - 2); i += 2) {
806 p0 = p2;
807
808 p1 = buff[i + 1]; p2 = buff[i + 2];
809
810 buffd[i ] += p0*k0 + p1*k1;
811 buffd[i + 1] += p1*k0 + p2*k1;
812 }
813
814 } else {
815 for (i = 0; i <= (wid - 2); i += 2) {
816 p0 = p2;
817
818 p1 = buff[i + 1]; p2 = buff[i + 2];
819
820 LOAD_BUFF(buffi)*(mlib_s64*)(buffi + i) = (((mlib_s64)sp[chan1]) << 32)
| (((mlib_s64)sp[0]) & 0xffffffff)
;
821
822 dd.d64 = *(FTYPEmlib_d64 *)(buffi + i);
823 buffn[i + dx_l ] = (FTYPEmlib_d64)dd.i32s.i0;
824 buffn[i + dx_l + 1] = (FTYPEmlib_d64)dd.i32s.i1;
825
826 d0 = D2I(p0*k0 + p1*k1 + buffd[i ])((((p0*k0 + p1*k1 + buffd[i ])) <= (-2147483647 -1)) ? (-2147483647
-1) : ((((p0*k0 + p1*k1 + buffd[i ])) >= 2147483647) ? 2147483647
: (mlib_s32)((p0*k0 + p1*k1 + buffd[i ]))))
;
827 d1 = D2I(p1*k0 + p2*k1 + buffd[i + 1])((((p1*k0 + p2*k1 + buffd[i + 1])) <= (-2147483647 -1)) ? (
-2147483647 -1) : ((((p1*k0 + p2*k1 + buffd[i + 1])) >= 2147483647
) ? 2147483647 : (mlib_s32)((p1*k0 + p2*k1 + buffd[i + 1]))))
;
828
829 dp[0 ] = FROM_S32(d0)((d0) >> 16);
830 dp[chan1] = FROM_S32(d1)((d1) >> 16);
831
832 buffd[i ] = 0.0;
833 buffd[i + 1] = 0.0;
834
835 sp += chan2;
836 dp += chan2;
837 }
838 }
839 }
840
841 pk += kw;
842 }
843 }
844
845 /* last pixels */
846 for (; i < wid; i++) {
847 FTYPEmlib_d64 *pk = k, s = 0;
848 mlib_s32 x, d0;
849
850 for (l = 0; l < n; l++) {
851 FTYPEmlib_d64 *buff = buffc[l] + i;
852
853 for (x = 0; x < m; x++) s += buff[x] * (*pk++);
854 }
855
856 d0 = D2I(s)((((s)) <= (-2147483647 -1)) ? (-2147483647 -1) : ((((s)) >=
2147483647) ? 2147483647 : (mlib_s32)((s))))
;
857 dp[0] = FROM_S32(d0)((d0) >> 16);
858
859 buffn[i + dx_l] = (FTYPEmlib_d64)sp[0];
860
861 sp += chan1;
862 dp += chan1;
863 }
864
865 for (; i < swid; i++) {
866 buffn[i + dx_l] = (FTYPEmlib_d64)sp[0];
867 sp += chan1;
868 }
869
870 for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l];
871 for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1];
872
873 /* next line */
874
875 if (j < hgt - dy_b - 2) sl += sll;
876 dl += dll;
877
878 buff_ind++;
879
880 if (buff_ind >= n + 1) buff_ind = 0;
881 }
882 }
883
884 FREE_AND_RETURN_STATUSif (pbuff != buff) mlib_free(pbuff); if (k != akernel) mlib_free
(k); return status
;
885}
886
887/***************************************************************/
888/* for x86, using integer multiplies is faster */
889
/*
 * STORE_RES(res, x): shift x right by shift2 and store the result in res,
 * saturated to the signed 16-bit range (32767 / -32768) via CLAMP_STORE.
 *
 * Notes for callers:
 *  - x is modified in place (x >>= shift2), so pass a scratch variable.
 *  - shift2 is not a macro parameter; it must be in scope at the use site.
 *  - The expansion is a plain statement sequence (a shift followed by an
 *    if / else-if / else chain) with no do { } while (0) wrapper, so it must
 *    not be used as the unbraced body of an if/else/loop.
 */
890#define STORE_RES(res, x)x >>= shift2; if (x >= 32767) res = 32767; else if (
x <= (-32767 -1)) res = (-32767 -1); else res = (mlib_s16)
x
\
891 x >>= shift2; \
892 CLAMP_STORE(res, x)if (x >= 32767) res = 32767; else if (x <= (-32767 -1))
res = (-32767 -1); else res = (mlib_s16)x
893
/*
 * Integer M x N convolution of a signed 16-bit image with replicated
 * (extended) edges.
 *
 * Parameters:
 *   dst, src   destination / source images; height, width, channel count and
 *              source stride are taken from src, destination stride from dst.
 *   kernel     m*n coefficients, read row by row (m per row, n rows); each
 *              one is shifted right by shift1 before use.
 *   m, n       kernel width and height.
 *   dx_l, dx_r number of columns filled by replicating the first / last
 *              loaded pixel of each buffered row.
 *   dy_t, dy_b control for how many rows the source line pointer is held
 *              back at the top / bottom, so the same source row is re-read.
 *   scale      total right shift; shift2 = scale - shift1 is applied to
 *              every accumulated sum before it is clamped and stored.
 *   cmask      channel mask; channel c is processed only if bit
 *              (nchannel - 1 - c) is set.
 *
 * Returns MLIB_FAILURE if a work buffer cannot be allocated, otherwise
 * MLIB_SUCCESS.
 */
894mlib_status CONV_FUNC_MxN_Imlib_i_convMxNext_s16(mlib_image *dst, const mlib_image *src,
const mlib_s32 *kernel, mlib_s32 m, mlib_s32 n, mlib_s32 dx_l
, mlib_s32 dx_r, mlib_s32 dy_t, mlib_s32 dy_b, mlib_s32 scale
, mlib_s32 cmask)
895{
896 DTYPEmlib_s16 *adr_src, *sl, *sp = NULL((void*)0);
897 DTYPEmlib_s16 *adr_dst, *dl, *dp = NULL((void*)0);
898 mlib_s32 buff[BUFF_SIZE1600], *buffs_arr[2*(MAX_N15 + 1)];
899 mlib_s32 *pbuff = buff;
900 mlib_s32 **buffs = buffs_arr, *buffd;
901 mlib_s32 l, off, kw, bsize, buff_ind;
902 mlib_s32 d0, d1, shift1, shift2;
903 mlib_s32 k0, k1, k2, k3, k4, k5, k6;
904 mlib_s32 p0, p1, p2, p3, p4, p5, p6, p7;
905 mlib_s32 wid, hgt, sll, dll;
906 mlib_s32 nchannel, chan1;
907 mlib_s32 i, j, c, swid;
908 mlib_s32 chan2;
909 mlib_s32 k_locl[MAX_N15*MAX_N15], *k = k_locl;
910 GET_SRC_DST_PARAMETERS(DTYPE)hgt = mlib_ImageGetHeight(src); wid = mlib_ImageGetWidth(src)
; nchannel = mlib_ImageGetChannels(src); sll = mlib_ImageGetStride
(src) / sizeof(mlib_s16); dll = mlib_ImageGetStride(dst) / sizeof
(mlib_s16); adr_src = (mlib_s16 *)mlib_ImageGetData(src); adr_dst
= (mlib_s16 *)mlib_ImageGetData(dst)
;
911
  /* The scale is split in two: shift1 is removed from the kernel
     coefficients up front, the remainder (shift2) from each sum. */
912#if IMG_TYPE2 != 1
913 shift1 = 16;
914#else
915 shift1 = 8;
916#endif /* IMG_TYPE != 1 */
917 shift2 = scale - shift1;
918
  /* chan1 = stride between neighbouring pixels of one channel,
     chan2 = stride for the two-pixels-per-iteration loops below. */
919 chan1 = nchannel;
920 chan2 = chan1 + chan1;
921
  /* One buffered row holds wid + (m - 1) entries. */
922 swid = wid + (m - 1);
923
  /* n + 1 row buffers plus one accumulator row (buffd), swid entries each. */
924 bsize = (n + 2)*swid;
925
  /* Fall back to the heap when the on-stack buffers are too small. The
     row-pointer table (2*(n + 1) pointers) is carved out of the same
     allocation, right after the mlib_s32 data. */
  /* NOTE(review): pbuff + bsize is only guaranteed to be mlib_s32-aligned;
     if bsize is odd the pointer table may be misaligned where pointers are
     wider than mlib_s32 -- confirm this is acceptable on all targets. */
926 if ((bsize > BUFF_SIZE1600) || (n > MAX_N15)) {
927 pbuff = mlib_malloc(sizeof(mlib_s32)*bsize + sizeof(mlib_s32 *)*2*(n + 1));
928
929 if (pbuff == NULL((void*)0)) return MLIB_FAILURE;
930 buffs = (mlib_s32 **)(pbuff + bsize);
931 }
932
  /* The pointer table is stored twice back to back so that
     (buffs + buff_ind)[0..n] never needs a wrap-around check. */
933 for (l = 0; l < (n + 1); l++) buffs[l] = pbuff + l*swid;
934 for (l = 0; l < (n + 1); l++) buffs[l + (n + 1)] = buffs[l];
  /* Accumulator row lives directly after the last row buffer. */
935 buffd = buffs[n] + swid;
936
  /* Kernel copy: local array unless m*n exceeds its capacity. */
937 if (m*n > MAX_N15*MAX_N15) {
938 k = mlib_malloc(sizeof(mlib_s32)*(m*n));
939
940 if (k == NULL((void*)0)) {
941 if (pbuff != buff) mlib_free(pbuff);
942 return MLIB_FAILURE;
943 }
944 }
945
946 for (i = 0; i < m*n; i++) {
947 k[i] = kernel[i] >> shift1;
948 }
949
  /* From here on swid is the number of columns actually read from the
     source per row; the dx_l / dx_r columns are filled by replication. */
950 swid -= (dx_l + dx_r);
951
952 for (c = 0; c < nchannel; c++) {
  /* Skip channels that are not selected in cmask. */
953 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
954
955 sl = adr_src + c;
956 dl = adr_dst + c;
957
  /* Pre-load the first n row buffers, replicating the left and right
     edge pixels. */
958 for (l = 0; l < n; l++) {
959 mlib_s32 *buff = buffs[l];
960
961 for (i = 0; i < dx_l; i++) {
962 buff[i] = (mlib_s32)sl[0];
963 }
964
965 for (i = 0; i < swid; i++) {
966 buff[i + dx_l] = (mlib_s32)sl[i*chan1];
967 }
968
969 for (i = 0; i < dx_r; i++) {
970 buff[swid + dx_l + i] = buff[swid + dx_l - 1];
971 }
972
  /* The source line pointer is only advanced inside this window, so the
     same source row is loaded again outside of it. */
973 if ((l >= dy_t) && (l < hgt + n - dy_b - 2)) sl += sll;
974 }
975
976 buff_ind = 0;
977
  /* Clear the accumulator row before the first output line. */
978 for (i = 0; i < wid; i++) buffd[i] = 0;
979
980 for (j = 0; j < hgt; j++) {
  /* buffc[0..n-1]: the n rows under the kernel for this output line;
     buffn: the row buffer that receives the next source row. */
981 mlib_s32 **buffc = buffs + buff_ind;
982 mlib_s32 *buffn = buffc[n];
983 mlib_s32 *pk = k;
984
985 for (l = 0; l < n; l++) {
986 mlib_s32 *buff_l = buffc[l];
987
  /* Each kernel row is consumed in chunks of kw taps (kw <= MAX_KER). */
988 for (off = 0; off < m;) {
989 mlib_s32 *buff = buff_l + off;
990
  /* sp / dp are rewound for every chunk; only the final chunk of the
     final kernel row advances them. */
991 sp = sl;
992 dp = dl;
993
994 kw = m - off;
995
  /* More than 2*MAX_KER taps left: take MAX_KER. Between MAX_KER + 1 and
     2*MAX_KER: take half now. Otherwise take everything that is left. */
996 if (kw > 2*MAX_KER7) kw = MAX_KER7; else
997 if (kw > MAX_KER7) kw = kw/2;
998 off += kw;
999
  /* In every kw branch below: if this is not the last chunk of the last
     kernel row (l < n - 1 || off < m) the partial sums are only added to
     buffd. Otherwise the final sums are formed, shifted/clamped/stored via
     STORE_RES, buffd is reset to 0, and the next source row is copied into
     buffn on the fly. Two output pixels are produced per iteration. */
1000 if (kw == 7) {
1001
1002 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
1003 p5 = buff[3]; p6 = buff[4]; p7 = buff[5];
1004
1005 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
1006 k4 = pk[4]; k5 = pk[5]; k6 = pk[6];
1007
1008 if (l < (n - 1) || off < m) {
1009 for (i = 0; i <= (wid - 2); i += 2) {
1010 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
1011
1012 p6 = buff[i + 6]; p7 = buff[i + 7];
1013
1014 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6;
1015 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6;
1016 }
1017
1018 } else {
1019 for (i = 0; i <= (wid - 2); i += 2) {
1020 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6; p5 = p7;
1021
1022 p6 = buff[i + 6]; p7 = buff[i + 7];
1023
1024 buffn[i + dx_l ] = (mlib_s32)sp[0];
1025 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1];
1026
1027 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + p6*k6 + buffd[i ]);
1028 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + p7*k6 + buffd[i + 1]);
1029
1030 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else
if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] =
(mlib_s16)d0
;
1031 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else
if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1
] = (mlib_s16)d1
;
1032
1033 buffd[i ] = 0;
1034 buffd[i + 1] = 0;
1035
1036 sp += chan2;
1037 dp += chan2;
1038 }
1039 }
1040
1041 } else if (kw == 6) {
1042
1043 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
1044 p5 = buff[3]; p6 = buff[4];
1045
1046 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
1047 k4 = pk[4]; k5 = pk[5];
1048
1049 if (l < (n - 1) || off < m) {
1050 for (i = 0; i <= (wid - 2); i += 2) {
1051 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1052
1053 p5 = buff[i + 5]; p6 = buff[i + 6];
1054
1055 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5;
1056 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5;
1057 }
1058
1059 } else {
1060 for (i = 0; i <= (wid - 2); i += 2) {
1061 p0 = p2; p1 = p3; p2 = p4; p3 = p5; p4 = p6;
1062
1063 p5 = buff[i + 5]; p6 = buff[i + 6];
1064
1065 buffn[i + dx_l ] = (mlib_s32)sp[0];
1066 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1];
1067
1068 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + p5*k5 + buffd[i ]);
1069 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + p6*k5 + buffd[i + 1]);
1070
1071 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else
if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] =
(mlib_s16)d0
;
1072 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else
if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1
] = (mlib_s16)d1
;
1073
1074 buffd[i ] = 0;
1075 buffd[i + 1] = 0;
1076
1077 sp += chan2;
1078 dp += chan2;
1079 }
1080 }
1081
1082 } else if (kw == 5) {
1083
1084 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
1085 p5 = buff[3];
1086
1087 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
1088 k4 = pk[4];
1089
1090 if (l < (n - 1) || off < m) {
1091 for (i = 0; i <= (wid - 2); i += 2) {
1092 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1093
1094 p4 = buff[i + 4]; p5 = buff[i + 5];
1095
1096 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4;
1097 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4;
1098 }
1099
1100 } else {
1101 for (i = 0; i <= (wid - 2); i += 2) {
1102 p0 = p2; p1 = p3; p2 = p4; p3 = p5;
1103
1104 p4 = buff[i + 4]; p5 = buff[i + 5];
1105
1106 buffn[i + dx_l ] = (mlib_s32)sp[0];
1107 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1];
1108
1109 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + p4*k4 + buffd[i ]);
1110 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + p5*k4 + buffd[i + 1]);
1111
1112 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else
if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] =
(mlib_s16)d0
;
1113 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else
if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1
] = (mlib_s16)d1
;
1114
1115 buffd[i ] = 0;
1116 buffd[i + 1] = 0;
1117
1118 sp += chan2;
1119 dp += chan2;
1120 }
1121 }
1122
1123 } else if (kw == 4) {
1124
1125 p2 = buff[0]; p3 = buff[1]; p4 = buff[2];
1126
1127 k0 = pk[0]; k1 = pk[1]; k2 = pk[2]; k3 = pk[3];
1128
1129 if (l < (n - 1) || off < m) {
1130 for (i = 0; i <= (wid - 2); i += 2) {
1131 p0 = p2; p1 = p3; p2 = p4;
1132
1133 p3 = buff[i + 3]; p4 = buff[i + 4];
1134
1135 buffd[i ] += p0*k0 + p1*k1 + p2*k2 + p3*k3;
1136 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2 + p4*k3;
1137 }
1138
1139 } else {
1140 for (i = 0; i <= (wid - 2); i += 2) {
1141 p0 = p2; p1 = p3; p2 = p4;
1142
1143 p3 = buff[i + 3]; p4 = buff[i + 4];
1144
1145 buffn[i + dx_l ] = (mlib_s32)sp[0];
1146 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1];
1147
1148 d0 = (p0*k0 + p1*k1 + p2*k2 + p3*k3 + buffd[i ]);
1149 d1 = (p1*k0 + p2*k1 + p3*k2 + p4*k3 + buffd[i + 1]);
1150
1151 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else
if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] =
(mlib_s16)d0
;
1152 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else
if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1
] = (mlib_s16)d1
;
1153
1154 buffd[i ] = 0;
1155 buffd[i + 1] = 0;
1156
1157 sp += chan2;
1158 dp += chan2;
1159 }
1160 }
1161
1162 } else if (kw == 3) {
1163
1164 p2 = buff[0]; p3 = buff[1];
1165 k0 = pk[0]; k1 = pk[1]; k2 = pk[2];
1166
1167 if (l < (n - 1) || off < m) {
1168 for (i = 0; i <= (wid - 2); i += 2) {
1169 p0 = p2; p1 = p3;
1170
1171 p2 = buff[i + 2]; p3 = buff[i + 3];
1172
1173 buffd[i ] += p0*k0 + p1*k1 + p2*k2;
1174 buffd[i + 1] += p1*k0 + p2*k1 + p3*k2;
1175 }
1176
1177 } else {
1178 for (i = 0; i <= (wid - 2); i += 2) {
1179 p0 = p2; p1 = p3;
1180
1181 p2 = buff[i + 2]; p3 = buff[i + 3];
1182
1183 buffn[i + dx_l ] = (mlib_s32)sp[0];
1184 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1];
1185
1186 d0 = (p0*k0 + p1*k1 + p2*k2 + buffd[i ]);
1187 d1 = (p1*k0 + p2*k1 + p3*k2 + buffd[i + 1]);
1188
1189 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else
if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] =
(mlib_s16)d0
;
1190 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else
if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1
] = (mlib_s16)d1
;
1191
1192 buffd[i ] = 0;
1193 buffd[i + 1] = 0;
1194
1195 sp += chan2;
1196 dp += chan2;
1197 }
1198 }
1199
1200 } else if (kw == 2) {
1201
1202 p2 = buff[0];
1203 k0 = pk[0]; k1 = pk[1];
1204
1205 if (l < (n - 1) || off < m) {
1206 for (i = 0; i <= (wid - 2); i += 2) {
1207 p0 = p2;
1208
1209 p1 = buff[i + 1]; p2 = buff[i + 2];
1210
1211 buffd[i ] += p0*k0 + p1*k1;
1212 buffd[i + 1] += p1*k0 + p2*k1;
1213 }
1214
1215 } else {
1216 for (i = 0; i <= (wid - 2); i += 2) {
1217 p0 = p2;
1218
1219 p1 = buff[i + 1]; p2 = buff[i + 2];
1220
1221 buffn[i + dx_l ] = (mlib_s32)sp[0];
1222 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1];
1223
1224 d0 = (p0*k0 + p1*k1 + buffd[i ]);
1225 d1 = (p1*k0 + p2*k1 + buffd[i + 1]);
1226
1227 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else
if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] =
(mlib_s16)d0
;
1228 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else
if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1
] = (mlib_s16)d1
;
1229
1230 buffd[i ] = 0;
1231 buffd[i + 1] = 0;
1232
1233 sp += chan2;
1234 dp += chan2;
1235 }
1236 }
1237
1238 } else /* kw == 1 */{
1239
1240 k0 = pk[0];
1241
1242 if (l < (n - 1) || off < m) {
1243 for (i = 0; i <= (wid - 2); i += 2) {
1244 p0 = buff[i]; p1 = buff[i + 1];
1245
1246 buffd[i ] += p0*k0;
1247 buffd[i + 1] += p1*k0;
1248 }
1249
1250 } else {
1251 for (i = 0; i <= (wid - 2); i += 2) {
1252 p0 = buff[i]; p1 = buff[i + 1];
1253
1254 buffn[i + dx_l ] = (mlib_s32)sp[0];
1255 buffn[i + dx_l + 1] = (mlib_s32)sp[chan1];
1256
1257 d0 = (p0*k0 + buffd[i ]);
1258 d1 = (p1*k0 + buffd[i + 1]);
1259
1260 STORE_RES(dp[0 ], d0)d0 >>= shift2; if (d0 >= 32767) dp[0 ] = 32767; else
if (d0 <= (-32767 -1)) dp[0 ] = (-32767 -1); else dp[0 ] =
(mlib_s16)d0
;
1261 STORE_RES(dp[chan1], d1)d1 >>= shift2; if (d1 >= 32767) dp[chan1] = 32767; else
if (d1 <= (-32767 -1)) dp[chan1] = (-32767 -1); else dp[chan1
] = (mlib_s16)d1
;
1262
1263 buffd[i ] = 0;
1264 buffd[i + 1] = 0;
1265
1266 sp += chan2;
1267 dp += chan2;
1268 }
1269 }
1270 }
1271
  /* Advance to the coefficients of the next chunk. */
1272 pk += kw;
1273 }
1274 }
1275
  /* i, sp and dp carry over from the final two-pixel loop above. Any
     column it did not reach (the last one when wid is odd) is computed
     here directly as the full m*n sum from the row buffers, without
     using buffd. */
  /* NOTE(review): this relies on the loops above having run at least once
     (m > 0 and n > 0) so that i, sp and dp are set for this line --
     confirm callers guarantee that. */
1276 /* last pixels */
1277 for (; i < wid; i++) {
1278 mlib_s32 *pk = k, x, s = 0;
1279
1280 for (l = 0; l < n; l++) {
1281 mlib_s32 *buff = buffc[l] + i;
1282
1283 for (x = 0; x < m; x++) s += buff[x] * (*pk++);
1284 }
1285
1286 STORE_RES(dp[0], s)s >>= shift2; if (s >= 32767) dp[0] = 32767; else if
(s <= (-32767 -1)) dp[0] = (-32767 -1); else dp[0] = (mlib_s16
)s
;
1287
1288 buffn[i + dx_l] = (mlib_s32)sp[0];
1289
1290 sp += chan1;
1291 dp += chan1;
1292 }
1293
  /* Finish copying the rest of the next source row into buffn. */
1294 for (; i < swid; i++) {
1295 buffn[i + dx_l] = (mlib_s32)sp[0];
1296 sp += chan1;
1297 }
1298
  /* Replicate the first / last loaded pixel into the dx_l / dx_r columns. */
1299 for (i = 0; i < dx_l; i++) buffn[i] = buffn[dx_l];
1300 for (i = 0; i < dx_r; i++) buffn[swid + dx_l + i] = buffn[swid + dx_l - 1];
1301
1302 /* next line */
1303
  /* The source pointer stops advancing for the last rows (bound depends on
     dy_b), so the same source row is read again for them. */
1304 if (j < hgt - dy_b - 2) sl += sll;
1305 dl += dll;
1306
  /* Rotate the row-buffer window; n + 1 buffers are in rotation. */
1307 buff_ind++;
1308
1309 if (buff_ind >= n + 1) buff_ind = 0;
1310 }
1311 }
1312
  /* Release heap buffers, if the stack ones were not used. */
1313 if (pbuff != buff) mlib_free(pbuff);
1314 if (k != k_locl) mlib_free(k);
1315
1316 return MLIB_SUCCESS;
1317}
1318
1319/***************************************************************/