clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name mlib_ImageConvMxN_Fp.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjava -I /home/daniel/Projects/java/jdk/src/java.base/unix/native/libjava -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -D LIBC=gnu -D _GNU_SOURCE -D _REENTRANT -D _LARGEFILE64_SOURCE -D LINUX -D DEBUG -D _LITTLE_ENDIAN -D ARCH="amd64" -D amd64 -D _LP64=1 -D __USE_J2D_NAMES -D __MEDIALIB_OLD_NAMES -D MLIB_NO_LIBSUNMATH -D MLIB_OS64BIT -I /home/daniel/Projects/java/jdk/src/java.desktop/share/native/libmlib_image -I /home/daniel/Projects/java/jdk/src/java.desktop/share/native/common/awt/medialib -I 
/home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/headers/java.desktop -D _FORTIFY_SOURCE=2 -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-unused-parameter -Wno-unused -Wno-unused-function -std=c99 -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c /home/daniel/Projects/java/jdk/src/java.desktop/share/native/libmlib_image/mlib_ImageConvMxN_Fp.c
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | |
29 | |
30 | |
31 | |
32 | |
33 | |
34 | |
35 | |
36 | |
37 | |
38 | |
39 | |
40 | |
41 | |
42 | |
43 | |
44 | |
45 | |
46 | |
47 | |
48 | |
49 | |
50 | |
51 | |
52 | |
53 | |
54 | |
55 | |
56 | |
57 | |
58 | |
59 | |
60 | |
61 | |
62 | |
63 | |
64 | |
65 | |
66 | |
67 | |
68 | |
69 | |
70 | |
71 | |
72 | |
73 | |
74 | |
75 | #include "mlib_image.h" |
76 | #include "mlib_ImageCheck.h" |
77 | #include "mlib_SysMath.h" |
78 | #include "mlib_ImageConv.h" |
79 | |
80 | |
81 | static void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, |
82 | const mlib_f32 *src, |
83 | const mlib_d64 *kernel, |
84 | mlib_s32 n, |
85 | mlib_s32 m, |
86 | mlib_s32 nch, |
87 | mlib_s32 dnch); |
88 | |
89 | static void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, |
90 | const mlib_f32 *src, |
91 | mlib_s32 n, |
92 | mlib_s32 nch, |
93 | mlib_s32 dx_l, |
94 | mlib_s32 dx_r); |
95 | |
96 | static void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, |
97 | const mlib_d64 *src, |
98 | const mlib_d64 *kernel, |
99 | mlib_s32 n, |
100 | mlib_s32 m, |
101 | mlib_s32 nch, |
102 | mlib_s32 dnch); |
103 | |
104 | static void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, |
105 | const mlib_d64 *src, |
106 | mlib_s32 n, |
107 | mlib_s32 nch, |
108 | mlib_s32 dx_l, |
109 | mlib_s32 dx_r); |
110 | |
111 | |
/*
 * The two-filter helpers mlib_ImageConvMxNMulAdd2_F32 / _D64 are not part of
 * the build (they were only ever declared under "#if 0"), so no prototypes
 * are provided for them.
 */
133 | |
134 | |
135 | mlib_status mlib_ImageConvMxN_Fp(mlib_image *dst, |
136 | const mlib_image *src, |
137 | const mlib_d64 *kernel, |
138 | mlib_s32 m, |
139 | mlib_s32 n, |
140 | mlib_s32 dm, |
141 | mlib_s32 dn, |
142 | mlib_s32 cmask, |
143 | mlib_edge edge) |
144 | { |
145 | mlib_type type; |
146 | |
147 | MLIB_IMAGE_CHECK(dst); |
148 | type = mlib_ImageGetType(dst); |
149 | |
150 | if (type != MLIB_FLOAT && type != MLIB_DOUBLE) |
151 | return MLIB_FAILURE; |
152 | |
153 | return mlib_ImageConvMxN_f(dst, src, kernel, m, n, dm, dn, 0, cmask, edge); |
154 | } |
155 | |
156 | |
157 | void mlib_ImageConvMxNMulAdd_F32(mlib_f32 *dst, |
158 | const mlib_f32 *src, |
159 | const mlib_d64 *kernel, |
160 | mlib_s32 n, |
161 | mlib_s32 m, |
162 | mlib_s32 nch, |
163 | mlib_s32 dnch) |
164 | { |
165 | mlib_f32 *hdst1 = dst + dnch; |
166 | mlib_s32 i, j; |
167 | |
168 | for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { |
| 25 | | Assuming the condition is false | |
|
| 26 | | Loop condition is false. Execution continues on line 191 | |
|
169 | const mlib_f32 *src2 = src + 2 * nch; |
170 | mlib_f32 hval0 = (mlib_f32) kernel[0]; |
171 | mlib_f32 hval1 = (mlib_f32) kernel[1]; |
172 | mlib_f32 hval2 = (mlib_f32) kernel[2]; |
173 | mlib_f32 val0 = src[0]; |
174 | mlib_f32 val1 = src[nch]; |
175 | mlib_f32 hdvl = dst[0]; |
176 | |
177 | for (i = 0; i < n; i++) { |
178 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
179 | mlib_f32 val2 = src2[i * nch]; |
180 | |
181 | hdvl = hdst1[i * dnch]; |
182 | hdvl0 += val1 * hval1; |
183 | hdvl0 += val2 * hval2; |
184 | val0 = val1; |
185 | val1 = val2; |
186 | |
187 | dst[i * dnch] = hdvl0; |
188 | } |
189 | } |
190 | |
191 | if (j < m - 1) { |
| 27 | | Assuming the condition is false | |
|
| |
192 | const mlib_f32 *src2 = src + 2 * nch; |
193 | mlib_f32 hval0 = (mlib_f32) kernel[0]; |
194 | mlib_f32 hval1 = (mlib_f32) kernel[1]; |
195 | mlib_f32 val0 = src[0]; |
196 | mlib_f32 val1 = src[nch]; |
197 | mlib_f32 hdvl = dst[0]; |
198 | for (i = 0; i < n; i++) { |
199 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
200 | mlib_f32 val2 = src2[i * nch]; |
201 | |
202 | hdvl = hdst1[i * dnch]; |
203 | hdvl0 += val1 * hval1; |
204 | val0 = val1; |
205 | val1 = val2; |
206 | |
207 | dst[i * dnch] = hdvl0; |
208 | } |
209 | |
210 | } |
211 | else if (j < m) { |
| |
| |
212 | const mlib_f32 *src2 = src + 2 * nch; |
213 | mlib_f32 hval0 = (mlib_f32) kernel[0]; |
214 | mlib_f32 val0 = src[0]; |
215 | mlib_f32 val1 = src[nch]; |
| 31 | | Assigned value is garbage or undefined |
|
216 | mlib_f32 hdvl = dst[0]; |
217 | |
218 | for (i = 0; i < n; i++) { |
219 | mlib_f32 hdvl0 = val0 * hval0 + hdvl; |
220 | mlib_f32 val2 = src2[i * nch]; |
221 | |
222 | hdvl = hdst1[i * dnch]; |
223 | val0 = val1; |
224 | val1 = val2; |
225 | |
226 | dst[i * dnch] = hdvl0; |
227 | } |
228 | } |
229 | } |
230 | |
231 | |
232 | void mlib_ImageConvMxNF322F32_ext(mlib_f32 *dst, |
233 | const mlib_f32 *src, |
234 | mlib_s32 n, |
235 | mlib_s32 nch, |
236 | mlib_s32 dx_l, |
237 | mlib_s32 dx_r) |
238 | { |
239 | mlib_s32 i; |
240 | mlib_f32 val = src[0]; |
241 | |
242 | for (i = 0; i < dx_l; i++) |
| 14 | | Assuming 'i' is >= 'dx_l' | |
|
| 15 | | Loop condition is false. Execution continues on line 244 | |
|
243 | dst[i] = val; |
244 | for (; i < n - dx_r; i++) |
| 16 | | Assuming the condition is false | |
|
| 17 | | Loop condition is false. Execution continues on line 246 | |
|
245 | dst[i] = src[nch * (i - dx_l)]; |
246 | val = dst[n - dx_r - 1]; |
247 | for (; i < n; i++) |
| |
| 19 | | Loop condition is true. Entering loop body | |
|
| |
| 21 | | Loop condition is false. Execution continues on line 247 | |
|
248 | dst[i] = val; |
249 | } |
250 | |
251 | |
252 | mlib_status mlib_convMxNext_f32(mlib_image *dst, |
253 | const mlib_image *src, |
254 | const mlib_d64 *kernel, |
255 | mlib_s32 m, |
256 | mlib_s32 n, |
257 | mlib_s32 dx_l, |
258 | mlib_s32 dx_r, |
259 | mlib_s32 dy_t, |
260 | mlib_s32 dy_b, |
261 | mlib_s32 cmask) |
262 | { |
263 | mlib_d64 dspace[1024], *dsa = dspace; |
264 | mlib_s32 wid_e = mlib_ImageGetWidth(src); |
265 | mlib_f32 *fsa; |
266 | mlib_f32 *da = mlib_ImageGetData(dst); |
267 | mlib_f32 *sa = mlib_ImageGetData(src); |
268 | mlib_s32 dlb = mlib_ImageGetStride(dst) >> 2; |
269 | mlib_s32 slb = mlib_ImageGetStride(src) >> 2; |
270 | mlib_s32 dw = mlib_ImageGetWidth(dst); |
271 | mlib_s32 dh = mlib_ImageGetHeight(dst); |
272 | mlib_s32 nch = mlib_ImageGetChannels(dst); |
273 | mlib_s32 i, j, j1, k; |
274 | |
275 | if (3 * wid_e + m > 1024) { |
| 1 | Assuming the condition is false | |
|
| |
276 | dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); |
277 | |
278 | if (dsa == NULL) |
279 | return MLIB_FAILURE; |
280 | } |
281 | |
282 | fsa = (mlib_f32 *) dsa; |
283 | |
284 | for (j = 0; j < dh; j++, da += dlb) { |
| |
| 4 | | Loop condition is true. Entering loop body | |
|
285 | for (k = 0; k < nch; k++) |
| |
| 6 | | Loop condition is true. Entering loop body | |
|
286 | if (cmask & (1 << (nch - 1 - k))) { |
| 7 | | Assuming the condition is true | |
|
| |
287 | const mlib_f32 *sa1 = sa + k; |
288 | mlib_f32 *da1 = da + k; |
289 | const mlib_d64 *kernel1 = kernel; |
290 | |
291 | for (i = 0; i < dw; i++) |
| |
| 10 | | Loop condition is false. Execution continues on line 293 | |
|
292 | da1[i * nch] = 0.f; |
293 | for (j1 = 0; j1 < n; j1++, kernel1 += m) { |
| |
| 12 | | Loop condition is true. Entering loop body | |
|
294 | mlib_ImageConvMxNF322F32_ext(fsa, sa1, dw + m - 1, nch, dx_l, dx_r); |
| 13 | | Calling 'mlib_ImageConvMxNF322F32_ext' | |
|
| 22 | | Returning from 'mlib_ImageConvMxNF322F32_ext' | |
|
295 | mlib_ImageConvMxNMulAdd_F32(da1, fsa, kernel1, dw, m, 1, nch); |
| 23 | | Passing the value 1 via 6th parameter 'nch' | |
|
| 24 | | Calling 'mlib_ImageConvMxNMulAdd_F32' | |
|
296 | |
297 | if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) |
298 | sa1 += slb; |
299 | } |
300 | } |
301 | |
302 | if ((j >= dy_t) && (j < dh + n - dy_b - 2)) |
303 | sa += slb; |
304 | } |
305 | |
306 | if (dsa != dspace) |
307 | mlib_free(dsa); |
308 | return MLIB_SUCCESS; |
309 | } |
310 | |
311 | |
/*
 * The two-filter variants mlib_ImageConvMxNMulAdd2_F32 / _D64 that used to
 * sit here were permanently compiled out with "#if 0".  They dropped the
 * const qualifier of src when forming their look-ahead pointer and read
 * input samples beyond the last tap, so they would not have been usable
 * as-is; the dead code has been removed.
 */
512 | |
513 | |
514 | void mlib_ImageConvMxNMulAdd_D64(mlib_d64 *dst, |
515 | const mlib_d64 *src, |
516 | const mlib_d64 *kernel, |
517 | mlib_s32 n, |
518 | mlib_s32 m, |
519 | mlib_s32 nch, |
520 | mlib_s32 dnch) |
521 | { |
522 | mlib_d64 *hdst1 = dst + dnch; |
523 | mlib_s32 i, j; |
524 | |
525 | for (j = 0; j < m - 2; j += 3, src += 3 * nch, kernel += 3) { |
526 | const mlib_d64 *src2 = src + 2 * nch; |
527 | mlib_d64 hval0 = kernel[0]; |
528 | mlib_d64 hval1 = kernel[1]; |
529 | mlib_d64 hval2 = kernel[2]; |
530 | mlib_d64 val0 = src[0]; |
531 | mlib_d64 val1 = src[nch]; |
532 | mlib_d64 hdvl = dst[0]; |
533 | |
534 | for (i = 0; i < n; i++) { |
535 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
536 | mlib_d64 val2 = src2[i * nch]; |
537 | |
538 | hdvl = hdst1[i * dnch]; |
539 | hdvl0 += val1 * hval1; |
540 | hdvl0 += val2 * hval2; |
541 | val0 = val1; |
542 | val1 = val2; |
543 | |
544 | dst[i * dnch] = hdvl0; |
545 | } |
546 | } |
547 | |
548 | if (j < m - 1) { |
549 | const mlib_d64 *src2 = src + 2 * nch; |
550 | mlib_d64 hval0 = kernel[0]; |
551 | mlib_d64 hval1 = kernel[1]; |
552 | mlib_d64 val0 = src[0]; |
553 | mlib_d64 val1 = src[nch]; |
554 | mlib_d64 hdvl = dst[0]; |
555 | |
556 | for (i = 0; i < n; i++) { |
557 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
558 | mlib_d64 val2 = src2[i * nch]; |
559 | |
560 | hdvl = hdst1[i * dnch]; |
561 | hdvl0 += val1 * hval1; |
562 | val0 = val1; |
563 | val1 = val2; |
564 | |
565 | dst[i * dnch] = hdvl0; |
566 | } |
567 | |
568 | } |
569 | else if (j < m) { |
570 | const mlib_d64 *src2 = src + 2 * nch; |
571 | mlib_d64 hval0 = kernel[0]; |
572 | mlib_d64 val0 = src[0]; |
573 | mlib_d64 val1 = src[nch]; |
574 | mlib_d64 hdvl = dst[0]; |
575 | |
576 | for (i = 0; i < n; i++) { |
577 | mlib_d64 hdvl0 = val0 * hval0 + hdvl; |
578 | mlib_d64 val2 = src2[i * nch]; |
579 | |
580 | hdvl = hdst1[i * dnch]; |
581 | val0 = val1; |
582 | val1 = val2; |
583 | |
584 | dst[i * dnch] = hdvl0; |
585 | } |
586 | } |
587 | } |
588 | |
589 | |
590 | void mlib_ImageConvMxND642D64_ext(mlib_d64 *dst, |
591 | const mlib_d64 *src, |
592 | mlib_s32 n, |
593 | mlib_s32 nch, |
594 | mlib_s32 dx_l, |
595 | mlib_s32 dx_r) |
596 | { |
597 | mlib_s32 i; |
598 | mlib_d64 val = src[0]; |
599 | |
600 | for (i = 0; i < dx_l; i++) |
601 | dst[i] = val; |
602 | for (; i < n - dx_r; i++) |
603 | dst[i] = src[nch * (i - dx_l)]; |
604 | val = dst[n - dx_r - 1]; |
605 | for (; i < n; i++) |
606 | dst[i] = val; |
607 | } |
608 | |
609 | |
610 | mlib_status mlib_convMxNext_d64(mlib_image *dst, |
611 | const mlib_image *src, |
612 | const mlib_d64 *kernel, |
613 | mlib_s32 m, |
614 | mlib_s32 n, |
615 | mlib_s32 dx_l, |
616 | mlib_s32 dx_r, |
617 | mlib_s32 dy_t, |
618 | mlib_s32 dy_b, |
619 | mlib_s32 cmask) |
620 | { |
621 | mlib_d64 dspace[1024], *dsa = dspace; |
622 | mlib_s32 wid_e = mlib_ImageGetWidth(src); |
623 | mlib_d64 *da = mlib_ImageGetData(dst); |
624 | mlib_d64 *sa = mlib_ImageGetData(src); |
625 | mlib_s32 dlb = mlib_ImageGetStride(dst) >> 3; |
626 | mlib_s32 slb = mlib_ImageGetStride(src) >> 3; |
627 | mlib_s32 dw = mlib_ImageGetWidth(dst); |
628 | mlib_s32 dh = mlib_ImageGetHeight(dst); |
629 | mlib_s32 nch = mlib_ImageGetChannels(dst); |
630 | mlib_s32 i, j, j1, k; |
631 | |
632 | if (3 * wid_e + m > 1024) { |
633 | dsa = mlib_malloc((3 * wid_e + m) * sizeof(mlib_d64)); |
634 | |
635 | if (dsa == NULL) |
636 | return MLIB_FAILURE; |
637 | } |
638 | |
639 | for (j = 0; j < dh; j++, da += dlb) { |
640 | for (k = 0; k < nch; k++) |
641 | if (cmask & (1 << (nch - 1 - k))) { |
642 | mlib_d64 *sa1 = sa + k; |
643 | mlib_d64 *da1 = da + k; |
644 | const mlib_d64 *kernel1 = kernel; |
645 | |
646 | for (i = 0; i < dw; i++) |
647 | da1[i * nch] = 0.; |
648 | for (j1 = 0; j1 < n; j1++, kernel1 += m) { |
649 | mlib_ImageConvMxND642D64_ext(dsa, sa1, dw + m - 1, nch, dx_l, dx_r); |
650 | mlib_ImageConvMxNMulAdd_D64(da1, dsa, kernel1, dw, m, 1, nch); |
651 | |
652 | if ((j + j1 >= dy_t) && (j + j1 < dh + n - dy_b - 2)) |
653 | sa1 += slb; |
654 | } |
655 | } |
656 | |
657 | if ((j >= dy_t) && (j < dh + n - dy_b - 2)) |
658 | sa += slb; |
659 | } |
660 | |
661 | if (dsa != dspace) |
662 | mlib_free(dsa); |
663 | return MLIB_SUCCESS; |
664 | } |
665 | |
666 | |