Bug Summary

File: jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Warning: line 1577, column 13
Value stored to 'ucme_exit_pc' during its initialization is never read
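
The 'deadcode' checker package enabled in the invocation below (-analyzer-checker=deadcode) includes the dead-store check that produces this class of warning: the initializer writes a value into 'ucme_exit_pc' that is never read before the variable is overwritten or goes out of scope. The flagged line (1577) lies beyond the excerpt shown below, so the following is only a minimal sketch of the pattern with hypothetical names, not the actual code:

    int produce();
    void consume(int);

    void example() {
      int x = produce();  // analyzer: value stored to 'x' during its
                          // initialization is never read
      x = produce();      // the initializer is dead: x is unconditionally
                          // overwritten before any read
      consume(x);
    }

The usual remedies are to drop the dead initializer or to declare the variable at the point of its first real assignment.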

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name stubGenerator_x86_64.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers -Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
1/*
2 * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/macroAssembler.hpp"
27#include "asm/macroAssembler.inline.hpp"
28#include "ci/ciUtilities.hpp"
29#include "compiler/oopMap.hpp"
30#include "gc/shared/barrierSet.hpp"
31#include "gc/shared/barrierSetAssembler.hpp"
32#include "gc/shared/barrierSetNMethod.hpp"
33#include "gc/shared/gc_globals.hpp"
34#include "interpreter/interpreter.hpp"
35#include "memory/universe.hpp"
36#include "nativeInst_x86.hpp"
37#include "oops/instanceOop.hpp"
38#include "oops/method.hpp"
39#include "oops/objArrayKlass.hpp"
40#include "oops/oop.inline.hpp"
41#include "prims/methodHandles.hpp"
42#include "runtime/arguments.hpp"
43#include "runtime/frame.inline.hpp"
44#include "runtime/handles.inline.hpp"
45#include "runtime/sharedRuntime.hpp"
46#include "runtime/stubCodeGenerator.hpp"
47#include "runtime/stubRoutines.hpp"
48#include "runtime/thread.inline.hpp"
49#ifdef COMPILER2
50#include "opto/runtime.hpp"
51#endif
52#if INCLUDE_JVMCI
53#include "jvmci/jvmci_globals.hpp"
54#endif
55#if INCLUDE_ZGC
56#include "gc/z/zThreadLocalData.hpp"
57#endif
58
59// Declaration and definition of StubGenerator (no .hpp file).
60// For a more detailed description of the stub routine structure
61// see the comment in stubRoutines.hpp
62
63#define __ _masm->
64#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
65#define a__ ((Assembler*)_masm)->
66
67#ifdef PRODUCT
68#define BLOCK_COMMENT(str) /* nothing */
69#else
70#define BLOCK_COMMENT(str) __ block_comment(str)
71#endif
72
73#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
74const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
75
76// Stub Code definitions
77
78class StubGenerator: public StubCodeGenerator {
79 private:
80
81#ifdef PRODUCT
82#define inc_counter_np(counter) ((void)0)
83#else
84 void inc_counter_np_(int& counter) {
85 // This can destroy rscratch1 if counter is far from the code cache
86 __ incrementl(ExternalAddress((address)&counter));
87 }
88#define inc_counter_np(counter) \
89 BLOCK_COMMENT("inc_counter " #counter); \
90 inc_counter_np_(counter);
91#endif
92
93 // Call stubs are used to call Java from C
94 //
95 // Linux Arguments:
96 // c_rarg0: call wrapper address address
97 // c_rarg1: result address
98 // c_rarg2: result type BasicType
99 // c_rarg3: method Method*
100 // c_rarg4: (interpreter) entry point address
101 // c_rarg5: parameters intptr_t*
102 // 16(rbp): parameter size (in words) int
103 // 24(rbp): thread Thread*
104 //
105 // [ return_from_Java ] <--- rsp
106 // [ argument word n ]
107 // ...
108 // -12 [ argument word 1 ]
109 // -11 [ saved r15 ] <--- rsp_after_call
110 // -10 [ saved r14 ]
111 // -9 [ saved r13 ]
112 // -8 [ saved r12 ]
113 // -7 [ saved rbx ]
114 // -6 [ call wrapper ]
115 // -5 [ result ]
116 // -4 [ result type ]
117 // -3 [ method ]
118 // -2 [ entry point ]
119 // -1 [ parameters ]
120 // 0 [ saved rbp ] <--- rbp
121 // 1 [ return address ]
122 // 2 [ parameter size ]
123 // 3 [ thread ]
124 //
125 // Windows Arguments:
126 // c_rarg0: call wrapper address address
127 // c_rarg1: result address
128 // c_rarg2: result type BasicType
129 // c_rarg3: method Method*
130 // 48(rbp): (interpreter) entry point address
131 // 56(rbp): parameters intptr_t*
132 // 64(rbp): parameter size (in words) int
133 // 72(rbp): thread Thread*
134 //
135 // [ return_from_Java ] <--- rsp
136 // [ argument word n ]
137 // ...
138 // -60 [ argument word 1 ]
139 // -59 [ saved xmm31 ] <--- rsp after_call
140 // [ saved xmm16-xmm30 ] (EVEX enabled, else the space is blank)
141 // -27 [ saved xmm15 ]
142 // [ saved xmm7-xmm14 ]
143 // -9 [ saved xmm6 ] (each xmm register takes 2 slots)
144 // -7 [ saved r15 ]
145 // -6 [ saved r14 ]
146 // -5 [ saved r13 ]
147 // -4 [ saved r12 ]
148 // -3 [ saved rdi ]
149 // -2 [ saved rsi ]
150 // -1 [ saved rbx ]
151 // 0 [ saved rbp ] <--- rbp
152 // 1 [ return address ]
153 // 2 [ call wrapper ]
154 // 3 [ result ]
155 // 4 [ result type ]
156 // 5 [ method ]
157 // 6 [ entry point ]
158 // 7 [ parameters ]
159 // 8 [ parameter size ]
160 // 9 [ thread ]
161 //
162 // Windows reserves the callers stack space for arguments 1-4.
163 // We spill c_rarg0-c_rarg3 to this space.
164
165 // Call stub stack layout word offsets from rbp
166 enum call_stub_layout {
167#ifdef _WIN64
168 xmm_save_first = 6, // save from xmm6
169 xmm_save_last = 31, // to xmm31
170 xmm_save_base = -9,
171 rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -27
172 r15_off = -7,
173 r14_off = -6,
174 r13_off = -5,
175 r12_off = -4,
176 rdi_off = -3,
177 rsi_off = -2,
178 rbx_off = -1,
179 rbp_off = 0,
180 retaddr_off = 1,
181 call_wrapper_off = 2,
182 result_off = 3,
183 result_type_off = 4,
184 method_off = 5,
185 entry_point_off = 6,
186 parameters_off = 7,
187 parameter_size_off = 8,
188 thread_off = 9
189#else
190 rsp_after_call_off = -12,
191 mxcsr_off = rsp_after_call_off,
192 r15_off = -11,
193 r14_off = -10,
194 r13_off = -9,
195 r12_off = -8,
196 rbx_off = -7,
197 call_wrapper_off = -6,
198 result_off = -5,
199 result_type_off = -4,
200 method_off = -3,
201 entry_point_off = -2,
202 parameters_off = -1,
203 rbp_off = 0,
204 retaddr_off = 1,
205 parameter_size_off = 2,
206 thread_off = 3
207#endif
208 };
209
210#ifdef _WIN64
211 Address xmm_save(int reg) {
212 assert(reg >= xmm_save_first && reg <= xmm_save_last, "XMM register number out of range");
213 return Address(rbp, (xmm_save_base - (reg - xmm_save_first) * 2) * wordSize);
214 }
215#endif
216
217 address generate_call_stub(address& return_address) {
218 assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 &&
219 (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
220 "adjust this code");
221 StubCodeMark mark(this, "StubRoutines", "call_stub");
222 address start = __ pc();
223
224 // same as in generate_catch_exception()!
225 const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
226
227 const Address call_wrapper (rbp, call_wrapper_off * wordSize);
228 const Address result (rbp, result_off * wordSize);
229 const Address result_type (rbp, result_type_off * wordSize);
230 const Address method (rbp, method_off * wordSize);
231 const Address entry_point (rbp, entry_point_off * wordSize);
232 const Address parameters (rbp, parameters_off * wordSize);
233 const Address parameter_size(rbp, parameter_size_off * wordSize);
234
235 // same as in generate_catch_exception()!
236 const Address thread (rbp, thread_off * wordSize);
237
238 const Address r15_save(rbp, r15_off * wordSize);
239 const Address r14_save(rbp, r14_off * wordSize);
240 const Address r13_save(rbp, r13_off * wordSize);
241 const Address r12_save(rbp, r12_off * wordSize);
242 const Address rbx_save(rbp, rbx_off * wordSize);
243
244 // stub code
245 __ enter();
246 __ subptr(rsp, -rsp_after_call_off * wordSize);
247
248 // save register parameters
249#ifndef _WIN64
250 __ movptr(parameters, c_rarg5); // parameters
251 __ movptr(entry_point, c_rarg4); // entry_point
252#endif
253
254 __ movptr(method, c_rarg3); // method
255 __ movl(result_type, c_rarg2); // result type
256 __ movptr(result, c_rarg1); // result
257 __ movptr(call_wrapper, c_rarg0); // call wrapper
258
259 // save regs belonging to calling function
260 __ movptr(rbx_save, rbx);
261 __ movptr(r12_save, r12);
262 __ movptr(r13_save, r13);
263 __ movptr(r14_save, r14);
264 __ movptr(r15_save, r15);
265
266#ifdef _WIN64
267 int last_reg = 15;
268 if (UseAVX > 2) {
269 last_reg = 31;
270 }
271 if (VM_Version::supports_evex()) {
272 for (int i = xmm_save_first; i <= last_reg; i++) {
273 __ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
274 }
275 } else {
276 for (int i = xmm_save_first; i <= last_reg; i++) {
277 __ movdqu(xmm_save(i), as_XMMRegister(i));
278 }
279 }
280
281 const Address rdi_save(rbp, rdi_off * wordSize);
282 const Address rsi_save(rbp, rsi_off * wordSize);
283
284 __ movptr(rsi_save, rsi);
285 __ movptr(rdi_save, rdi);
286#else
287 const Address mxcsr_save(rbp, mxcsr_off * wordSize);
288 {
289 Label skip_ldmx;
290 __ stmxcsr(mxcsr_save);
291 __ movl(rax, mxcsr_save);
292 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
293 ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
294 __ cmp32(rax, mxcsr_std);
295 __ jcc(Assembler::equal, skip_ldmx);
296 __ ldmxcsr(mxcsr_std);
297 __ bind(skip_ldmx);
298 }
299#endif
300
301 // Load up thread register
302 __ movptr(r15_thread, thread);
303 __ reinit_heapbase();
304
305#ifdef ASSERT
306 // make sure we have no pending exceptions
307 {
308 Label L;
309 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
310 __ jcc(Assembler::equal, L);
311 __ stop("StubRoutines::call_stub: entered with pending exception");
312 __ bind(L);
313 }
314#endif
315
316 // pass parameters if any
317 BLOCK_COMMENT("pass parameters if any");
318 Label parameters_done;
319 __ movl(c_rarg3, parameter_size);
320 __ testl(c_rarg3, c_rarg3);
321 __ jcc(Assembler::zero, parameters_done);
322
323 Label loop;
324 __ movptr(c_rarg2, parameters); // parameter pointer
325 __ movl(c_rarg1, c_rarg3); // parameter counter is in c_rarg1
326 __ BIND(loop);
327 __ movptr(rax, Address(c_rarg2, 0));// get parameter
328 __ addptr(c_rarg2, wordSize); // advance to next parameter
329 __ decrementl(c_rarg1); // decrement counter
330 __ push(rax); // pass parameter
331 __ jcc(Assembler::notZero, loop);
332
333 // call Java function
334 __ BIND(parameters_done);
335 __ movptr(rbx, method); // get Method*
336 __ movptr(c_rarg1, entry_point); // get entry_point
337 __ mov(r13, rsp); // set sender sp
338 BLOCK_COMMENT("call Java function");
339 __ call(c_rarg1);
340
341 BLOCK_COMMENT("call_stub_return_address:");
342 return_address = __ pc();
343
344 // store result depending on type (everything that is not
345 // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
346 __ movptr(c_rarg0, result);
347 Label is_long, is_float, is_double, exit;
348 __ movl(c_rarg1, result_type);
349 __ cmpl(c_rarg1, T_OBJECT);
350 __ jcc(Assembler::equal, is_long);
351 __ cmpl(c_rarg1, T_LONG);
352 __ jcc(Assembler::equal, is_long);
353 __ cmpl(c_rarg1, T_FLOAT);
354 __ jcc(Assembler::equal, is_float);
355 __ cmpl(c_rarg1, T_DOUBLE);
356 __ jcc(Assembler::equal, is_double);
357
358 // handle T_INT case
359 __ movl(Address(c_rarg0, 0), rax);
360
361 __ BIND(exit);
362
363 // pop parameters
364 __ lea(rsp, rsp_after_call);
365
366#ifdef ASSERT
367 // verify that threads correspond
368 {
369 Label L1, L2, L3;
370 __ cmpptr(r15_thread, thread);
371 __ jcc(Assembler::equal, L1);
372 __ stop("StubRoutines::call_stub: r15_thread is corrupted");
373 __ bind(L1);
374 __ get_thread(rbx);
375 __ cmpptr(r15_thread, thread);
376 __ jcc(Assembler::equal, L2);
377 __ stop("StubRoutines::call_stub: r15_thread is modified by call");
378 __ bind(L2);
379 __ cmpptr(r15_thread, rbx);
380 __ jcc(Assembler::equal, L3);
381 __ stop("StubRoutines::call_stub: threads must correspond");
382 __ bind(L3);
383 }
384#endif
385
386 // restore regs belonging to calling function
387#ifdef _WIN64
388 // emit the restores for xmm regs
389 if (VM_Version::supports_evex()) {
390 for (int i = xmm_save_first; i <= last_reg; i++) {
391 __ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
392 }
393 } else {
394 for (int i = xmm_save_first; i <= last_reg; i++) {
395 __ movdqu(as_XMMRegister(i), xmm_save(i));
396 }
397 }
398#endif
399 __ movptr(r15, r15_save);
400 __ movptr(r14, r14_save);
401 __ movptr(r13, r13_save);
402 __ movptr(r12, r12_save);
403 __ movptr(rbx, rbx_save);
404
405#ifdef _WIN64
406 __ movptr(rdi, rdi_save);
407 __ movptr(rsi, rsi_save);
408#else
409 __ ldmxcsr(mxcsr_save);
410#endif
411
412 // restore rsp
413 __ addptr(rsp, -rsp_after_call_off * wordSize);
414
415 // return
416 __ vzeroupper();
417 __ pop(rbp);
418 __ ret(0);
419
420 // handle return types different from T_INT
421 __ BIND(is_long);
422 __ movq(Address(c_rarg0, 0), rax);
423 __ jmp(exit);
424
425 __ BIND(is_float);
426 __ movflt(Address(c_rarg0, 0), xmm0);
427 __ jmp(exit);
428
429 __ BIND(is_double);
430 __ movdbl(Address(c_rarg0, 0), xmm0);
431 __ jmp(exit);
432
433 return start;
434 }
435
436 // Return point for a Java call if there's an exception thrown in
437 // Java code. The exception is caught and transformed into a
438 // pending exception stored in JavaThread that can be tested from
439 // within the VM.
440 //
441 // Note: Usually the parameters are removed by the callee. In case
442 // of an exception crossing an activation frame boundary, that is
443 // not the case if the callee is compiled code => need to setup the
444 // rsp.
445 //
446 // rax: exception oop
447
448 address generate_catch_exception() {
449 StubCodeMark mark(this, "StubRoutines", "catch_exception");
450 address start = __ pc();
451
452 // same as in generate_call_stub():
453 const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
454 const Address thread (rbp, thread_off * wordSize);
455
456#ifdef ASSERT
457 // verify that threads correspond
458 {
459 Label L1, L2, L3;
460 __ cmpptr(r15_thread, thread);
461 __ jcc(Assembler::equal, L1);
462 __ stop("StubRoutines::catch_exception: r15_thread is corrupted");
463 __ bind(L1);
464 __ get_thread(rbx);
465 __ cmpptr(r15_thread, thread);
466 __ jcc(Assembler::equal, L2);
467 __ stop("StubRoutines::catch_exception: r15_thread is modified by call");
468 __ bind(L2);
469 __ cmpptr(r15_thread, rbx);
470 __ jcc(Assembler::equal, L3);
471 __ stop("StubRoutines::catch_exception: threads must correspond");
472 __ bind(L3);
473 }
474#endif
475
476 // set pending exception
477 __ verify_oop(rax);
478
479 __ movptr(Address(r15_thread, Thread::pending_exception_offset()), rax);
480 __ lea(rscratch1, ExternalAddress((address)__FILE__));
481 __ movptr(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
482 __ movl(Address(r15_thread, Thread::exception_line_offset()), (int) __LINE__);
483
484 // complete return to VM
485 assert(StubRoutines::_call_stub_return_address != NULL,
486 "_call_stub_return_address must have been generated before");
487 __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
488
489 return start;
490 }
491
492 // Continuation point for runtime calls returning with a pending
493 // exception. The pending exception check happened in the runtime
494 // or native call stub. The pending exception in Thread is
495 // converted into a Java-level exception.
496 //
497 // Contract with Java-level exception handlers:
498 // rax: exception
499 // rdx: throwing pc
500 //
501 // NOTE: At entry of this stub, exception-pc must be on stack !!
502
503 address generate_forward_exception() {
504 StubCodeMark mark(this, "StubRoutines", "forward exception");
505 address start = __ pc();
506
507 // Upon entry, the sp points to the return address returning into
508 // Java (interpreted or compiled) code; i.e., the return address
509 // becomes the throwing pc.
510 //
511 // Arguments pushed before the runtime call are still on the stack
512 // but the exception handler will reset the stack pointer ->
513 // ignore them. A potential result in registers can be ignored as
514 // well.
515
516#ifdef ASSERT
517 // make sure this code is only executed if there is a pending exception
518 {
519 Label L;
520 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL);
521 __ jcc(Assembler::notEqual, L);
522 __ stop("StubRoutines::forward exception: no pending exception (1)");
523 __ bind(L);
524 }
525#endif
526
527 // compute exception handler into rbx
528 __ movptr(c_rarg0, Address(rsp, 0));
529 BLOCK_COMMENT("call exception_handler_for_return_address");
530 __ call_VM_leaf(CAST_FROM_FN_PTR(address,
531 SharedRuntime::exception_handler_for_return_address),
532 r15_thread, c_rarg0);
533 __ mov(rbx, rax);
534
535 // setup rax & rdx, remove return address & clear pending exception
536 __ pop(rdx);
537 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
538 __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
539
540#ifdef ASSERT
541 // make sure exception is set
542 {
543 Label L;
544 __ testptr(rax, rax);
545 __ jcc(Assembler::notEqual, L);
546 __ stop("StubRoutines::forward exception: no pending exception (2)");
547 __ bind(L);
548 }
549#endif
550
551 // continue at exception handler (return address removed)
552 // rax: exception
553 // rbx: exception handler
554 // rdx: throwing pc
555 __ verify_oop(rax);
556 __ jmp(rbx);
557
558 return start;
559 }
560
561 // Support for intptr_t OrderAccess::fence()
562 //
563 // Arguments :
564 //
565 // Result:
566 address generate_orderaccess_fence() {
567 StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
568 address start = __ pc();
569 __ membar(Assembler::StoreLoad);
570 __ ret(0);
571
572 return start;
573 }
574
575
576 // Support for intptr_t get_previous_sp()
577 //
578 // This routine is used to find the previous stack pointer for the
579 // caller.
580 address generate_get_previous_sp() {
581 StubCodeMark mark(this, "StubRoutines", "get_previous_sp");
582 address start = __ pc();
583
584 __ movptr(rax, rsp);
585 __ addptr(rax, 8); // return address is at the top of the stack.
586 __ ret(0);
587
588 return start;
589 }
590
591 //----------------------------------------------------------------------------------------------------
592 // Support for void verify_mxcsr()
593 //
594 // This routine is used with -Xcheck:jni to verify that native
595 // JNI code does not return to Java code without restoring the
596 // MXCSR register to our expected state.
597
598 address generate_verify_mxcsr() {
599 StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
600 address start = __ pc();
601
602 const Address mxcsr_save(rsp, 0);
603
604 if (CheckJNICalls) {
605 Label ok_ret;
606 ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
607 __ push(rax);
608 __ subptr(rsp, wordSize); // allocate a temp location
609 __ stmxcsr(mxcsr_save);
610 __ movl(rax, mxcsr_save);
611 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
612 __ cmp32(rax, mxcsr_std);
613 __ jcc(Assembler::equal, ok_ret);
614
615 __ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");
616
617 __ ldmxcsr(mxcsr_std);
618
619 __ bind(ok_ret);
620 __ addptr(rsp, wordSize);
621 __ pop(rax);
622 }
623
624 __ ret(0);
625
626 return start;
627 }
628
629 address generate_f2i_fixup() {
630 StubCodeMark mark(this, "StubRoutines", "f2i_fixup");
631 Address inout(rsp, 5 * wordSize); // return address + 4 saves
632
633 address start = __ pc();
634
635 Label L;
636
637 __ push(rax);
638 __ push(c_rarg3);
639 __ push(c_rarg2);
640 __ push(c_rarg1);
641
642 __ movl(rax, 0x7f800000);
643 __ xorl(c_rarg3, c_rarg3);
644 __ movl(c_rarg2, inout);
645 __ movl(c_rarg1, c_rarg2);
646 __ andl(c_rarg1, 0x7fffffff);
647 __ cmpl(rax, c_rarg1); // NaN? -> 0
648 __ jcc(Assembler::negative, L);
649 __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint
650 __ movl(c_rarg3, 0x80000000);
651 __ movl(rax, 0x7fffffff);
652 __ cmovl(Assembler::positive, c_rarg3, rax);
653
654 __ bind(L);
655 __ movptr(inout, c_rarg3);
656
657 __ pop(c_rarg1);
658 __ pop(c_rarg2);
659 __ pop(c_rarg3);
660 __ pop(rax);
661
662 __ ret(0);
663
664 return start;
665 }
666
667 address generate_f2l_fixup() {
668 StubCodeMark mark(this, "StubRoutines", "f2l_fixup");
669 Address inout(rsp, 5 * wordSize); // return address + 4 saves
670 address start = __ pc();
671
672 Label L;
673
674 __ push(rax);
675 __ push(c_rarg3);
676 __ push(c_rarg2);
677 __ push(c_rarg1);
678
679 __ movl(rax, 0x7f800000);
680 __ xorl(c_rarg3, c_rarg3);
681 __ movl(c_rarg2, inout);
682 __ movl(c_rarg1, c_rarg2);
683 __ andl(c_rarg1, 0x7fffffff);
684 __ cmpl(rax, c_rarg1); // NaN? -> 0
685 __ jcc(Assembler::negative, L);
686 __ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
687 __ mov64(c_rarg3, 0x8000000000000000);
688 __ mov64(rax, 0x7fffffffffffffff);
689 __ cmov(Assembler::positive, c_rarg3, rax);
690
691 __ bind(L);
692 __ movptr(inout, c_rarg3);
693
694 __ pop(c_rarg1);
695 __ pop(c_rarg2);
696 __ pop(c_rarg3);
697 __ pop(rax);
698
699 __ ret(0);
700
701 return start;
702 }
703
704 address generate_d2i_fixup() {
705 StubCodeMark mark(this, "StubRoutines", "d2i_fixup");
706 Address inout(rsp, 6 * wordSize); // return address + 5 saves
707
708 address start = __ pc();
709
710 Label L;
711
712 __ push(rax);
713 __ push(c_rarg3);
714 __ push(c_rarg2);
715 __ push(c_rarg1);
716 __ push(c_rarg0);
717
718 __ movl(rax, 0x7ff00000);
719 __ movq(c_rarg2, inout);
720 __ movl(c_rarg3, c_rarg2);
721 __ mov(c_rarg1, c_rarg2);
722 __ mov(c_rarg0, c_rarg2);
723 __ negl(c_rarg3);
724 __ shrptr(c_rarg1, 0x20);
725 __ orl(c_rarg3, c_rarg2);
726 __ andl(c_rarg1, 0x7fffffff);
727 __ xorl(c_rarg2, c_rarg2);
728 __ shrl(c_rarg3, 0x1f);
729 __ orl(c_rarg1, c_rarg3);
730 __ cmpl(rax, c_rarg1);
731 __ jcc(Assembler::negative, L); // NaN -> 0
732 __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
733 __ movl(c_rarg2, 0x80000000);
734 __ movl(rax, 0x7fffffff);
735 __ cmov(Assembler::positive, c_rarg2, rax);
736
737 __ bind(L);
738 __ movptr(inout, c_rarg2);
739
740 __ pop(c_rarg0);
741 __ pop(c_rarg1);
742 __ pop(c_rarg2);
743 __ pop(c_rarg3);
744 __ pop(rax);
745
746 __ ret(0);
747
748 return start;
749 }
750
751 address generate_d2l_fixup() {
752 StubCodeMark mark(this, "StubRoutines", "d2l_fixup");
753 Address inout(rsp, 6 * wordSize); // return address + 5 saves
754
755 address start = __ pc();
756
757 Label L;
758
759 __ push(rax);
760 __ push(c_rarg3);
761 __ push(c_rarg2);
762 __ push(c_rarg1);
763 __ push(c_rarg0);
764
765 __ movl(rax, 0x7ff00000);
766 __ movq(c_rarg2, inout);
767 __ movl(c_rarg3, c_rarg2);
768 __ mov(c_rarg1, c_rarg2);
769 __ mov(c_rarg0, c_rarg2);
770 __ negl(c_rarg3);
771 __ shrptr(c_rarg1, 0x20);
772 __ orl(c_rarg3, c_rarg2);
773 __ andl(c_rarg1, 0x7fffffff);
774 __ xorl(c_rarg2, c_rarg2);
775 __ shrl(c_rarg3, 0x1f);
776 __ orl(c_rarg1, c_rarg3);
777 __ cmpl(rax, c_rarg1);
778 __ jcc(Assembler::negative, L); // NaN -> 0
779 __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong
780 __ mov64(c_rarg2, 0x8000000000000000);
781 __ mov64(rax, 0x7fffffffffffffff);
782 __ cmovq(Assembler::positive, c_rarg2, rax);
783
784 __ bind(L);
785 __ movq(inout, c_rarg2);
786
787 __ pop(c_rarg0);
788 __ pop(c_rarg1);
789 __ pop(c_rarg2);
790 __ pop(c_rarg3);
791 __ pop(rax);
792
793 __ ret(0);
794
795 return start;
796 }
797
798 address generate_iota_indices(const char *stub_name) {
799 __ align(CodeEntryAlignment);
800 StubCodeMark mark(this, "StubRoutines", stub_name);
801 address start = __ pc();
802 __ emit_data64(0x0706050403020100, relocInfo::none);
803 __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
804 __ emit_data64(0x1716151413121110, relocInfo::none);
805 __ emit_data64(0x1F1E1D1C1B1A1918, relocInfo::none);
806 __ emit_data64(0x2726252423222120, relocInfo::none);
807 __ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
808 __ emit_data64(0x3736353433323130, relocInfo::none);
809 __ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
810 return start;
811 }
812
813 address generate_vector_byte_shuffle_mask(const char *stub_name) {
814 __ align(CodeEntryAlignment);
815 StubCodeMark mark(this, "StubRoutines", stub_name);
816 address start = __ pc();
817 __ emit_data64(0x7070707070707070, relocInfo::none);
818 __ emit_data64(0x7070707070707070, relocInfo::none);
819 __ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
820 __ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
821 return start;
822 }
823
824 address generate_fp_mask(const char *stub_name, int64_t mask) {
825 __ align(CodeEntryAlignment);
826 StubCodeMark mark(this, "StubRoutines", stub_name);
827 address start = __ pc();
828
829 __ emit_data64( mask, relocInfo::none );
830 __ emit_data64( mask, relocInfo::none );
831
832 return start;
833 }
834
835 address generate_vector_mask(const char *stub_name, int64_t mask) {
836 __ align(CodeEntryAlignment);
837 StubCodeMark mark(this, "StubRoutines", stub_name);
838 address start = __ pc();
839
840 __ emit_data64(mask, relocInfo::none);
841 __ emit_data64(mask, relocInfo::none);
842 __ emit_data64(mask, relocInfo::none);
843 __ emit_data64(mask, relocInfo::none);
844 __ emit_data64(mask, relocInfo::none);
845 __ emit_data64(mask, relocInfo::none);
846 __ emit_data64(mask, relocInfo::none);
847 __ emit_data64(mask, relocInfo::none);
848
849 return start;
850 }
851
852 address generate_vector_byte_perm_mask(const char *stub_name) {
853 __ align(CodeEntryAlignment);
854 StubCodeMark mark(this, "StubRoutines", stub_name);
855 address start = __ pc();
856
857 __ emit_data64(0x0000000000000001, relocInfo::none);
858 __ emit_data64(0x0000000000000003, relocInfo::none);
859 __ emit_data64(0x0000000000000005, relocInfo::none);
860 __ emit_data64(0x0000000000000007, relocInfo::none);
861 __ emit_data64(0x0000000000000000, relocInfo::none);
862 __ emit_data64(0x0000000000000002, relocInfo::none);
863 __ emit_data64(0x0000000000000004, relocInfo::none);
864 __ emit_data64(0x0000000000000006, relocInfo::none);
865
866 return start;
867 }
868
869 address generate_vector_fp_mask(const char *stub_name, int64_t mask) {
870 __ align(CodeEntryAlignment);
871 StubCodeMark mark(this, "StubRoutines", stub_name);
872 address start = __ pc();
873
874 __ emit_data64(mask, relocInfo::none);
875 __ emit_data64(mask, relocInfo::none);
876 __ emit_data64(mask, relocInfo::none);
877 __ emit_data64(mask, relocInfo::none);
878 __ emit_data64(mask, relocInfo::none);
879 __ emit_data64(mask, relocInfo::none);
880 __ emit_data64(mask, relocInfo::none);
881 __ emit_data64(mask, relocInfo::none);
882
883 return start;
884 }
885
886 address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
887 int32_t val0, int32_t val1, int32_t val2, int32_t val3,
888 int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
889 int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
890 int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
891 __ align(CodeEntryAlignment);
892 StubCodeMark mark(this, "StubRoutines", stub_name);
893 address start = __ pc();
894
895 assert(len != Assembler::AVX_NoVec, "vector len must be specified");
896 __ emit_data(val0, relocInfo::none, 0);
897 __ emit_data(val1, relocInfo::none, 0);
898 __ emit_data(val2, relocInfo::none, 0);
899 __ emit_data(val3, relocInfo::none, 0);
900 if (len >= Assembler::AVX_256bit) {
901 __ emit_data(val4, relocInfo::none, 0);
902 __ emit_data(val5, relocInfo::none, 0);
903 __ emit_data(val6, relocInfo::none, 0);
904 __ emit_data(val7, relocInfo::none, 0);
905 if (len >= Assembler::AVX_512bit) {
906 __ emit_data(val8, relocInfo::none, 0);
907 __ emit_data(val9, relocInfo::none, 0);
908 __ emit_data(val10, relocInfo::none, 0);
909 __ emit_data(val11, relocInfo::none, 0);
910 __ emit_data(val12, relocInfo::none, 0);
911 __ emit_data(val13, relocInfo::none, 0);
912 __ emit_data(val14, relocInfo::none, 0);
913 __ emit_data(val15, relocInfo::none, 0);
914 }
915 }
916
917 return start;
918 }
919
920 // Non-destructive plausibility checks for oops
921 //
922 // Arguments:
923 // all args on stack!
924 //
925 // Stack after saving c_rarg3:
926 // [tos + 0]: saved c_rarg3
927 // [tos + 1]: saved c_rarg2
928 // [tos + 2]: saved r12 (several TemplateTable methods use it)
929 // [tos + 3]: saved flags
930 // [tos + 4]: return address
931 // * [tos + 5]: error message (char*)
932 // * [tos + 6]: object to verify (oop)
933 // * [tos + 7]: saved rax - saved by caller and bashed
934 // * [tos + 8]: saved r10 (rscratch1) - saved by caller
935 // * = popped on exit
936 address generate_verify_oop() {
937 StubCodeMark mark(this, "StubRoutines", "verify_oop");
938 address start = __ pc();
939
940 Label exit, error;
941
942 __ pushf();
943 __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
944
945 __ push(r12);
946
947 // save c_rarg2 and c_rarg3
948 __ push(c_rarg2);
949 __ push(c_rarg3);
950
951 enum {
952 // After previous pushes.
953 oop_to_verify = 6 * wordSize,
954 saved_rax = 7 * wordSize,
955 saved_r10 = 8 * wordSize,
956
957 // Before the call to MacroAssembler::debug(), see below.
958 return_addr = 16 * wordSize,
959 error_msg = 17 * wordSize
960 };
961
962 // get object
963 __ movptr(rax, Address(rsp, oop_to_verify));
964
965 // make sure object is 'reasonable'
966 __ testptr(rax, rax);
967 __ jcc(Assembler::zero, exit); // if obj is NULL it is OK
968
969#if INCLUDE_ZGC
970 if (UseZGC) {
971 // Check if metadata bits indicate a bad oop
972 __ testptr(rax, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
973 __ jcc(Assembler::notZero, error);
974 }
975#endif
976
977 // Check if the oop is in the right area of memory
978 __ movptr(c_rarg2, rax);
979 __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_mask());
980 __ andptr(c_rarg2, c_rarg3);
981 __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_bits());
982 __ cmpptr(c_rarg2, c_rarg3);
983 __ jcc(Assembler::notZero, error);
984
985 // make sure klass is 'reasonable', which is not zero.
986 __ load_klass(rax, rax, rscratch1); // get klass
987 __ testptr(rax, rax);
988 __ jcc(Assembler::zero, error); // if klass is NULL it is broken
989
990 // return if everything seems ok
991 __ bind(exit);
992 __ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
993 __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
994 __ pop(c_rarg3); // restore c_rarg3
995 __ pop(c_rarg2); // restore c_rarg2
996 __ pop(r12); // restore r12
997 __ popf(); // restore flags
998 __ ret(4 * wordSize); // pop caller saved stuff
999
1000 // handle errors
1001 __ bind(error);
1002 __ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
1003 __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
1004 __ pop(c_rarg3); // get saved c_rarg3 back
1005 __ pop(c_rarg2); // get saved c_rarg2 back
1006 __ pop(r12); // get saved r12 back
1007 __ popf(); // get saved flags off stack --
1008 // will be ignored
1009
1010 __ pusha(); // push registers
1011 // (rip is already
1012 // already pushed)
1013 // debug(char* msg, int64_t pc, int64_t regs[])
1014 // We've popped the registers we'd saved (c_rarg3, c_rarg2 and flags), and
1015 // pushed all the registers, so now the stack looks like:
1016 // [tos + 0] 16 saved registers
1017 // [tos + 16] return address
1018 // * [tos + 17] error message (char*)
1019 // * [tos + 18] object to verify (oop)
1020 // * [tos + 19] saved rax - saved by caller and bashed
1021 // * [tos + 20] saved r10 (rscratch1) - saved by caller
1022 // * = popped on exit
1023
1024 __ movptr(c_rarg0, Address(rsp, error_msg)); // pass address of error message
1025 __ movptr(c_rarg1, Address(rsp, return_addr)); // pass return address
1026 __ movq(c_rarg2, rsp); // pass address of regs on stack
1027 __ mov(r12, rsp); // remember rsp
1028 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1029 __ andptr(rsp, -16); // align stack as required by ABI
1030 BLOCK_COMMENT("call MacroAssembler::debug");
1031 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
1032 __ hlt();
1033 return start;
1034 }
1035
1036 //
1037 // Verify that a register contains clean 32-bits positive value
1038 // (high 32-bits are 0) so it could be used in 64-bits shifts.
1039 //
1040 // Input:
1041 // Rint - 32-bits value
1042 // Rtmp - scratch
1043 //
1044 void assert_clean_int(Register Rint, Register Rtmp) {
1045#ifdef ASSERT
1046 Label L;
1047 assert_different_registers(Rtmp, Rint);
1048 __ movslq(Rtmp, Rint);
1049 __ cmpq(Rtmp, Rint);
1050 __ jcc(Assembler::equal, L);
1051 __ stop("high 32-bits of int value are not 0");
1052 __ bind(L);
1053#endif
1054 }
1055
1056 // Generate overlap test for array copy stubs
1057 //
1058 // Input:
1059 // c_rarg0 - from
1060 // c_rarg1 - to
1061 // c_rarg2 - element count
1062 //
1063 // Output:
1064 // rax - &from[element count - 1]
1065 //
1066 void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
1067 assert(no_overlap_target != NULL, "must be generated");
1068 array_overlap_test(no_overlap_target, NULL, sf);
1069 }
1070 void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
1071 array_overlap_test(NULL, &L_no_overlap, sf);
1072 }
1073 void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) {
1074 const Register from = c_rarg0;
1075 const Register to = c_rarg1;
1076 const Register count = c_rarg2;
1077 const Register end_from = rax;
1078
1079 __ cmpptr(to, from);
1080 __ lea(end_from, Address(from, count, sf, 0));
1081 if (NOLp == NULL) {
1082 ExternalAddress no_overlap(no_overlap_target);
1083 __ jump_cc(Assembler::belowEqual, no_overlap);
1084 __ cmpptr(to, end_from);
1085 __ jump_cc(Assembler::aboveEqual, no_overlap);
1086 } else {
1087 __ jcc(Assembler::belowEqual, (*NOLp));
1088 __ cmpptr(to, end_from);
1089 __ jcc(Assembler::aboveEqual, (*NOLp));
1090 }
1091 }
1092
1093 // Shuffle first three arg regs on Windows into Linux/Solaris locations.
1094 //
1095 // Outputs:
1096 // rdi - rcx
1097 // rsi - rdx
1098 // rdx - r8
1099 // rcx - r9
1100 //
1101 // Registers r9 and r10 are used to save rdi and rsi on Windows, which latter
1102 // are non-volatile. r9 and r10 should not be used by the caller.
1103 //
1104 DEBUG_ONLY(bool regs_in_thread;)
1105
1106 void setup_arg_regs(int nargs = 3) {
1107 const Register saved_rdi = r9;
1108 const Register saved_rsi = r10;
1109 assert(nargs == 3 || nargs == 4, "else fix");
1110#ifdef _WIN64
1111 assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1112 "unexpected argument registers");
1113 if (nargs >= 4)
1114 __ mov(rax, r9); // r9 is also saved_rdi
1115 __ movptr(saved_rdi, rdi);
1116 __ movptr(saved_rsi, rsi);
1117 __ mov(rdi, rcx); // c_rarg0
1118 __ mov(rsi, rdx); // c_rarg1
1119 __ mov(rdx, r8); // c_rarg2
1120 if (nargs >= 4)
1121 __ mov(rcx, rax); // c_rarg3 (via rax)
1122#else
1123 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1124 "unexpected argument registers");
1125#endif
1126 DEBUG_ONLY(regs_in_thread = false;)
1127 }
1128
1129 void restore_arg_regs() {
1130 assert(!regs_in_thread, "wrong call to restore_arg_regs");
1131 const Register saved_rdi = r9;
1132 const Register saved_rsi = r10;
1133#ifdef _WIN64
1134 __ movptr(rdi, saved_rdi);
1135 __ movptr(rsi, saved_rsi);
1136#endif
1137 }
1138
1139 // This is used in places where r10 is a scratch register, and can
1140 // be adapted if r9 is needed also.
1141 void setup_arg_regs_using_thread() {
1142 const Register saved_r15 = r9;
1143#ifdef _WIN64
1144 __ mov(saved_r15, r15); // r15 is callee saved and needs to be restored
1145 __ get_thread(r15_thread);
1146 assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1147 "unexpected argument registers");
1148 __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())), rdi);
1149 __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())), rsi);
1150
1151 __ mov(rdi, rcx); // c_rarg0
1152 __ mov(rsi, rdx); // c_rarg1
1153 __ mov(rdx, r8); // c_rarg2
1154#else
1155 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1156 "unexpected argument registers");
1157#endif
1158 DEBUG_ONLY(regs_in_thread = true;)
1159 }
1160
1161 void restore_arg_regs_using_thread() {
1162 assert(regs_in_thread, "wrong call to restore_arg_regs");
1163 const Register saved_r15 = r9;
1164#ifdef _WIN64
1165 __ get_thread(r15_thread);
1166 __ movptr(rsi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())));
1167 __ movptr(rdi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())));
1168 __ mov(r15, saved_r15); // r15 is callee saved and needs to be restored
1169#endif
1170 }
1171
1172 // Copy big chunks forward
1173 //
1174 // Inputs:
1175 // end_from - source arrays end address
1176 // end_to - destination array end address
1177 // qword_count - 64-bits element count, negative
1178 // to - scratch
1179 // L_copy_bytes - entry label
1180 // L_copy_8_bytes - exit label
1181 //
1182 void copy_bytes_forward(Register end_from, Register end_to,
1183 Register qword_count, Register to,
1184 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1185 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1186 Label L_loop;
1187 __ align(OptoLoopAlignment);
1188 if (UseUnalignedLoadStores) {
1189 Label L_end;
1190 __ BIND(L_loop);
1191 if (UseAVX >= 2) {
1192 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1193 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1194 __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
1195 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
1196 } else {
1197 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1198 __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1199 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
1200 __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
1201 __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
1202 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
1203 __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
1204 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
1205 }
1206
1207 __ BIND(L_copy_bytes);
1208 __ addptr(qword_count, 8);
1209 __ jcc(Assembler::lessEqual, L_loop);
1210 __ subptr(qword_count, 4); // sub(8) and add(4)
1211 __ jccb(Assembler::greater, L_end);
1212 // Copy trailing 32 bytes
1213 if (UseAVX >= 2) {
1214 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1215 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1216 } else {
1217 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1218 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1219 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
1220 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
1221 }
1222 __ addptr(qword_count, 4);
1223 __ BIND(L_end);
1224 if (UseAVX >= 2) {
1225 // clean upper bits of YMM registers
1226 __ vpxor(xmm0, xmm0);
1227 __ vpxor(xmm1, xmm1);
1228 }
1229 } else {
1230 // Copy 32-bytes per iteration
1231 __ BIND(L_loop);
1232 __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
1233 __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
1234 __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
1235 __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
1236 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
1237 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
1238 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
1239 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
1240
1241 __ BIND(L_copy_bytes);
1242 __ addptr(qword_count, 4);
1243 __ jcc(Assembler::lessEqual, L_loop);
1244 }
1245 __ subptr(qword_count, 4);
1246 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
1247 }
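The forward loop above indexes off the end pointers with a negated qword_count that counts up toward zero, so one induction variable drives both the blocked main loop and the trailing-qword loop. A minimal scalar model of that control flow, as a sketch with hypothetical names rather than the stub itself:

    // Sketch only: models the negative-index scheme of copy_bytes_forward.
    #include <cstddef>
    #include <cstdint>

    static void copy_qwords_forward_sketch(const uint64_t* from, uint64_t* to,
                                           ptrdiff_t qword_count) {
      const uint64_t* end_from = from + qword_count - 1;  // lea(end_from, ..., -8)
      uint64_t*       end_to   = to   + qword_count - 1;  // lea(end_to, ..., -8)
      ptrdiff_t i = -qword_count;                         // negptr(qword_count)
      for (; i + 4 <= 0; i += 4) {                        // main loop: 32 bytes per iteration
        for (int j = 1; j <= 4; j++) {
          end_to[i + j] = end_from[i + j];
        }
      }
      for (; i < 0; i++) {                                // L_copy_8_bytes: trailing qwords
        end_to[i + 1] = end_from[i + 1];
      }
    }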
1248
1249 // Copy big chunks backward
1250 //
1251 // Inputs:
1252 // from - source arrays address
1253 // dest - destination array address
1254 // qword_count - 64-bits element count
1255 // to - scratch
1256 // L_copy_bytes - entry label
1257 // L_copy_8_bytes - exit label
1258 //
1259 void copy_bytes_backward(Register from, Register dest,
1260 Register qword_count, Register to,
1261 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1262     DEBUG_ONLY(__ stop("enter at entry label, not here"));
1263     Label L_loop;
1264     __ align(OptoLoopAlignment);
1265     if (UseUnalignedLoadStores) {
1266       Label L_end;
1267       __ BIND(L_loop);
1268       if (UseAVX >= 2) {
1269         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
1270         __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
1271         __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1272         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1273       } else {
1274         __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
1275         __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
1276         __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
1277         __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
1278         __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
1279         __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
1280         __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
1281         __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
1282       }
1283
1284       __ BIND(L_copy_bytes);
1285       __ subptr(qword_count, 8);
1286       __ jcc(Assembler::greaterEqual, L_loop);
1287
1288       __ addptr(qword_count, 4);  // add(8) and sub(4)
1289       __ jccb(Assembler::less, L_end);
1290       // Copy trailing 32 bytes
1291       if (UseAVX >= 2) {
1292         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
1293         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
1294       } else {
1295         __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
1296         __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
1297         __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1298         __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1299       }
1300       __ subptr(qword_count, 4);
1301       __ BIND(L_end);
1302       if (UseAVX >= 2) {
1303         // clean upper bits of YMM registers
1304         __ vpxor(xmm0, xmm0);
1305         __ vpxor(xmm1, xmm1);
1306       }
1307     } else {
1308       // Copy 32-bytes per iteration
1309       __ BIND(L_loop);
1310       __ movq(to, Address(from, qword_count, Address::times_8, 24));
1311       __ movq(Address(dest, qword_count, Address::times_8, 24), to);
1312       __ movq(to, Address(from, qword_count, Address::times_8, 16));
1313       __ movq(Address(dest, qword_count, Address::times_8, 16), to);
1314       __ movq(to, Address(from, qword_count, Address::times_8, 8));
1315       __ movq(Address(dest, qword_count, Address::times_8, 8), to);
1316       __ movq(to, Address(from, qword_count, Address::times_8, 0));
1317       __ movq(Address(dest, qword_count, Address::times_8, 0), to);
1318
1319       __ BIND(L_copy_bytes);
1320       __ subptr(qword_count, 4);
1321       __ jcc(Assembler::greaterEqual, L_loop);
1322     }
1323     __ addptr(qword_count, 4);
1324     __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
1325 }
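The backward variant mirrors this with a positive qword_count that counts down to zero; sketched the same way (hypothetical names, not the stub):

    // Sketch only: models copy_bytes_backward's descending index.
    #include <cstddef>
    #include <cstdint>

    static void copy_qwords_backward_sketch(const uint64_t* from, uint64_t* to,
                                            ptrdiff_t qword_count) {
      ptrdiff_t i = qword_count;
      for (; i - 4 >= 0; i -= 4) {      // main loop: highest 32-byte block first
        for (int j = 3; j >= 0; j--) {
          to[i - 4 + j] = from[i - 4 + j];
        }
      }
      for (; i > 0; i--) {              // L_copy_8_bytes: trailing qwords, high to low
        to[i - 1] = from[i - 1];
      }
    }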
1326
1327#ifndef PRODUCT
1328 int& get_profile_ctr(int shift) {
1329     if (0 == shift)
1330       return SharedRuntime::_jbyte_array_copy_ctr;
1331     else if (1 == shift)
1332       return SharedRuntime::_jshort_array_copy_ctr;
1333     else if (2 == shift)
1334       return SharedRuntime::_jint_array_copy_ctr;
1336 return SharedRuntime::_jlong_array_copy_ctr;
1337 }
1338#endif
1339
1340 void setup_argument_regs(BasicType type) {
1341 if (type == T_BYTE || type == T_SHORT) {
1342 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1343 // r9 and r10 may be used to save non-volatile registers
1344 } else {
1345 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
1346 // r9 is used to save r15_thread
1347 }
1348 }
1349
1350 void restore_argument_regs(BasicType type) {
1351 if (type == T_BYTE || type == T_SHORT) {
1352 restore_arg_regs();
1353 } else {
1354 restore_arg_regs_using_thread();
1355 }
1356 }
1357
1358#if COMPILER2_OR_JVMCI
1359   // Note: The following rules apply to the AVX3 optimized arraycopy stubs:
1360   // - If the target supports AVX3 features (BW+VL+F), the implementation uses 32 byte vectors (YMMs)
1361   //   for both the special cases (various small block sizes) and the aligned copy loop. This is the
1362   //   default configuration.
1363   // - If the copy length is above AVX3Threshold, the implementation uses 64 byte vectors (ZMMs)
1364   //   for the main copy loop (and the subsequent tail), since the bulk of the cycles is consumed there.
1365   // - If the user forces MaxVectorSize=32, then above 4096 bytes REP MOVS shows
1366   //   better performance for disjoint copies. For conjoint/backward copies, vector based
1367   //   copy performs better.
1368   // - If the user sets AVX3Threshold=0, the special cases for small block sizes operate over
1369   //   64 byte vector registers (ZMMs).
1370
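To make the rules concrete, here is a sketch of the path selection they describe (a hypothetical helper; the generators below make the same decisions inline from VM_Version::avx3_threshold(), MaxVectorSize, and the per-type threshold[] table, which is expressed in elements rather than bytes):

    // Sketch only: copy-path selection implied by the notes above.
    #include <cstddef>

    enum class CopyPath { Ymm32, Zmm64, RepMovs };

    static CopyPath pick_copy_path_sketch(size_t bytes, bool disjoint,
                                          int avx3_threshold, int max_vector_size) {
      if (avx3_threshold == 0 && max_vector_size > 32) {
        return CopyPath::Zmm64;              // AVX3Threshold=0: ZMMs even for small blocks
      }
      if (avx3_threshold != 0 && bytes >= (size_t)avx3_threshold) {
        if (max_vector_size == 64) {
          return CopyPath::Zmm64;            // large copy: main loop uses 64 byte vectors
        }
        if (disjoint && bytes > 4096) {
          return CopyPath::RepMovs;          // MaxVectorSize=32: REP MOVS wins for disjoint
        }
      }
      return CopyPath::Ymm32;                // default: 32 byte vectors
    }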
1371 // Inputs:
1372 // c_rarg0 - source array address
1373 // c_rarg1 - destination array address
1374 // c_rarg2 - element count, treated as ssize_t, can be zero
1375 //
1376 //
1377 // Side Effects:
1378 // disjoint_copy_avx3_masked is set to the no-overlap entry point
1379 // used by generate_conjoint_[byte/int/short/long]_copy().
1380 //
1381
1382 address generate_disjoint_copy_avx3_masked(address* entry, const char *name, int shift,
1383 bool aligned, bool is_oop, bool dest_uninitialized) {
1384     __ align(CodeEntryAlignment);
1385     StubCodeMark mark(this, "StubRoutines", name);
1386     address start = __ pc();
1387     int avx3threshold = VM_Version::avx3_threshold();
1388     bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
1389     Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
1390     Label L_repmovs, L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
1391     const Register from = rdi;  // source array address
1392     const Register to = rsi;    // destination array address
1393     const Register count = rdx; // elements count
1394     const Register temp1 = r8;
1395     const Register temp2 = r11;
1396     const Register temp3 = rax;
1397     const Register temp4 = rcx;
1398     // End pointers are inclusive, and if count is not zero they point
1399     // to the last unit copied: end_to[0] := end_from[0]
1400
1401     __ enter(); // required for proper stackwalking of RuntimeStub frame
1402     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1403
1404     if (entry != NULL) {
1405       *entry = __ pc();
1406       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1407       BLOCK_COMMENT("Entry:");
1408     }
1409
1410     BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
1411     BasicType type = is_oop ? T_OBJECT : type_vec[shift];
1412
1413     setup_argument_regs(type);
1414
1415     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
1416     if (dest_uninitialized) {
1417       decorators |= IS_DEST_UNINITIALIZED;
1418     }
1419     if (aligned) {
1420       decorators |= ARRAYCOPY_ALIGNED;
1421     }
1422     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1423     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1424
1425     {
1426       // Type(shift)     byte(0), short(1), int(2), long(3)
1427       int loop_size[] = { 192,    96,       48,     24};
1428       int threshold[] = { 4096,   2048,     1024,   512};
1429
1430       // UnsafeCopyMemory page error: continue after ucm
1431       UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
1432       // 'from', 'to' and 'count' are now valid
1433
1434       // temp1 holds remaining count and temp4 holds running count used to compute
1435       // next address offset for start of to/from addresses (temp4 * scale).
1436       __ mov64(temp4, 0);
1437       __ movq(temp1, count);
1438
1439       // Zero length check.
1440       __ BIND(L_tail);
1441       __ cmpq(temp1, 0);
1442       __ jcc(Assembler::lessEqual, L_exit);
1443
1444       // Special cases using 32 byte [masked] vector copy operations.
1445       __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
1446                                       temp4, temp3, use64byteVector, L_entry, L_exit);
1447
1448       // PRE-MAIN-POST loop for aligned copy.
1449       __ BIND(L_entry);
1450
1451       if (avx3threshold != 0) {
1452         __ cmpq(count, threshold[shift]);
1453         if (MaxVectorSize == 64) {
1454           // Copy using 64 byte vectors.
1455           __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
1456         } else {
1457           assert(MaxVectorSize < 64, "vector size should be < 64 bytes");
1458           // REP MOVS offers a faster copy path.
1459           __ jcc(Assembler::greaterEqual, L_repmovs);
1460         }
1461       }
1462
1463       if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
1464         // Partial copy to make dst address 32 byte aligned.
1465         __ movq(temp2, to);
1466         __ andq(temp2, 31);
1467         __ jcc(Assembler::equal, L_main_pre_loop);
1468
1469         __ negptr(temp2);
1470         __ addq(temp2, 32);
1471         if (shift) {
1472           __ shrq(temp2, shift);
1473         }
1474         __ movq(temp3, temp2);
1475         __ copy32_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift);
1476         __ movq(temp4, temp2);
1477         __ movq(temp1, count);
1478         __ subq(temp1, temp2);
1479
1480         __ cmpq(temp1, loop_size[shift]);
1481         __ jcc(Assembler::less, L_tail);
1482
1483         __ BIND(L_main_pre_loop);
1484         __ subq(temp1, loop_size[shift]);
1485
1486         // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
1487         __ align32();
1488         __ BIND(L_main_loop);
1489         __ copy64_avx(to, from, temp4, xmm1, false, shift, 0);
1490         __ copy64_avx(to, from, temp4, xmm1, false, shift, 64);
1491         __ copy64_avx(to, from, temp4, xmm1, false, shift, 128);
1492         __ addptr(temp4, loop_size[shift]);
1493         __ subq(temp1, loop_size[shift]);
1494         __ jcc(Assembler::greater, L_main_loop);
1495
1496         __ addq(temp1, loop_size[shift]);
1497
1498         // Tail loop.
1499         __ jmp(L_tail);
1500
1501         __ BIND(L_repmovs);
1502         __ movq(temp2, temp1);
1503         // Swap to(RSI) and from(RDI) addresses to comply with REP MOVS semantics.
1504         __ movq(temp3, to);
1505         __ movq(to, from);
1506         __ movq(from, temp3);
1507         // Save to/from for restoration post rep_mov.
1508         __ movq(temp1, to);
1509         __ movq(temp3, from);
1510         if (shift < 3) {
1511           __ shrq(temp2, 3 - shift); // quad word count
1512         }
1513         __ movq(temp4, temp2);       // move quad word count into temp4(RCX).
1514         __ rep_mov();
1515         __ shlq(temp2, 3);           // convert quad words into byte count.
1516         if (shift) {
1517           __ shrq(temp2, shift);     // type specific count.
1518         }
1519         // Restore original addresses in to/from.
1520         __ movq(to, temp3);
1521         __ movq(from, temp1);
1522         __ movq(temp4, temp2);
1523         __ movq(temp1, count);
1524         __ subq(temp1, temp2);       // trailing part (less than a quad word size).
1525         __ jmp(L_tail);
1526 }
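The REP MOVS branch converts the remaining element count to a qword count for rep movsq and back again, leaving the sub-qword remainder to the masked tail. The bookkeeping, as a plain C++ sketch (hypothetical names; the element size is 1 << shift bytes):

    // Sketch only: count conversions around rep movsq in the L_repmovs block.
    #include <cstddef>

    struct RepMovsCountsSketch {
      size_t qwords;          // loaded into RCX for rep movsq
      size_t tail_elements;   // remainder handled afterwards via L_tail
    };

    static RepMovsCountsSketch rep_movs_counts_sketch(size_t remaining_elements, int shift) {
      size_t qwords = (shift < 3) ? (remaining_elements >> (3 - shift)) : remaining_elements;
      size_t bytes_copied    = qwords << 3;           // shlq(temp2, 3)
      size_t elements_copied = bytes_copied >> shift; // shrq(temp2, shift); no-op for shift == 0
      return { qwords, remaining_elements - elements_copied };
    }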
1527
1528 if (MaxVectorSize > 32) {
1529         __ BIND(L_pre_main_post_64);
1530         // Partial copy to make dst address 64 byte aligned.
1531         __ movq(temp2, to);
1532         __ andq(temp2, 63);
1533         __ jcc(Assembler::equal, L_main_pre_loop_64bytes);
1534
1535         __ negptr(temp2);
1536         __ addq(temp2, 64);
1537         if (shift) {
1538           __ shrq(temp2, shift);
1539         }
1540         __ movq(temp3, temp2);
1541         __ copy64_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift, 0, true);
1542         __ movq(temp4, temp2);
1543         __ movq(temp1, count);
1544         __ subq(temp1, temp2);
1545
1546         __ cmpq(temp1, loop_size[shift]);
1547         __ jcc(Assembler::less, L_tail64);
1548
1549         __ BIND(L_main_pre_loop_64bytes);
1550         __ subq(temp1, loop_size[shift]);
1551
1552         // Main loop with aligned copy block size of 192 bytes at
1553         // 64 byte copy granularity.
1554         __ align32();
1555         __ BIND(L_main_loop_64bytes);
1556         __ copy64_avx(to, from, temp4, xmm1, false, shift, 0, true);
1557         __ copy64_avx(to, from, temp4, xmm1, false, shift, 64, true);
1558         __ copy64_avx(to, from, temp4, xmm1, false, shift, 128, true);
1559         __ addptr(temp4, loop_size[shift]);
1560         __ subq(temp1, loop_size[shift]);
1561         __ jcc(Assembler::greater, L_main_loop_64bytes);
1562
1563         __ addq(temp1, loop_size[shift]);
1564         // Zero length check.
1565         __ jcc(Assembler::lessEqual, L_exit);
1566
1567         __ BIND(L_tail64);
1568
1569         // Tail handling using 64 byte [masked] vector copy operations.
1570         use64byteVector = true;
1571         __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
1572                                         temp4, temp3, use64byteVector, L_entry, L_exit);
1573       }
1574       __ BIND(L_exit);
1575     }
1576
1577     address ucme_exit_pc = __ pc();
Value stored to 'ucme_exit_pc' during its initialization is never read
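The flagged store looks like a leftover from the pattern used by the non-AVX3 stubs: in generate_disjoint_byte_copy below, the pc captured at line 1872 is consumed by a second UnsafeCopyMemoryMark at line 1881, whereas here the only UnsafeCopyMemoryMark (line 1431) is the scoped form that needs no exit pc, so ucme_exit_pc is never read. The same unused local appears at line 1752 in generate_conjoint_copy_avx3_masked; both declarations could presumably be dropped without changing the generated stub.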
1578 // When called from generic_arraycopy r11 contains specific values
1579 // used during arraycopy epilogue, re-initializing r11.
1580 if (is_oop) {
1581       __ movq(r11, shift == 3 ? count : to);
1582 }
1583 bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
1584 restore_argument_regs(type);
1585     inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
1586     __ xorptr(rax, rax); // return 0
1587     __ vzeroupper();
1588     __ leave(); // required for proper stackwalking of RuntimeStub frame
1589     __ ret(0);
1590 return start;
1591 }
1592
1593 // Inputs:
1594 // c_rarg0 - source array address
1595 // c_rarg1 - destination array address
1596 // c_rarg2 - element count, treated as ssize_t, can be zero
1597 //
1598 //
1599 address generate_conjoint_copy_avx3_masked(address* entry, const char *name, int shift,
1600 address nooverlap_target, bool aligned, bool is_oop,
1601 bool dest_uninitialized) {
1602     __ align(CodeEntryAlignment);
1603     StubCodeMark mark(this, "StubRoutines", name);
1604     address start = __ pc();
1605
1606     int avx3threshold = VM_Version::avx3_threshold();
1607     bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
1608
1609     Label L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
1610     Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
1611     const Register from = rdi;  // source array address
1612     const Register to = rsi;    // destination array address
1613     const Register count = rdx; // elements count
1614     const Register temp1 = r8;
1615     const Register temp2 = rcx;
1616     const Register temp3 = r11;
1617     const Register temp4 = rax;
1618     // End pointers are inclusive, and if count is not zero they point
1619     // to the last unit copied: end_to[0] := end_from[0]
1620
1621     __ enter(); // required for proper stackwalking of RuntimeStub frame
1622     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1623
1624     if (entry != NULL) {
1625       *entry = __ pc();
1626       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1627       BLOCK_COMMENT("Entry:");
1628     }
1629
1630     array_overlap_test(nooverlap_target, (Address::ScaleFactor)(shift));
1631
1632     BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
1633     BasicType type = is_oop ? T_OBJECT : type_vec[shift];
1634
1635     setup_argument_regs(type);
1636
1637     DecoratorSet decorators = IN_HEAP | IS_ARRAY;
1638     if (dest_uninitialized) {
1639       decorators |= IS_DEST_UNINITIALIZED;
1640     }
1641     if (aligned) {
1642       decorators |= ARRAYCOPY_ALIGNED;
1643     }
1644     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1645     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1646     {
1647       // Type(shift)     byte(0), short(1), int(2), long(3)
1648       int loop_size[] = { 192,    96,       48,     24};
1649       int threshold[] = { 4096,   2048,     1024,   512};
1650
1651       // UnsafeCopyMemory page error: continue after ucm
1652       UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
1653       // 'from', 'to' and 'count' are now valid
1654
1655       // temp1 holds remaining count.
1656       __ movq(temp1, count);
1657
1658       // Zero length check.
1659       __ BIND(L_tail);
1660       __ cmpq(temp1, 0);
1661       __ jcc(Assembler::lessEqual, L_exit);
1662
1663       __ mov64(temp2, 0);
1664       __ movq(temp3, temp1);
1665       // Special cases using 32 byte [masked] vector copy operations.
1666       __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
1667                                                temp4, use64byteVector, L_entry, L_exit);
1668
1669       // PRE-MAIN-POST loop for aligned copy.
1670       __ BIND(L_entry);
1671
1672       if ((MaxVectorSize > 32) && (avx3threshold != 0)) {
1673         __ cmpq(temp1, threshold[shift]);
1674         __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
1675       }
1676
1677       if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
1678         // Partial copy to make dst address 32 byte aligned.
1679         __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
1680         __ andq(temp2, 31);
1681         __ jcc(Assembler::equal, L_main_pre_loop);
1682
1683         if (shift) {
1684           __ shrq(temp2, shift);
1685         }
1686         __ subq(temp1, temp2);
1687         __ copy32_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift);
1688
1689         __ cmpq(temp1, loop_size[shift]);
1690         __ jcc(Assembler::less, L_tail);
1691
1692         __ BIND(L_main_pre_loop);
1693
1694         // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
1695         __ align32();
1696         __ BIND(L_main_loop);
1697         __ copy64_avx(to, from, temp1, xmm1, true, shift, -64);
1698         __ copy64_avx(to, from, temp1, xmm1, true, shift, -128);
1699         __ copy64_avx(to, from, temp1, xmm1, true, shift, -192);
1700         __ subptr(temp1, loop_size[shift]);
1701         __ cmpq(temp1, loop_size[shift]);
1702         __ jcc(Assembler::greater, L_main_loop);
1703
1704         // Tail loop.
1705         __ jmp(L_tail);
1706       }
1707
1708       if (MaxVectorSize > 32) {
1709         __ BIND(L_pre_main_post_64);
1710         // Partial copy to make dst address 64 byte aligned.
1711         __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
1712         __ andq(temp2, 63);
1713         __ jcc(Assembler::equal, L_main_pre_loop_64bytes);
1714
1715         if (shift) {
1716           __ shrq(temp2, shift);
1717         }
1718         __ subq(temp1, temp2);
1719         __ copy64_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift, 0, true);
1720
1721         __ cmpq(temp1, loop_size[shift]);
1722         __ jcc(Assembler::less, L_tail64);
1723
1724         __ BIND(L_main_pre_loop_64bytes);
1725
1726         // Main loop with aligned copy block size of 192 bytes at
1727         // 64 byte copy granularity.
1728         __ align32();
1729         __ BIND(L_main_loop_64bytes);
1730         __ copy64_avx(to, from, temp1, xmm1, true, shift, -64, true);
1731         __ copy64_avx(to, from, temp1, xmm1, true, shift, -128, true);
1732         __ copy64_avx(to, from, temp1, xmm1, true, shift, -192, true);
1733         __ subq(temp1, loop_size[shift]);
1734         __ cmpq(temp1, loop_size[shift]);
1735         __ jcc(Assembler::greater, L_main_loop_64bytes);
1736
1737         // Zero length check.
1738         __ cmpq(temp1, 0);
1739         __ jcc(Assembler::lessEqual, L_exit);
1740
1741         __ BIND(L_tail64);
1742
1743         // Tail handling using 64 byte [masked] vector copy operations.
1744         use64byteVector = true;
1745         __ mov64(temp2, 0);
1746         __ movq(temp3, temp1);
1747         __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
1748                                                  temp4, use64byteVector, L_entry, L_exit);
1749       }
1750       __ BIND(L_exit);
1751     }
1752     address ucme_exit_pc = __ pc();
1753     // When called from generic_arraycopy r11 contains specific values
1754     // used during arraycopy epilogue, re-initializing r11.
1755     if (is_oop) {
1756       __ movq(r11, count);
1757     }
1758     bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
1759     restore_argument_regs(type);
1760     inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
1761     __ xorptr(rax, rax); // return 0
1762     __ vzeroupper();
1763     __ leave(); // required for proper stackwalking of RuntimeStub frame
1764     __ ret(0);
1765 return start;
1766 }
1767#endif // COMPILER2_OR_JVMCI
1768
1769
1770 // Arguments:
1771 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1772 // ignored
1773 // name - stub name string
1774 //
1775 // Inputs:
1776 // c_rarg0 - source array address
1777 // c_rarg1 - destination array address
1778 // c_rarg2 - element count, treated as ssize_t, can be zero
1779 //
1780 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
1781 // we let the hardware handle it. The one to eight bytes within words,
1782 // dwords or qwords that span cache line boundaries will still be loaded
1783 // and stored atomically.
1784 //
1785 // Side Effects:
1786 // disjoint_byte_copy_entry is set to the no-overlap entry point
1787 // used by generate_conjoint_byte_copy().
1788 //
1789 address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
1790#if COMPILER2_OR_JVMCI
1791     if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
1792       return generate_disjoint_copy_avx3_masked(entry, "jbyte_disjoint_arraycopy_avx3", 0,
1793                                                 aligned, false, false);
1794     }
1795#endif
1796     __ align(CodeEntryAlignment);
1797     StubCodeMark mark(this, "StubRoutines", name);
1798     address start = __ pc();
1799
1800     Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
1801     Label L_copy_byte, L_exit;
1802     const Register from = rdi;         // source array address
1803     const Register to = rsi;           // destination array address
1804     const Register count = rdx;        // elements count
1805     const Register byte_count = rcx;
1806     const Register qword_count = count;
1807     const Register end_from = from;    // source array end address
1808     const Register end_to = to;        // destination array end address
1809     // End pointers are inclusive, and if count is not zero they point
1810     // to the last unit copied: end_to[0] := end_from[0]
1811
1812     __ enter(); // required for proper stackwalking of RuntimeStub frame
1813     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1814
1815     if (entry != NULL) {
1816       *entry = __ pc();
1817       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1818       BLOCK_COMMENT("Entry:");
1819     }
1820
1821     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1822                       // r9 and r10 may be used to save non-volatile registers
1823
1824     {
1825       // UnsafeCopyMemory page error: continue after ucm
1826       UnsafeCopyMemoryMark ucmm(this, !aligned, true);
1827       // 'from', 'to' and 'count' are now valid
1828       __ movptr(byte_count, count);
1829       __ shrptr(count, 3); // count => qword_count
1830
1831       // Copy from low to high addresses. Use 'to' as scratch.
1832       __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1833       __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
1834       __ negptr(qword_count); // make the count negative
1835       __ jmp(L_copy_bytes);
1836
1837       // Copy trailing qwords
1838       __ BIND(L_copy_8_bytes);
1839       __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1840       __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1841       __ increment(qword_count);
1842       __ jcc(Assembler::notZero, L_copy_8_bytes);
1843
1844       // Check for and copy trailing dword
1845       __ BIND(L_copy_4_bytes);
1846       __ testl(byte_count, 4);
1847       __ jccb(Assembler::zero, L_copy_2_bytes);
1848       __ movl(rax, Address(end_from, 8));
1849       __ movl(Address(end_to, 8), rax);
1850
1851       __ addptr(end_from, 4);
1852       __ addptr(end_to, 4);
1853
1854       // Check for and copy trailing word
1855       __ BIND(L_copy_2_bytes);
1856       __ testl(byte_count, 2);
1857       __ jccb(Assembler::zero, L_copy_byte);
1858       __ movw(rax, Address(end_from, 8));
1859       __ movw(Address(end_to, 8), rax);
1860
1861       __ addptr(end_from, 2);
1862       __ addptr(end_to, 2);
1863
1864       // Check for and copy trailing byte
1865       __ BIND(L_copy_byte);
1866       __ testl(byte_count, 1);
1867       __ jccb(Assembler::zero, L_exit);
1868       __ movb(rax, Address(end_from, 8));
1869       __ movb(Address(end_to, 8), rax);
1870     }
1871     __ BIND(L_exit);
1872     address ucme_exit_pc = __ pc();
1873     restore_arg_regs();
1874     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
1875     __ xorptr(rax, rax); // return 0
1876     __ vzeroupper();
1877     __ leave(); // required for proper stackwalking of RuntimeStub frame
1878     __ ret(0);
1879
1880     {
1881       UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
1882       // Copy in multi-bytes chunks
1883       copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1884       __ jmp(L_copy_4_bytes);
1885 }
1886 return start;
1887 }
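The trailing-fragment handling above dispatches on individual bits of byte_count instead of looping byte by byte: after the qword copies, at most seven bytes remain, and bits 2, 1, and 0 select exactly one dword, word, and byte move. A scalar sketch of the same idea (hypothetical helper):

    // Sketch only: bit-test tail dispatch used after the qword copy loops.
    #include <cstdint>
    #include <cstring>

    static void copy_tail_sketch(const uint8_t* from, uint8_t* to, size_t byte_count) {
      size_t done = byte_count & ~(size_t)7;  // bytes already covered by qword copies
      if (byte_count & 4) { std::memcpy(to + done, from + done, 4); done += 4; }
      if (byte_count & 2) { std::memcpy(to + done, from + done, 2); done += 2; }
      if (byte_count & 1) { to[done] = from[done]; }
    }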
1888
1889 // Arguments:
1890 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1891 // ignored
1892 // name - stub name string
1893 //
1894 // Inputs:
1895 // c_rarg0 - source array address
1896 // c_rarg1 - destination array address
1897 // c_rarg2 - element count, treated as ssize_t, can be zero
1898 //
1899 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
1900 // we let the hardware handle it. The one to eight bytes within words,
1901 // dwords or qwords that span cache line boundaries will still be loaded
1902 // and stored atomically.
1903 //
1904 address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
1905 address* entry, const char *name) {
1906#if COMPILER2_OR_JVMCI
1907     if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
1908       return generate_conjoint_copy_avx3_masked(entry, "jbyte_conjoint_arraycopy_avx3", 0,
1909                                                 nooverlap_target, aligned, false, false);
1910     }
1911#endif
1912     __ align(CodeEntryAlignment);
1913     StubCodeMark mark(this, "StubRoutines", name);
1914     address start = __ pc();
1915
1916     Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
1917     const Register from = rdi;         // source array address
1918     const Register to = rsi;           // destination array address
1919     const Register count = rdx;        // elements count
1920     const Register byte_count = rcx;
1921     const Register qword_count = count;
1922
1923     __ enter(); // required for proper stackwalking of RuntimeStub frame
1924     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1925
1926     if (entry != NULL) {
1927       *entry = __ pc();
1928       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1929       BLOCK_COMMENT("Entry:");
1930     }
1931
1932     array_overlap_test(nooverlap_target, Address::times_1);
1933     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1934                       // r9 and r10 may be used to save non-volatile registers
1935
1936     {
1937       // UnsafeCopyMemory page error: continue after ucm
1938       UnsafeCopyMemoryMark ucmm(this, !aligned, true);
1939       // 'from', 'to' and 'count' are now valid
1940       __ movptr(byte_count, count);
1941       __ shrptr(count, 3); // count => qword_count
1942
1943       // Copy from high to low addresses.
1944
1945       // Check for and copy trailing byte
1946       __ testl(byte_count, 1);
1947       __ jcc(Assembler::zero, L_copy_2_bytes);
1948       __ movb(rax, Address(from, byte_count, Address::times_1, -1));
1949       __ movb(Address(to, byte_count, Address::times_1, -1), rax);
1950       __ decrement(byte_count); // Adjust for possible trailing word
1951
1952       // Check for and copy trailing word
1953       __ BIND(L_copy_2_bytes);
1954       __ testl(byte_count, 2);
1955       __ jcc(Assembler::zero, L_copy_4_bytes);
1956       __ movw(rax, Address(from, byte_count, Address::times_1, -2));
1957       __ movw(Address(to, byte_count, Address::times_1, -2), rax);
1958
1959       // Check for and copy trailing dword
1960       __ BIND(L_copy_4_bytes);
1961       __ testl(byte_count, 4);
1962       __ jcc(Assembler::zero, L_copy_bytes);
1963       __ movl(rax, Address(from, qword_count, Address::times_8));
1964       __ movl(Address(to, qword_count, Address::times_8), rax);
1965       __ jmp(L_copy_bytes);
1966
1967       // Copy trailing qwords
1968       __ BIND(L_copy_8_bytes);
1969       __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1970       __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1971       __ decrement(qword_count);
1972       __ jcc(Assembler::notZero, L_copy_8_bytes);
1973     }
1974     restore_arg_regs();
1975     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
1976     __ xorptr(rax, rax); // return 0
1977     __ vzeroupper();
1978     __ leave(); // required for proper stackwalking of RuntimeStub frame
1979     __ ret(0);
1980
1981     {
1982       // UnsafeCopyMemory page error: continue after ucm
1983       UnsafeCopyMemoryMark ucmm(this, !aligned, true);
1984       // Copy in multi-bytes chunks
1985       copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1986     }
1987     restore_arg_regs();
1988     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
1989     __ xorptr(rax, rax); // return 0
1990     __ vzeroupper();
1991     __ leave(); // required for proper stackwalking of RuntimeStub frame
1992     __ ret(0);
1993
1994 return start;
1995 }
1996
1997 // Arguments:
1998 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1999 // ignored
2000 // name - stub name string
2001 //
2002 // Inputs:
2003 // c_rarg0 - source array address
2004 // c_rarg1 - destination array address
2005 // c_rarg2 - element count, treated as ssize_t, can be zero
2006 //
2007 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
2008 // let the hardware handle it. The two or four words within dwords
2009 // or qwords that span cache line boundaries will still be loaded
2010 // and stored atomically.
2011 //
2012 // Side Effects:
2013 // disjoint_short_copy_entry is set to the no-overlap entry point
2014 // used by generate_conjoint_short_copy().
2015 //
2016 address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
2017#if COMPILER2_OR_JVMCI
2018     if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2019       return generate_disjoint_copy_avx3_masked(entry, "jshort_disjoint_arraycopy_avx3", 1,
2020                                                 aligned, false, false);
2021     }
2022#endif
2023
2024     __ align(CodeEntryAlignment);
2025     StubCodeMark mark(this, "StubRoutines", name);
2026     address start = __ pc();
2027
2028     Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes, L_exit;
2029     const Register from = rdi;         // source array address
2030     const Register to = rsi;           // destination array address
2031     const Register count = rdx;        // elements count
2032     const Register word_count = rcx;
2033     const Register qword_count = count;
2034     const Register end_from = from;    // source array end address
2035     const Register end_to = to;        // destination array end address
2036     // End pointers are inclusive, and if count is not zero they point
2037     // to the last unit copied: end_to[0] := end_from[0]
2038
2039     __ enter(); // required for proper stackwalking of RuntimeStub frame
2040     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2041
2042     if (entry != NULL) {
2043       *entry = __ pc();
2044       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2045       BLOCK_COMMENT("Entry:");
2046     }
2047
2048     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2049                       // r9 and r10 may be used to save non-volatile registers
2050
2051     {
2052       // UnsafeCopyMemory page error: continue after ucm
2053       UnsafeCopyMemoryMark ucmm(this, !aligned, true);
2054       // 'from', 'to' and 'count' are now valid
2055       __ movptr(word_count, count);
2056       __ shrptr(count, 2); // count => qword_count
2057
2058       // Copy from low to high addresses. Use 'to' as scratch.
2059       __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2060       __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2061       __ negptr(qword_count);
2062       __ jmp(L_copy_bytes);
2063
2064       // Copy trailing qwords
2065       __ BIND(L_copy_8_bytes);
2066       __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2067       __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2068       __ increment(qword_count);
2069       __ jcc(Assembler::notZero, L_copy_8_bytes);
2070
2071       // Original 'dest' is trashed, so we can't use it as a
2072       // base register for a possible trailing word copy
2073
2074       // Check for and copy trailing dword
2075       __ BIND(L_copy_4_bytes);
2076       __ testl(word_count, 2);
2077       __ jccb(Assembler::zero, L_copy_2_bytes);
2078       __ movl(rax, Address(end_from, 8));
2079       __ movl(Address(end_to, 8), rax);
2080
2081       __ addptr(end_from, 4);
2082       __ addptr(end_to, 4);
2083
2084       // Check for and copy trailing word
2085       __ BIND(L_copy_2_bytes);
2086       __ testl(word_count, 1);
2087       __ jccb(Assembler::zero, L_exit);
2088       __ movw(rax, Address(end_from, 8));
2089       __ movw(Address(end_to, 8), rax);
2090     }
2091     __ BIND(L_exit);
2092     address ucme_exit_pc = __ pc();
2093     restore_arg_regs();
2094     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
2095     __ xorptr(rax, rax); // return 0
2096     __ vzeroupper();
2097     __ leave(); // required for proper stackwalking of RuntimeStub frame
2098     __ ret(0);
2099
2100     {
2101       UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
2102       // Copy in multi-bytes chunks
2103       copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2104       __ jmp(L_copy_4_bytes);
2105 }
2106
2107 return start;
2108 }
2109
2110 address generate_fill(BasicType t, bool aligned, const char *name) {
2111     __ align(CodeEntryAlignment);
2112     StubCodeMark mark(this, "StubRoutines", name);
2113     address start = __ pc();
2114
2115     BLOCK_COMMENT("Entry:");
2116
2117     const Register to = c_rarg0;    // destination array address
2118     const Register value = c_rarg1; // value
2119     const Register count = c_rarg2; // elements count
2120     __ mov(r11, count);
2121
2122     __ enter(); // required for proper stackwalking of RuntimeStub frame
2123
2124     __ generate_fill(t, aligned, to, value, r11, rax, xmm0);
2125
2126     __ vzeroupper();
2127     __ leave(); // required for proper stackwalking of RuntimeStub frame
2128     __ ret(0);
2129 return start;
2130 }
2131
2132 // Arguments:
2133 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
2134 // ignored
2135 // name - stub name string
2136 //
2137 // Inputs:
2138 // c_rarg0 - source array address
2139 // c_rarg1 - destination array address
2140 // c_rarg2 - element count, treated as ssize_t, can be zero
2141 //
2142 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
2143 // let the hardware handle it. The two or four words within dwords
2144 // or qwords that span cache line boundaries will still be loaded
2145 // and stored atomically.
2146 //
2147 address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
2148 address *entry, const char *name) {
2149#if COMPILER2_OR_JVMCI
2150     if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2151       return generate_conjoint_copy_avx3_masked(entry, "jshort_conjoint_arraycopy_avx3", 1,
2152                                                 nooverlap_target, aligned, false, false);
2153     }
2154#endif
2155     __ align(CodeEntryAlignment);
2156     StubCodeMark mark(this, "StubRoutines", name);
2157     address start = __ pc();
2158
2159     Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
2160     const Register from = rdi;         // source array address
2161     const Register to = rsi;           // destination array address
2162     const Register count = rdx;        // elements count
2163     const Register word_count = rcx;
2164     const Register qword_count = count;
2165
2166     __ enter(); // required for proper stackwalking of RuntimeStub frame
2167     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2168
2169     if (entry != NULL) {
2170       *entry = __ pc();
2171       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2172       BLOCK_COMMENT("Entry:");
2173     }
2174
2175     array_overlap_test(nooverlap_target, Address::times_2);
2176     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2177                       // r9 and r10 may be used to save non-volatile registers
2178
2179     {
2180       // UnsafeCopyMemory page error: continue after ucm
2181       UnsafeCopyMemoryMark ucmm(this, !aligned, true);
2182       // 'from', 'to' and 'count' are now valid
2183       __ movptr(word_count, count);
2184       __ shrptr(count, 2); // count => qword_count
2185
2186       // Copy from high to low addresses. Use 'to' as scratch.
2187
2188       // Check for and copy trailing word
2189       __ testl(word_count, 1);
2190       __ jccb(Assembler::zero, L_copy_4_bytes);
2191       __ movw(rax, Address(from, word_count, Address::times_2, -2));
2192       __ movw(Address(to, word_count, Address::times_2, -2), rax);
2193
2194       // Check for and copy trailing dword
2195       __ BIND(L_copy_4_bytes);
2196       __ testl(word_count, 2);
2197       __ jcc(Assembler::zero, L_copy_bytes);
2198       __ movl(rax, Address(from, qword_count, Address::times_8));
2199       __ movl(Address(to, qword_count, Address::times_8), rax);
2200       __ jmp(L_copy_bytes);
2201
2202       // Copy trailing qwords
2203       __ BIND(L_copy_8_bytes);
2204       __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2205       __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2206       __ decrement(qword_count);
2207       __ jcc(Assembler::notZero, L_copy_8_bytes);
2208     }
2209     restore_arg_regs();
2210     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
2211     __ xorptr(rax, rax); // return 0
2212     __ vzeroupper();
2213     __ leave(); // required for proper stackwalking of RuntimeStub frame
2214     __ ret(0);
2215
2216     {
2217       // UnsafeCopyMemory page error: continue after ucm
2218       UnsafeCopyMemoryMark ucmm(this, !aligned, true);
2219       // Copy in multi-bytes chunks
2220       copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2221     }
2222     restore_arg_regs();
2223     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
2224     __ xorptr(rax, rax); // return 0
2225     __ vzeroupper();
2226     __ leave(); // required for proper stackwalking of RuntimeStub frame
2227     __ ret(0);
2228
2229 return start;
2230 }
2231
2232 // Arguments:
2233 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
2234 // ignored
2235 // is_oop - true => oop array, so generate store check code
2236 // name - stub name string
2237 //
2238 // Inputs:
2239 // c_rarg0 - source array address
2240 // c_rarg1 - destination array address
2241 // c_rarg2 - element count, treated as ssize_t, can be zero
2242 //
2243 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
2244 // the hardware handle it. The two dwords within qwords that span
2245   // cache line boundaries will still be loaded and stored atomically.
2246 //
2247 // Side Effects:
2248 // disjoint_int_copy_entry is set to the no-overlap entry point
2249 // used by generate_conjoint_int_oop_copy().
2250 //
2251 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
2252 const char *name, bool dest_uninitialized = false) {
2253#if COMPILER2_OR_JVMCI
2254     if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2255       return generate_disjoint_copy_avx3_masked(entry, "jint_disjoint_arraycopy_avx3", 2,
2256                                                 aligned, is_oop, dest_uninitialized);
2257     }
2258#endif
2259
2260     __ align(CodeEntryAlignment);
2261     StubCodeMark mark(this, "StubRoutines", name);
2262     address start = __ pc();
2263
2264     Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
2265     const Register from = rdi;         // source array address
2266     const Register to = rsi;           // destination array address
2267     const Register count = rdx;        // elements count
2268     const Register dword_count = rcx;
2269     const Register qword_count = count;
2270     const Register end_from = from;    // source array end address
2271     const Register end_to = to;        // destination array end address
2272     // End pointers are inclusive, and if count is not zero they point
2273     // to the last unit copied: end_to[0] := end_from[0]
2274
2275     __ enter(); // required for proper stackwalking of RuntimeStub frame
2276     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2277
2278     if (entry != NULL) {
2279       *entry = __ pc();
2280       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2281       BLOCK_COMMENT("Entry:");
2282     }
2283
2284     setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2285                                    // r9 is used to save r15_thread
2286
2287     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2288     if (dest_uninitialized) {
2289       decorators |= IS_DEST_UNINITIALIZED;
2290     }
2291     if (aligned) {
2292       decorators |= ARRAYCOPY_ALIGNED;
2293     }
2294
2295     BasicType type = is_oop ? T_OBJECT : T_INT;
2296     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2297     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
2298
2299     {
2300       // UnsafeCopyMemory page error: continue after ucm
2301       UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2302       // 'from', 'to' and 'count' are now valid
2303       __ movptr(dword_count, count);
2304       __ shrptr(count, 1); // count => qword_count
2305
2306       // Copy from low to high addresses. Use 'to' as scratch.
2307       __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2308       __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2309       __ negptr(qword_count);
2310       __ jmp(L_copy_bytes);
2311
2312       // Copy trailing qwords
2313       __ BIND(L_copy_8_bytes);
2314       __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2315       __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2316       __ increment(qword_count);
2317       __ jcc(Assembler::notZero, L_copy_8_bytes);
2318
2319       // Check for and copy trailing dword
2320       __ BIND(L_copy_4_bytes);
2321       __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
2322       __ jccb(Assembler::zero, L_exit);
2323       __ movl(rax, Address(end_from, 8));
2324       __ movl(Address(end_to, 8), rax);
2325     }
2326     __ BIND(L_exit);
2327     address ucme_exit_pc = __ pc();
2328     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
2329     restore_arg_regs_using_thread();
2330     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2331     __ vzeroupper();
2332     __ xorptr(rax, rax); // return 0
2333     __ leave(); // required for proper stackwalking of RuntimeStub frame
2334     __ ret(0);
2335
2336     {
2337       UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
2338       // Copy in multi-bytes chunks
2339       copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2340       __ jmp(L_copy_4_bytes);
2341 }
2342
2343 return start;
2344 }
2345
2346 // Arguments:
2347 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
2348 // ignored
2349 // is_oop - true => oop array, so generate store check code
2350 // name - stub name string
2351 //
2352 // Inputs:
2353 // c_rarg0 - source array address
2354 // c_rarg1 - destination array address
2355 // c_rarg2 - element count, treated as ssize_t, can be zero
2356 //
2357 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
2358 // the hardware handle it. The two dwords within qwords that span
2359   // cache line boundaries will still be loaded and stored atomically.
2360 //
2361 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
2362 address *entry, const char *name,
2363 bool dest_uninitialized = false) {
2364#if COMPILER2_OR_JVMCI
2365     if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2366       return generate_conjoint_copy_avx3_masked(entry, "jint_conjoint_arraycopy_avx3", 2,
2367                                                 nooverlap_target, aligned, is_oop, dest_uninitialized);
2368     }
2369#endif
2370     __ align(CodeEntryAlignment);
2371     StubCodeMark mark(this, "StubRoutines", name);
2372     address start = __ pc();
2373
2374     Label L_copy_bytes, L_copy_8_bytes, L_exit;
2375     const Register from = rdi;         // source array address
2376     const Register to = rsi;           // destination array address
2377     const Register count = rdx;        // elements count
2378     const Register dword_count = rcx;
2379     const Register qword_count = count;
2380
2381     __ enter(); // required for proper stackwalking of RuntimeStub frame
2382     assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2383
2384     if (entry != NULL) {
2385       *entry = __ pc();
2386       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2387       BLOCK_COMMENT("Entry:");
2388     }
2389
2390     array_overlap_test(nooverlap_target, Address::times_4);
2391     setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2392                                    // r9 is used to save r15_thread
2393
2394     DecoratorSet decorators = IN_HEAP | IS_ARRAY;
2395     if (dest_uninitialized) {
2396       decorators |= IS_DEST_UNINITIALIZED;
2397     }
2398     if (aligned) {
2399       decorators |= ARRAYCOPY_ALIGNED;
2400     }
2401
2402     BasicType type = is_oop ? T_OBJECT : T_INT;
2403     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2404     // no registers are destroyed by this call
2405     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
2406
2407     assert_clean_int(count, rax); // Make sure 'count' is clean int.
2408     {
2409       // UnsafeCopyMemory page error: continue after ucm
2410       UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2411       // 'from', 'to' and 'count' are now valid
2412       __ movptr(dword_count, count);
2413       __ shrptr(count, 1); // count => qword_count
2414
2415       // Copy from high to low addresses. Use 'to' as scratch.
2416
2417       // Check for and copy trailing dword
2418       __ testl(dword_count, 1);
2419       __ jcc(Assembler::zero, L_copy_bytes);
2420       __ movl(rax, Address(from, dword_count, Address::times_4, -4));
2421       __ movl(Address(to, dword_count, Address::times_4, -4), rax);
2422       __ jmp(L_copy_bytes);
2423
2424       // Copy trailing qwords
2425       __ BIND(L_copy_8_bytes);
2426       __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2427       __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2428       __ decrement(qword_count);
2429       __ jcc(Assembler::notZero, L_copy_8_bytes);
2430     }
2431     if (is_oop) {
2432       __ jmp(L_exit);
2433     }
2434     restore_arg_regs_using_thread();
2435     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2436     __ xorptr(rax, rax); // return 0
2437     __ vzeroupper();
2438     __ leave(); // required for proper stackwalking of RuntimeStub frame
2439     __ ret(0);
2440
2441     {
2442       // UnsafeCopyMemory page error: continue after ucm
2443       UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2444       // Copy in multi-bytes chunks
2445       copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2446     }
2447
2448     __ BIND(L_exit);
2449     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
2450     restore_arg_regs_using_thread();
2451     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2452     __ xorptr(rax, rax); // return 0
2453     __ vzeroupper();
2454     __ leave(); // required for proper stackwalking of RuntimeStub frame
2455     __ ret(0);
2456
2457 return start;
2458 }
2459
2460 // Arguments:
2461 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2462 // ignored
2463 // is_oop - true => oop array, so generate store check code
2464 // name - stub name string
2465 //
2466 // Inputs:
2467 // c_rarg0 - source array address
2468 // c_rarg1 - destination array address
2469 // c_rarg2 - element count, treated as ssize_t, can be zero
2470 //
2471 // Side Effects:
2472 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
2473 // no-overlap entry point used by generate_conjoint_long_oop_copy().
2474 //
2475 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
2476 const char *name, bool dest_uninitialized = false) {
2477#if COMPILER2_OR_JVMCI1
2478 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2479 return generate_disjoint_copy_avx3_masked(entry, "jlong_disjoint_arraycopy_avx3", 3,
2480 aligned, is_oop, dest_uninitialized);
2481 }
2482#endif
2483 __masm-> align(CodeEntryAlignment);
2484 StubCodeMark mark(this, "StubRoutines", name);
2485 address start = __masm-> pc();
2486
2487 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2488 const Register from = rdi; // source array address
2489 const Register to = rsi; // destination array address
2490 const Register qword_count = rdx; // elements count
2491 const Register end_from = from; // source array end address
2492 const Register end_to = rcx; // destination array end address
2493 const Register saved_count = r11;
2494 // End pointers are inclusive, and if count is not zero they point
2495 // to the last unit copied: end_to[0] := end_from[0]
2496
2497 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
2498 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2499 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2500
2501 if (entry != NULL__null) {
2502 *entry = __masm-> pc();
2503 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2504 BLOCK_COMMENT("Entry:")masm-> block_comment("Entry:");
2505 }
2506
2507 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2508 // r9 is used to save r15_thread
2509 // 'from', 'to' and 'qword_count' are now valid
2510
2511 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2512 if (dest_uninitialized) {
2513 decorators |= IS_DEST_UNINITIALIZED;
2514 }
2515 if (aligned) {
2516 decorators |= ARRAYCOPY_ALIGNED;
2517 }
2518
2519 BasicType type = is_oop ? T_OBJECT : T_LONG;
2520 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2521 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2522 {
2523 // UnsafeCopyMemory page error: continue after ucm
2524 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2525
2526 // Copy from low to high addresses. Use 'to' as scratch.
2527 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2528 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2529 __ negptr(qword_count);
2530 __ jmp(L_copy_bytes);
2531
2532 // Copy trailing qwords
2533 __ BIND(L_copy_8_bytes);
2534 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2535 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2536 __ increment(qword_count);
2537 __ jcc(Assembler::notZero, L_copy_8_bytes);
2538 }
2539 if (is_oop) {
2540 __ jmp(L_exit);
2541 } else {
2542 restore_arg_regs_using_thread();
2543 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2544 __ xorptr(rax, rax); // return 0
2545 __ vzeroupper();
2546 __ leave(); // required for proper stackwalking of RuntimeStub frame
2547 __ ret(0);
2548 }
2549
2550 {
2551 // UnsafeCopyMemory page error: continue after ucm
2552 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2553 // Copy in multi-bytes chunks
2554 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2555 }
2556
2557 __ BIND(L_exit);
2558 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2559 restore_arg_regs_using_thread();
2560 if (is_oop) {
2561 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2562 } else {
2563 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2564 }
2565 __ vzeroupper();
2566 __ xorptr(rax, rax); // return 0
2567 __ leave(); // required for proper stackwalking of RuntimeStub frame
2568 __ ret(0);
2569
2570 return start;
2571 }
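
The disjoint stubs above share one addressing idiom: inclusive end pointers plus a negated element count, so the copy loop advances with a single increment-and-test. A minimal C++ sketch of that idiom, assuming 8-byte elements and a positive count (illustrative names, not HotSpot code):

    #include <cstddef>
    #include <cstdint>

    // End pointers are inclusive; a negative index walks forward toward zero,
    // mirroring the lea(end_from/end_to, ...), negptr(qword_count) and
    // increment/jcc(notZero) pair in L_copy_8_bytes above.
    static void disjoint_qword_copy(const uint64_t* from, uint64_t* to, ptrdiff_t qword_count) {
      if (qword_count <= 0) return;                      // sketch assumes something to copy
      const uint64_t* end_from = from + qword_count - 1; // points at last unit copied
      uint64_t*       end_to   = to   + qword_count - 1; // end_to[0] := end_from[0]
      for (ptrdiff_t i = -qword_count; i != 0; i++) {
        end_to[i + 1] = end_from[i + 1];                 // Address(end_x, count, times_8, 8)
      }
    }
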
2572
2573 // Arguments:
2574 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2575 // ignored
2576 // is_oop - true => oop array, so generate store check code
2577 // name - stub name string
2578 //
2579 // Inputs:
2580 // c_rarg0 - source array address
2581 // c_rarg1 - destination array address
2582 // c_rarg2 - element count, treated as ssize_t, can be zero
2583 //
2584 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
2585 address nooverlap_target, address *entry,
2586 const char *name, bool dest_uninitialized = false) {
2587#if COMPILER2_OR_JVMCI
2588 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2589 return generate_conjoint_copy_avx3_masked(entry, "jlong_conjoint_arraycopy_avx3", 3,
2590 nooverlap_target, aligned, is_oop, dest_uninitialized);
2591 }
2592#endif
2593 __ align(CodeEntryAlignment);
2594 StubCodeMark mark(this, "StubRoutines", name);
2595 address start = __ pc();
2596
2597 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2598 const Register from = rdi; // source array address
2599 const Register to = rsi; // destination array address
2600 const Register qword_count = rdx; // elements count
2601 const Register saved_count = rcx;
2602
2603 __ enter(); // required for proper stackwalking of RuntimeStub frame
2604 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2605
2606 if (entry != NULL) {
2607 *entry = __ pc();
2608 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2609 BLOCK_COMMENT("Entry:");
2610 }
2611
2612 array_overlap_test(nooverlap_target, Address::times_8);
2613 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2614 // r9 is used to save r15_thread
2615 // 'from', 'to' and 'qword_count' are now valid
2616
2617 DecoratorSet decorators = IN_HEAP | IS_ARRAY;
2618 if (dest_uninitialized) {
2619 decorators |= IS_DEST_UNINITIALIZED;
2620 }
2621 if (aligned) {
2622 decorators |= ARRAYCOPY_ALIGNED;
2623 }
2624
2625 BasicType type = is_oop ? T_OBJECT : T_LONG;
2626 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2627 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2628 {
2629 // UnsafeCopyMemory page error: continue after ucm
2630 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2631
2632 __ jmp(L_copy_bytes);
2633
2634 // Copy trailing qwords
2635 __ BIND(L_copy_8_bytes);
2636 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2637 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2638 __ decrement(qword_count);
2639 __ jcc(Assembler::notZero, L_copy_8_bytes);
2640 }
2641 if (is_oop) {
2640 __ jmp(L_exit);
2641 } else {
2642 restore_arg_regs_using_thread();
2643 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2644 __ xorptr(rax, rax); // return 0
2645 __ vzeroupper();
2646 __ leave(); // required for proper stackwalking of RuntimeStub frame
2647 __ ret(0);
2650 }
2651 {
2652 // UnsafeCopyMemory page error: continue after ucm
2653 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2654
2655 // Copy in multi-bytes chunks
2656 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2657 }
2658 __ BIND(L_exit);
2659 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2660 restore_arg_regs_using_thread();
2661 if (is_oop) {
2662 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2663 } else {
2664 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2665 }
2666 __ vzeroupper();
2667 __ xorptr(rax, rax); // return 0
2668 __ leave(); // required for proper stackwalking of RuntimeStub frame
2669 __ ret(0);
2670
2671 return start;
2672 }
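
For the conjoint variant, array_overlap_test first branches to the no-overlap entry (the forward stub) when the copy cannot be destructive; only a forward-overlapping destination falls through to the backward loop above. A hedged C++ sketch of that decision, not the exact emitted instruction sequence (the pointer subtraction between unrelated arrays is informal here):

    #include <cstddef>
    #include <cstdint>

    static void conjoint_qword_copy(const uint64_t* from, uint64_t* to, ptrdiff_t qword_count) {
      // One common formulation of the overlap test: an unsigned compare of
      // (to - from) against the count is also true when to <= from, so a
      // single branch covers both safe cases.
      if ((uintptr_t)(to - from) >= (uintptr_t)qword_count) {
        for (ptrdiff_t i = 0; i < qword_count; i++) to[i] = from[i]; // forward copy is safe
        return;
      }
      for (ptrdiff_t i = qword_count - 1; i >= 0; i--) { // high-to-low, like L_copy_8_bytes
        to[i] = from[i];
      }
    }
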
2673
2674
2675 // Helper for generating a dynamic type check.
2676 // Smashes no registers.
2677 void generate_type_check(Register sub_klass,
2678 Register super_check_offset,
2679 Register super_klass,
2680 Label& L_success) {
2681 assert_different_registers(sub_klass, super_check_offset, super_klass);
2682
2683 BLOCK_COMMENT("type_check:");
2684
2685 Label L_miss;
2686
2687 __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL,
2688 super_check_offset);
2689 __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
2690
2691 // Fall through on failure!
2692 __ BIND(L_miss);
2693 }
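
A rough C++ sketch of the check the two macro-assembler calls emit: a one-load fast path through the slot at super_check_offset, then a linear scan of the secondary supers as the slow path. Field names and layout here are simplified assumptions, not HotSpot's real Klass layout, and the real slow path also maintains a cache slot.

    #include <cstdint>

    struct KlassSketch {                  // simplified stand-in for Klass
      uint32_t      super_check_offset;   // byte offset probed by the fast path
      KlassSketch** secondary_supers;     // nullptr-terminated list for the slow path
    };

    static bool is_subtype_of(KlassSketch* sub, KlassSketch* super) {
      // fast path (check_klass_subtype_fast_path): one load at the offset
      // published by the superclass, then a pointer compare
      KlassSketch* probe = *(KlassSketch**)((char*)sub + super->super_check_offset);
      if (probe == super) return true;
      // slow path (check_klass_subtype_slow_path): scan secondary supers
      for (KlassSketch** p = sub->secondary_supers; *p != nullptr; p++) {
        if (*p == super) return true;
      }
      return false;                       // corresponds to falling through to L_miss
    }
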
2694
2695 //
2696 // Generate checkcasting array copy stub
2697 //
2698 // Input:
2699 // c_rarg0 - source array address
2700 // c_rarg1 - destination array address
2701 // c_rarg2 - element count, treated as ssize_t, can be zero
2702 // c_rarg3 - size_t ckoff (super_check_offset)
2703 // not Win64
2704 // c_rarg4 - oop ckval (super_klass)
2705 // Win64
2706 // rsp+40 - oop ckval (super_klass)
2707 //
2708 // Output:
2709 // rax == 0 - success
2710 // rax == -1^K - failure, where K is partial transfer count
2711 //
2712 address generate_checkcast_copy(const char *name, address *entry,
2713 bool dest_uninitialized = false) {
2714
2715 Label L_load_element, L_store_element, L_do_card_marks, L_done;
2716
2717 // Input registers (after setup_arg_regs)
2718 const Register from = rdi; // source array address
2719 const Register to = rsi; // destination array address
2720 const Register length = rdx; // elements count
2721 const Register ckoff = rcx; // super_check_offset
2722 const Register ckval = r8; // super_klass
2723
2724 // Registers used as temps (r13, r14 are save-on-entry)
2725 const Register end_from = from; // source array end address
2726 const Register end_to = r13; // destination array end address
2727 const Register count = rdx; // -(count_remaining)
2728 const Register r14_length = r14; // saved copy of length
2729 // End pointers are inclusive, and if length is not zero they point
2730 // to the last unit copied: end_to[0] := end_from[0]
2731
2732 const Register rax_oop = rax; // actual oop copied
2733 const Register r11_klass = r11; // oop._klass
2734
2735 //---------------------------------------------------------------
2736 // Assembler stub will be used for this call to arraycopy
2737 // if the two arrays are subtypes of Object[] but the
2738 // destination array type is not equal to or a supertype
2739 // of the source type. Each element must be separately
2740 // checked.
2741
2742 __ align(CodeEntryAlignment);
2743 StubCodeMark mark(this, "StubRoutines", name);
2744 address start = __ pc();
2745
2746 __ enter(); // required for proper stackwalking of RuntimeStub frame
2747
2748#ifdef ASSERT
2749 // caller guarantees that the arrays really are different
2750 // otherwise, we would have to make conjoint checks
2751 { Label L;
2752 array_overlap_test(L, TIMES_OOP);
2753 __ stop("checkcast_copy within a single array");
2754 __ bind(L);
2755 }
2756#endif //ASSERT
2757
2758 setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
2759 // ckoff => rcx, ckval => r8
2760 // r9 and r10 may be used to save non-volatile registers
2761#ifdef _WIN64
2762 // last argument (#4) is on stack on Win64
2763 __ movptr(ckval, Address(rsp, 6 * wordSize));
2764#endif
2765
2766 // Caller of this entry point must set up the argument registers.
2767 if (entry != NULL) {
2768 *entry = __ pc();
2769 BLOCK_COMMENT("Entry:");
2770 }
2771
2772 // allocate spill slots for r13, r14
2773 enum {
2774 saved_r13_offset,
2775 saved_r14_offset,
2776 saved_r10_offset,
2777 saved_rbp_offset
2778 };
2779 __ subptr(rsp, saved_rbp_offset * wordSize);
2780 __ movptr(Address(rsp, saved_r13_offset * wordSize), r13);
2781 __ movptr(Address(rsp, saved_r14_offset * wordSize), r14);
2782 __ movptr(Address(rsp, saved_r10_offset * wordSize), r10);
2783
2784#ifdef ASSERT
2785 Label L2;
2786 __ get_thread(r14);
2787 __ cmpptr(r15_thread, r14);
2788 __ jcc(Assembler::equal, L2);
2789 __ stop("StubRoutines::call_stub: r15_thread is modified by call");
2790 __ bind(L2);
2791#endif // ASSERT
2792
2793 // check that int operands are properly extended to size_t
2794 assert_clean_int(length, rax);
2795 assert_clean_int(ckoff, rax);
2796
2797#ifdef ASSERT
2798 BLOCK_COMMENT("assert consistent ckoff/ckval");
2799 // The ckoff and ckval must be mutually consistent,
2800 // even though caller generates both.
2801 { Label L;
2802 int sco_offset = in_bytes(Klass::super_check_offset_offset());
2803 __ cmpl(ckoff, Address(ckval, sco_offset));
2804 __ jcc(Assembler::equal, L);
2805 __ stop("super_check_offset inconsistent");
2806 __ bind(L);
2807 }
2808#endif //ASSERT
2809
2810 // Loop-invariant addresses. They are exclusive end pointers.
2811 Address end_from_addr(from, length, TIMES_OOP, 0);
2812 Address end_to_addr(to, length, TIMES_OOP, 0);
2813 // Loop-variant addresses. They assume post-incremented count < 0.
2814 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2815 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2816
2817 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
2818 if (dest_uninitialized) {
2819 decorators |= IS_DEST_UNINITIALIZED;
2820 }
2821
2822 BasicType type = T_OBJECT;
2823 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2824 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
2825
2826 // Copy from low to high addresses, indexed from the end of each array.
2827 __ lea(end_from, end_from_addr);
2828 __ lea(end_to, end_to_addr);
2829 __ movptr(r14_length, length); // save a copy of the length
2830 assert(length == count, ""); // else fix next line:
2831 __ negptr(count); // negate and test the length
2832 __ jcc(Assembler::notZero, L_load_element);
2833
2834 // Empty array: Nothing to do.
2835 __ xorptr(rax, rax); // return 0 on (trivial) success
2836 __ jmp(L_done);
2837
2838 // ======== begin loop ========
2839 // (Loop is rotated; its entry is L_load_element.)
2840 // Loop control:
2841 // for (count = -count; count != 0; count++)
2842 // Base pointers src, dst are biased by 8*(count-1), to last element.
2843 __ align(OptoLoopAlignment);
2844
2845 __ BIND(L_store_element);
2846 __ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, AS_RAW); // store the oop
2847 __ increment(count); // increment the count toward zero
2848 __ jcc(Assembler::zero, L_do_card_marks);
2849
2850 // ======== loop entry is here ========
2851 __ BIND(L_load_element);
2852 __ load_heap_oop(rax_oop, from_element_addr, noreg, noreg, AS_RAW); // load the oop
2853 __ testptr(rax_oop, rax_oop);
2854 __ jcc(Assembler::zero, L_store_element);
2855
2856 __ load_klass(r11_klass, rax_oop, rscratch1); // query the object klass
2857 generate_type_check(r11_klass, ckoff, ckval, L_store_element);
2858 // ======== end loop ========
2859
2860 // It was a real error; we must depend on the caller to finish the job.
2861 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
2862 // Emit GC store barriers for the oops we have copied (r14 + rdx),
2863 // and report their number to the caller.
2864 assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2865 Label L_post_barrier;
2866 __ addptr(r14_length, count); // K = (original - remaining) oops
2867 __ movptr(rax, r14_length); // save the value
2868 __ notptr(rax); // report (-1^K) to caller (does not affect flags)
2869 __ jccb(Assembler::notZero, L_post_barrier);
2870 __ jmp(L_done); // K == 0, nothing was copied, skip post barrier
2871
2872 // Come here on success only.
2873 __ BIND(L_do_card_marks);
2874 __ xorptr(rax, rax); // return 0 on success
2875
2876 __ BIND(L_post_barrier);
2877 bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);
2878
2879 // Common exit point (success or failure).
2880 __ BIND(L_done);
2881 __ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2882 __ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2883 __ movptr(r10, Address(rsp, saved_r10_offset * wordSize));
2884 restore_arg_regs();
2885 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2886 __ leave(); // required for proper stackwalking of RuntimeStub frame
2887 __ ret(0);
2888
2889 return start;
2890 }
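
The -1^K convention documented above is easy to verify in plain C++: ~K can never be zero for K >= 0, so the caller can distinguish total success (rax == 0) from a partial transfer, and recover K with a second bitwise NOT:

    #include <cstdio>

    int main() {
      long K = 5;            // oops copied before the failing element
      long rax = ~K;         // stub's failure return: -1 ^ K, never 0
      long recovered = ~rax; // caller-side decode: ~(~K) == K
      printf("rax = %ld, recovered K = %ld\n", rax, recovered); // rax = -6, K = 5
      return 0;
    }
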
2891
2892 //
2893 // Generate 'unsafe' array copy stub
2894 // Though just as safe as the other stubs, it takes an unscaled
2895 // size_t argument instead of an element count.
2896 //
2897 // Input:
2898 // c_rarg0 - source array address
2899 // c_rarg1 - destination array address
2900 // c_rarg2 - byte count, treated as ssize_t, can be zero
2901 //
2902 // Examines the alignment of the operands and dispatches
2903 // to a long, int, short, or byte copy loop.
2904 //
2905 address generate_unsafe_copy(const char *name,
2906 address byte_copy_entry, address short_copy_entry,
2907 address int_copy_entry, address long_copy_entry) {
2908
2909 Label L_long_aligned, L_int_aligned, L_short_aligned;
2910
2911 // Input registers (before setup_arg_regs)
2912 const Register from = c_rarg0; // source array address
2913 const Register to = c_rarg1; // destination array address
2914 const Register size = c_rarg2; // byte count (size_t)
2915
2916 // Register used as a temp
2917 const Register bits = rax; // test copy of low bits
2918
2919 __ align(CodeEntryAlignment);
2920 StubCodeMark mark(this, "StubRoutines", name);
2921 address start = __ pc();
2922
2923 __ enter(); // required for proper stackwalking of RuntimeStub frame
2924
2925 // bump this on entry, not on exit:
2926 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
2927
2928 __ mov(bits, from);
2929 __ orptr(bits, to);
2930 __ orptr(bits, size);
2931
2932 __ testb(bits, BytesPerLong-1);
2933 __ jccb(Assembler::zero, L_long_aligned);
2934
2935 __ testb(bits, BytesPerInt-1);
2936 __ jccb(Assembler::zero, L_int_aligned);
2937
2938 __ testb(bits, BytesPerShort-1);
2939 __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
2940
2941 __ BIND(L_short_aligned);
2942 __ shrptr(size, LogBytesPerShort); // size => short_count
2943 __ jump(RuntimeAddress(short_copy_entry));
2944
2945 __ BIND(L_int_aligned);
2946 __ shrptr(size, LogBytesPerInt); // size => int_count
2947 __ jump(RuntimeAddress(int_copy_entry));
2948
2949 __ BIND(L_long_aligned);
2950 __ shrptr(size, LogBytesPerLong); // size => qword_count
2951 __ jump(RuntimeAddress(long_copy_entry));
2952
2953 return start;
2954 }
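
The dispatch above relies on one property worth spelling out: OR-ing both addresses and the byte count lets a single test of the low bits prove that all three are multiples of the element size. A C++ rendering of the same classification:

    #include <cstddef>
    #include <cstdint>

    enum CopyKind { BYTE_COPY, SHORT_COPY, INT_COPY, LONG_COPY };

    static CopyKind classify(const void* from, void* to, size_t size) {
      uintptr_t bits = (uintptr_t)from | (uintptr_t)to | (uintptr_t)size;
      if ((bits & 7) == 0) return LONG_COPY;   // testb(bits, BytesPerLong-1)
      if ((bits & 3) == 0) return INT_COPY;    // testb(bits, BytesPerInt-1)
      if ((bits & 1) == 0) return SHORT_COPY;  // testb(bits, BytesPerShort-1)
      return BYTE_COPY;
    }
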
2955
2956 // Perform range checks on the proposed arraycopy.
2957 // Kills temp, but nothing else.
2958 // Also, clean the sign bits of src_pos and dst_pos.
2959 void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
2960 Register src_pos, // source position (c_rarg1)
2961 Register dst, // destination array oop (c_rarg2)
2962 Register dst_pos, // destination position (c_rarg3)
2963 Register length,
2964 Register temp,
2965 Label& L_failed) {
2966 BLOCK_COMMENT("arraycopy_range_checks:");
2967
2968 // if (src_pos + length > arrayOop(src)->length()) FAIL;
2969 __ movl(temp, length);
2970 __ addl(temp, src_pos); // src_pos + length
2971 __ cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
2972 __ jcc(Assembler::above, L_failed);
2973
2974 // if (dst_pos + length > arrayOop(dst)->length()) FAIL;
2975 __ movl(temp, length);
2976 __ addl(temp, dst_pos); // dst_pos + length
2977 __ cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
2978 __ jcc(Assembler::above, L_failed);
2979
2980 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
2981 // Move with sign extension can be used since they are positive.
2982 __ movslq(src_pos, src_pos);
2983 __ movslq(dst_pos, dst_pos);
2984
2985 BLOCK_COMMENT("arraycopy_range_checks done");
2986 }
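
The 'above' condition used twice here is an unsigned compare against the array length; together with the explicit negativity tests the caller (generate_generic_copy) performs first, it bounds pos + length in 32 bits without a 64-bit add. A worked C++ equivalent under those caller-side assumptions:

    #include <cstdint>

    // pos and length have already been checked non-negative by the caller;
    // this mirrors the movl/addl/cmpl + jcc(above, L_failed) pattern above
    static bool range_ok(int32_t pos, int32_t length, int32_t array_length) {
      uint32_t end = (uint32_t)pos + (uint32_t)length; // cannot wrap: both < 2^31
      return end <= (uint32_t)array_length;            // 'above' would mean FAIL
    }
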
2987
2988 //
2989 // Generate generic array copy stubs
2990 //
2991 // Input:
2992 // c_rarg0 - src oop
2993 // c_rarg1 - src_pos (32-bits)
2994 // c_rarg2 - dst oop
2995 // c_rarg3 - dst_pos (32-bits)
2996 // not Win64
2997 // c_rarg4 - element count (32-bits)
2998 // Win64
2999 // rsp+40 - element count (32-bits)
3000 //
3001 // Output:
3002 // rax == 0 - success
3003 // rax == -1^K - failure, where K is partial transfer count
3004 //
3005 address generate_generic_copy(const char *name,
3006 address byte_copy_entry, address short_copy_entry,
3007 address int_copy_entry, address oop_copy_entry,
3008 address long_copy_entry, address checkcast_copy_entry) {
3009
3010 Label L_failed, L_failed_0, L_objArray;
3011 Label L_copy_shorts, L_copy_ints, L_copy_longs;
3012
3013 // Input registers
3014 const Register src = c_rarg0; // source array oop
3015 const Register src_pos = c_rarg1; // source position
3016 const Register dst = c_rarg2; // destination array oop
3017 const Register dst_pos = c_rarg3; // destination position
3018#ifndef _WIN64
3019 const Register length = c_rarg4;
3020 const Register rklass_tmp = r9; // load_klass
3021#else
3022 const Address length(rsp, 7 * wordSize); // elements count is on stack on Win64
3023 const Register rklass_tmp = rdi; // load_klass
3024#endif
3025
3026 { int modulus = CodeEntryAlignment;
3027 int target = modulus - 5; // 5 = sizeof jmp(L_failed)
3028 int advance = target - (__ offset() % modulus);
3029 if (advance < 0) advance += modulus;
3030 if (advance > 0) __ nop(advance);
3031 }
3032 StubCodeMark mark(this, "StubRoutines", name);
3033
3034 // Short-hop target to L_failed. Makes for denser prologue code.
3035 __ BIND(L_failed_0);
3036 __ jmp(L_failed);
3037 assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
3038
3039 __ align(CodeEntryAlignment);
3040 address start = __ pc();
3041
3042 __ enter(); // required for proper stackwalking of RuntimeStub frame
3043
3044#ifdef _WIN64
3046 __ push(rklass_tmp); // rdi is callee-save on Windows
3046#endif
3047
3048 // bump this on entry, not on exit:
3049 inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
3050
3051 //-----------------------------------------------------------------------
3052 // Assembler stub will be used for this call to arraycopy
3053 // if the following conditions are met:
3054 //
3055 // (1) src and dst must not be null.
3056 // (2) src_pos must not be negative.
3057 // (3) dst_pos must not be negative.
3058 // (4) length must not be negative.
3059 // (5) src klass and dst klass should be the same and not NULL.
3060 // (6) src and dst should be arrays.
3061 // (7) src_pos + length must not exceed length of src.
3062 // (8) dst_pos + length must not exceed length of dst.
3063 //
3064
3065 // if (src == NULL) return -1;
3066 __ testptr(src, src); // src oop
3067 size_t j1off = __ offset();
3068 __ jccb(Assembler::zero, L_failed_0);
3069
3070 // if (src_pos < 0) return -1;
3071 __ testl(src_pos, src_pos); // src_pos (32-bits)
3072 __ jccb(Assembler::negative, L_failed_0);
3073
3074 // if (dst == NULL) return -1;
3075 __ testptr(dst, dst); // dst oop
3076 __ jccb(Assembler::zero, L_failed_0);
3077
3078 // if (dst_pos < 0) return -1;
3079 __ testl(dst_pos, dst_pos); // dst_pos (32-bits)
3080 size_t j4off = __ offset();
3081 __ jccb(Assembler::negative, L_failed_0);
3082
3083 // The first four tests are very dense code,
3084 // but not quite dense enough to put four
3085 // jumps in a 16-byte instruction fetch buffer.
3086 // That's good, because some branch predictors
3087 // do not like jumps so close together.
3088 // Make sure of this.
3089 guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
3090
3091 // registers used as temp
3092 const Register r11_length = r11; // elements count to copy
3093 const Register r10_src_klass = r10; // array klass
3094
3095 // if (length < 0) return -1;
3096 __ movl(r11_length, length); // length (elements count, 32-bits value)
3097 __ testl(r11_length, r11_length);
3098 __ jccb(Assembler::negative, L_failed_0);
3099
3100 __ load_klass(r10_src_klass, src, rklass_tmp);
3101#ifdef ASSERT
3102 // assert(src->klass() != NULL);
3103 {
3104 BLOCK_COMMENT("assert klasses not null {");
3105 Label L1, L2;
3106 __ testptr(r10_src_klass, r10_src_klass);
3107 __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL
3108 __ bind(L1);
3109 __ stop("broken null klass");
3110 __ bind(L2);
3111 __ load_klass(rax, dst, rklass_tmp);
3112 __ cmpq(rax, 0);
3113 __ jcc(Assembler::equal, L1); // this would be broken also
3114 BLOCK_COMMENT("} assert klasses not null done");
3115 }
3116#endif
3117
3118 // Load layout helper (32-bits)
3119 //
3120 // |array_tag| | header_size | element_type | |log2_element_size|
3121 // 32 30 24 16 8 2 0
3122 //
3123 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
3124 //
3125
3126 const int lh_offset = in_bytes(Klass::layout_helper_offset());
3127
3128 // Handle objArrays completely differently...
3129 const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
3130 __ cmpl(Address(r10_src_klass, lh_offset), objArray_lh);
3131 __ jcc(Assembler::equal, L_objArray);
3132
3133 // if (src->klass() != dst->klass()) return -1;
3134 __ load_klass(rax, dst, rklass_tmp);
3135 __ cmpq(r10_src_klass, rax);
3136 __ jcc(Assembler::notEqual, L_failed);
3137
3138 const Register rax_lh = rax; // layout helper
3139 __ movl(rax_lh, Address(r10_src_klass, lh_offset));
3140
3141 // if (!src->is_Array()) return -1;
3142 __ cmpl(rax_lh, Klass::_lh_neutral_value);
3143 __ jcc(Assembler::greaterEqual, L_failed);
3144
3145 // At this point, it is known to be a typeArray (array_tag 0x3).
3146#ifdef ASSERT
3147 {
3148 BLOCK_COMMENT("assert primitive array {");
3149 Label L;
3150 __ cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
3151 __ jcc(Assembler::greaterEqual, L);
3152 __ stop("must be a primitive array");
3153 __ bind(L);
3154 BLOCK_COMMENT("} assert primitive array done");
3155 }
3156#endif
3157
3158 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
3159 r10, L_failed);
3160
3161 // TypeArrayKlass
3162 //
3163 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
3164 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
3165 //
3166
3167 const Register r10_offset = r10; // array offset
3168 const Register rax_elsize = rax_lh; // element size
3169
3170 __ movl(r10_offset, rax_lh);
3171 __ shrl(r10_offset, Klass::_lh_header_size_shift);
3172 __ andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset
3173 __ addptr(src, r10_offset); // src array offset
3174 __ addptr(dst, r10_offset); // dst array offset
3175 BLOCK_COMMENT("choose copy loop based on element size");
3176 __ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
3177
3178#ifdef _WIN64
3179 __ pop(rklass_tmp); // Restore callee-save rdi
3180#endif
3181
3182 // next registers should be set before the jump to corresponding stub
3183 const Register from = c_rarg0; // source array address
3184 const Register to = c_rarg1; // destination array address
3185 const Register count = c_rarg2; // elements count
3186
3187 // 'from', 'to', 'count' registers should be set in such order
3188 // since they are the same as 'src', 'src_pos', 'dst'.
3189
3190 __ cmpl(rax_elsize, 0);
3191 __ jccb(Assembler::notEqual, L_copy_shorts);
3192 __ lea(from, Address(src, src_pos, Address::times_1, 0)); // src_addr
3193 __ lea(to, Address(dst, dst_pos, Address::times_1, 0)); // dst_addr
3194 __ movl2ptr(count, r11_length); // length
3195 __ jump(RuntimeAddress(byte_copy_entry));
3196
3197 __ BIND(L_copy_shorts);
3198 __ cmpl(rax_elsize, LogBytesPerShort);
3199 __ jccb(Assembler::notEqual, L_copy_ints);
3200 __ lea(from, Address(src, src_pos, Address::times_2, 0)); // src_addr
3201 __ lea(to, Address(dst, dst_pos, Address::times_2, 0)); // dst_addr
3202 __ movl2ptr(count, r11_length); // length
3203 __ jump(RuntimeAddress(short_copy_entry));
3204
3205 __ BIND(L_copy_ints);
3206 __ cmpl(rax_elsize, LogBytesPerInt);
3207 __ jccb(Assembler::notEqual, L_copy_longs);
3208 __ lea(from, Address(src, src_pos, Address::times_4, 0)); // src_addr
3209 __ lea(to, Address(dst, dst_pos, Address::times_4, 0)); // dst_addr
3210 __ movl2ptr(count, r11_length); // length
3211 __ jump(RuntimeAddress(int_copy_entry));
3212
3213 __ BIND(L_copy_longs);
3214#ifdef ASSERT
3215 {
3216 BLOCK_COMMENT("assert long copy {");
3217 Label L;
3218 __ cmpl(rax_elsize, LogBytesPerLong);
3219 __ jcc(Assembler::equal, L);
3220 __ stop("must be long copy, but elsize is wrong");
3221 __ bind(L);
3222 BLOCK_COMMENT("} assert long copy done");
3223 }
3224#endif
3225 __ lea(from, Address(src, src_pos, Address::times_8, 0)); // src_addr
3226 __ lea(to, Address(dst, dst_pos, Address::times_8, 0)); // dst_addr
3227 __ movl2ptr(count, r11_length); // length
3228 __ jump(RuntimeAddress(long_copy_entry));
3229
3230 // ObjArrayKlass
3231 __ BIND(L_objArray);
3232 // live at this point: r10_src_klass, r11_length, src[_pos], dst[_pos]
3233
3234 Label L_plain_copy, L_checkcast_copy;
3235 // test array classes for subtyping
3236 __ load_klass(rax, dst, rklass_tmp);
3237 __ cmpq(r10_src_klass, rax); // usual case is exact equality
3238 __ jcc(Assembler::notEqual, L_checkcast_copy);
3239
3240 // Identically typed arrays can be copied without element-wise checks.
3241 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
3242 r10, L_failed);
3243
3244 __ lea(from, Address(src, src_pos, TIMES_OOP,
3245 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
3246 __ lea(to, Address(dst, dst_pos, TIMES_OOP,
3247 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
3248 __ movl2ptr(count, r11_length); // length
3249 __ BIND(L_plain_copy);
3250#ifdef _WIN64
3251 __ pop(rklass_tmp); // Restore callee-save rdi
3252#endif
3253 __ jump(RuntimeAddress(oop_copy_entry));
3254
3255 __ BIND(L_checkcast_copy);
3256 // live at this point: r10_src_klass, r11_length, rax (dst_klass)
3257 {
3258 // Before looking at dst.length, make sure dst is also an objArray.
3259 __ cmpl(Address(rax, lh_offset), objArray_lh);
3260 __ jcc(Assembler::notEqual, L_failed);
3261
3262 // It is safe to examine both src.length and dst.length.
3263 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
3264 rax, L_failed);
3265
3266 const Register r11_dst_klass = r11;
3267 __ load_klass(r11_dst_klass, dst, rklass_tmp); // reload
3268
3269 // Marshal the base address arguments now, freeing registers.
3270 __ lea(from, Address(src, src_pos, TIMES_OOP,
3271 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
3272 __ lea(to, Address(dst, dst_pos, TIMES_OOP,
3273 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
3274 __ movl(count, length); // length (reloaded)
3275 Register sco_temp = c_rarg3; // this register is free now
3276 assert_different_registers(from, to, count, sco_temp,
3277 r11_dst_klass, r10_src_klass);
3278 assert_clean_int(count, sco_temp);
3279
3280 // Generate the type check.
3281 const int sco_offset = in_bytes(Klass::super_check_offset_offset());
3282 __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
3283 assert_clean_int(sco_temp, rax);
3284 generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
3285
3286 // Fetch destination element klass from the ObjArrayKlass header.
3287 int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
3288 __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
3289 __ movl(sco_temp, Address(r11_dst_klass, sco_offset));
3290 assert_clean_int(sco_temp, rax);
3291
3292#ifdef _WIN64
3293 __ pop(rklass_tmp); // Restore callee-save rdi
3294#endif
3295
3296 // the checkcast_copy loop needs two extra arguments:
3297 assert(c_rarg3 == sco_temp, "#3 already in place");
3298 // Set up arguments for checkcast_copy_entry.
3299 setup_arg_regs(4);
3300 __ movptr(r8, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris
3301 __ jump(RuntimeAddress(checkcast_copy_entry));
3302 }
3303
3304 __ BIND(L_failed);
3305#ifdef _WIN64
3306 __ pop(rklass_tmp); // Restore callee-save rdi
3307#endif
3308 __ xorptr(rax, rax);
3309 __ notptr(rax); // return -1
3310 __ leave(); // required for proper stackwalking of RuntimeStub frame
3311 __ ret(0);
3312
3313 return start;
3314 }
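
For reference, a C++ sketch of decoding the layout helper word described in the diagram at line 3120. The shift and mask constants are read off that diagram and are assumptions standing in for the real Klass::_lh_* constants; the structure (array tag, header size, element type, log2 element size) is what the stub relies on.

    #include <cstdint>

    static const int kHeaderShift = 16;   // header_size field, per the diagram
    static const int kByteMask    = 0xFF; // hypothetical field width

    static void decode_layout_helper(int32_t lh, int* header_bytes, int* log2_elsize) {
      *header_bytes = (lh >> kHeaderShift) & kByteMask; // shrl + andptr(r10_offset, ...)
      *log2_elsize  = lh & kByteMask;                   // andl(rax_lh, _lh_log2_element_size_mask)
    }
    // src_addr = src + header_bytes + (src_pos << log2_elsize), as in the lea()s above
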
3315
3316 address generate_data_cache_writeback() {
3317 const Register src = c_rarg0; // source address
3318
3319 __ align(CodeEntryAlignment);
3320
3321 StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback");
3322
3323 address start = __ pc();
3324 __ enter();
3325 __ cache_wb(Address(src, 0));
3326 __ leave();
3327 __ ret(0);
3328
3329 return start;
3330 }
3331
3332 address generate_data_cache_writeback_sync() {
3333 const Register is_pre = c_rarg0; // pre or post sync
3334
3335 __ align(CodeEntryAlignment);
3336
3337 StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback_sync");
3338
3339 // pre wbsync is a no-op
3340 // post wbsync translates to an sfence
3341
3342 Label skip;
3343 address start = __ pc();
3344 __ enter();
3345 __ cmpl(is_pre, 0);
3346 __ jcc(Assembler::notEqual, skip);
3347 __ cache_wbsync(false);
3348 __ bind(skip);
3349 __ leave();
3350 __ ret(0);
3351
3352 return start;
3353 }
3354
3355 void generate_arraycopy_stubs() {
3356 address entry;
3357 address entry_jbyte_arraycopy;
3358 address entry_jshort_arraycopy;
3359 address entry_jint_arraycopy;
3360 address entry_oop_arraycopy;
3361 address entry_jlong_arraycopy;
3362 address entry_checkcast_arraycopy;
3363
3364 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
3365 "jbyte_disjoint_arraycopy");
3366 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
3367 "jbyte_arraycopy");
3368
3369 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3370 "jshort_disjoint_arraycopy");
3371 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
3372 "jshort_arraycopy");
3373
3374 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, &entry,
3375 "jint_disjoint_arraycopy");
3376 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, entry,
3377 &entry_jint_arraycopy, "jint_arraycopy");
3378
3379 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, &entry,
3380 "jlong_disjoint_arraycopy");
3381 StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, entry,
3382 &entry_jlong_arraycopy, "jlong_arraycopy");
3383
3384
3385 if (UseCompressedOops) {
3386 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry,
3387 "oop_disjoint_arraycopy");
3388 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry,
3389 &entry_oop_arraycopy, "oop_arraycopy");
3390 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry,
3391 "oop_disjoint_arraycopy_uninit",
3392 /*dest_uninitialized*/true);
3393 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, entry,
3394 NULL, "oop_arraycopy_uninit",
3395 /*dest_uninitialized*/true);
3396 } else {
3397 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry,
3398 "oop_disjoint_arraycopy");
3399 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry,
3400 &entry_oop_arraycopy, "oop_arraycopy");
3401 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry,
3402 "oop_disjoint_arraycopy_uninit",
3403 /*dest_uninitialized*/true);
3404 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, entry,
3405 NULL, "oop_arraycopy_uninit",
3406 /*dest_uninitialized*/true);
3407 }
3408
3409 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3410 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3411 /*dest_uninitialized*/true);
3412
3413 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3414 entry_jbyte_arraycopy,
3415 entry_jshort_arraycopy,
3416 entry_jint_arraycopy,
3417 entry_jlong_arraycopy);
3418 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
3419 entry_jbyte_arraycopy,
3420 entry_jshort_arraycopy,
3421 entry_jint_arraycopy,
3422 entry_oop_arraycopy,
3423 entry_jlong_arraycopy,
3424 entry_checkcast_arraycopy);
3425
3426 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
3427 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
3428 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
3429 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
3430 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
3431 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
3432
3433 // We don't generate specialized code for HeapWord-aligned source
3434 // arrays, so just use the code we've already generated
3435 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
3436 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy;
3437
3438 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
3439 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy;
3440
3441 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
3442 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
3443
3444 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
3445 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
3446
3447 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
3448 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
3449
3450 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
3451 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
3452 }
3453
3454 // AES intrinsic stubs
3455 enum {AESBlockSize = 16};
3456
3457 address generate_key_shuffle_mask() {
3458 __ align(16);
3459 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
3460 address start = __ pc();
3461 __ emit_data64( 0x0405060700010203, relocInfo::none );
3462 __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none );
3463 return start;
3464 }
3465
3466 address generate_counter_shuffle_mask() {
3467 __ align(16);
3468 StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
3469 address start = __ pc();
3470 __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
3471 __ emit_data64(0x0001020304050607, relocInfo::none);
3472 return start;
3473 }
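
Both constants are pshufb control masks: each destination byte names the source byte it takes. Read little-endian, the key mask (03 02 01 00 07 06 05 04 ...) reverses the bytes inside each 32-bit word, converting Java's big-endian key ints to x86 order, while the counter mask (0f 0e 0d ... 00) reverses all 16 bytes. A scalar C++ model of the instruction's per-byte lookup:

    #include <cstdint>
    #include <cstring>

    static void pshufb_sketch(uint8_t dst[16], const uint8_t src[16], const uint8_t mask[16]) {
      uint8_t tmp[16];
      for (int i = 0; i < 16; i++) {
        tmp[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0F]; // high bit set => zero the byte
      }
      memcpy(dst, tmp, 16);
    }
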
3474
3475 // Utility routine for loading a 128-bit key word in little endian format;
3476 // can optionally specify that the shuffle mask is already in an xmm register
3477 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
3478 __ movdqu(xmmdst, Address(key, offset));
3479 if (xmm_shuf_mask != NULL) {
3480 __ pshufb(xmmdst, xmm_shuf_mask);
3481 } else {
3482 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3483 }
3484 }
3485
3486 // Utility routine for increasing the 128-bit counter (iv in CTR mode)
3487 void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
3488 __ pextrq(reg, xmmdst, 0x0);
3489 __ addq(reg, inc_delta);
3490 __ pinsrq(xmmdst, reg, 0x0);
3491 __ jcc(Assembler::carryClear, next_block); // jump if no carry
3492 __ pextrq(reg, xmmdst, 0x01); // Carry
3493 __ addq(reg, 0x01);
3494 __ pinsrq(xmmdst, reg, 0x01); // Carry end
3495 __ BIND(next_block); // next instruction
3496 }
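
inc_counter treats the xmm register as a 128-bit integer held in two 64-bit lanes and only touches the high lane when addq carries out of the low one. The same logic in plain C++ (a sketch, not the emitted code):

    #include <cstdint>

    struct Counter128 { uint64_t lo, hi; }; // xmm lanes 0 and 1

    static void inc_counter_sketch(Counter128& c, uint64_t inc_delta) {
      uint64_t old_lo = c.lo;
      c.lo += inc_delta;      // pextrq/addq/pinsrq on lane 0
      if (c.lo < old_lo) {    // addq set carry: the low qword wrapped
        c.hi += 1;            // pextrq/addq/pinsrq on lane 1
      }
    }
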
3497
3498 // Arguments:
3499 //
3500 // Inputs:
3501 // c_rarg0 - source byte array address
3502 // c_rarg1 - destination byte array address
3503 // c_rarg2 - K (key) in little endian int array
3504 //
3505 address generate_aescrypt_encryptBlock() {
3506 assert(UseAES, "need AES instructions and misaligned SSE support");
3507 __ align(CodeEntryAlignment);
3508 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
3509 Label L_doLast;
3510 address start = __ pc();
3511
3512 const Register from = c_rarg0; // source array address
3513 const Register to = c_rarg1; // destination array address
3514 const Register key = c_rarg2; // key array address
3515 const Register keylen = rax;
3516
3517 const XMMRegister xmm_result = xmm0;
3518 const XMMRegister xmm_key_shuf_mask = xmm1;
3519 // On win64 xmm6-xmm15 must be preserved so don't use them.
3520 const XMMRegister xmm_temp1 = xmm2;
3521 const XMMRegister xmm_temp2 = xmm3;
3522 const XMMRegister xmm_temp3 = xmm4;
3523 const XMMRegister xmm_temp4 = xmm5;
3524
3525 __ enter(); // required for proper stackwalking of RuntimeStub frame
3526
3527 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
3528 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3529
3530 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3531 __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
3532
3533 // For encryption, the java expanded key ordering is just what we need
3534 // we don't know if the key is aligned, hence not using load-execute form
3535
3536 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
3537 __ pxor(xmm_result, xmm_temp1);
3538
3539 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
3540 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
3541 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
3542 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
3543
3544 __ aesenc(xmm_result, xmm_temp1);
3545 __ aesenc(xmm_result, xmm_temp2);
3546 __ aesenc(xmm_result, xmm_temp3);
3547 __ aesenc(xmm_result, xmm_temp4);
3548
3549 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
3550 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
3551 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
3552 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
3553
3554 __ aesenc(xmm_result, xmm_temp1);
3555 __ aesenc(xmm_result, xmm_temp2);
3556 __ aesenc(xmm_result, xmm_temp3);
3557 __ aesenc(xmm_result, xmm_temp4);
3558
3559 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
3560 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
3561
3562 __ cmpl(keylen, 44);
3563 __ jccb(Assembler::equal, L_doLast);
3564
3565 __ aesenc(xmm_result, xmm_temp1);
3566 __ aesenc(xmm_result, xmm_temp2);
3567
3568 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
3569 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
3570
3571 __ cmpl(keylen, 52);
3572 __ jccb(Assembler::equal, L_doLast);
3573
3574 __ aesenc(xmm_result, xmm_temp1);
3575 __ aesenc(xmm_result, xmm_temp2);
3576
3577 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
3578 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
3579
3580 __ BIND(L_doLast);
3581 __ aesenc(xmm_result, xmm_temp1);
3582 __ aesenclast(xmm_result, xmm_temp2);
3583 __ movdqu(Address(to, 0), xmm_result); // store the result
3584 __ xorptr(rax, rax); // return 0
3585 __ leave(); // required for proper stackwalking of RuntimeStub frame
3586 __ ret(0);
3587
3588 return start;
3589 }
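
The two cmpl/jccb pairs above fall out of AES key-schedule arithmetic: the expanded key holds 4*(rounds+1) ints, so 44, 52 and 60 ints correspond to AES-128/192/256 with 10, 12 and 14 rounds, and the shorter keys simply skip the extra aesenc pairs. Checked in plain C++:

    #include <cstdio>

    int main() {
      int lens[] = {44, 52, 60};
      for (int keylen : lens) {
        int rounds = keylen / 4 - 1;  // expanded key has 4*(rounds+1) ints
        printf("keylen=%d ints -> AES-%d, %d rounds\n", keylen, 32 * (rounds - 6), rounds);
      }
      return 0;
    }
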
3590
3591
3592 // Arguments:
3593 //
3594 // Inputs:
3595 // c_rarg0 - source byte array address
3596 // c_rarg1 - destination byte array address
3597 // c_rarg2 - K (key) in little endian int array
3598 //
3599 address generate_aescrypt_decryptBlock() {
3600 assert(UseAES, "need AES instructions and misaligned SSE support");
3601 __ align(CodeEntryAlignment);
3602 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
3603 Label L_doLast;
3604 address start = __ pc();
3605
3606 const Register from = c_rarg0; // source array address
3607 const Register to = c_rarg1; // destination array address
3608 const Register key = c_rarg2; // key array address
3609 const Register keylen = rax;
3610
3611 const XMMRegister xmm_result = xmm0;
3612 const XMMRegister xmm_key_shuf_mask = xmm1;
3613 // On win64 xmm6-xmm15 must be preserved so don't use them.
3614 const XMMRegister xmm_temp1 = xmm2;
3615 const XMMRegister xmm_temp2 = xmm3;
3616 const XMMRegister xmm_temp3 = xmm4;
3617 const XMMRegister xmm_temp4 = xmm5;
3618
3619 __ enter(); // required for proper stackwalking of RuntimeStub frame
3620
3621 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
3622 __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3623
3624 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3625 __ movdqu(xmm_result, Address(from, 0));
3626
3627 // for decryption java expanded key ordering is rotated one position from what we want
3628 // so we start from 0x10 here and hit 0x00 last
3629 // we don't know if the key is aligned, hence not using load-execute form
3630 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
3631 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
3632 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
3633 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
3634
3635 __ pxor(xmm_result, xmm_temp1);
3636 __ aesdec(xmm_result, xmm_temp2);
3637 __ aesdec(xmm_result, xmm_temp3);
3638 __ aesdec(xmm_result, xmm_temp4);
3639
3640 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
3641 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
3642 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
3643 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
3644
3645 __ aesdec(xmm_result, xmm_temp1);
3646 __ aesdec(xmm_result, xmm_temp2);
3647 __ aesdec(xmm_result, xmm_temp3);
3648 __ aesdec(xmm_result, xmm_temp4);
3649
3650 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
3651 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
3652 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
3653
3654 __ cmpl(keylen, 44);
3655 __ jccb(Assembler::equal, L_doLast);
3656
3657 __ aesdec(xmm_result, xmm_temp1);
3658 __ aesdec(xmm_result, xmm_temp2);
3659
3660 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
3661 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
3662
3663 __ cmpl(keylen, 52);
3664 __ jccb(Assembler::equal, L_doLast);
3665
3666 __ aesdec(xmm_result, xmm_temp1);
3667 __ aesdec(xmm_result, xmm_temp2);
3668
3669 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
3670 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
3671
3672 __ BIND(L_doLast);
3673 __ aesdec(xmm_result, xmm_temp1);
3674 __ aesdec(xmm_result, xmm_temp2);
3675
3676 // for decryption the aesdeclast operation is always on key+0x00
3677 __ aesdeclast(xmm_result, xmm_temp3);
3678 __ movdqu(Address(to, 0), xmm_result); // store the result
3679 __ xorptr(rax, rax); // return 0
3680 __ leave(); // required for proper stackwalking of RuntimeStub frame
3681 __ ret(0);
3682
3683 return start;
3684 }
3685
3686
3687 // Arguments:
3688 //
3689 // Inputs:
3690 // c_rarg0 - source byte array address
3691 // c_rarg1 - destination byte array address
3692 // c_rarg2 - K (key) in little endian int array
3693 // c_rarg3 - r vector byte array address
3694 // c_rarg4 - input length
3695 //
3696 // Output:
3697 // rax - input length
3698 //
3699 address generate_cipherBlockChaining_encryptAESCrypt() {
3700 assert(UseAES, "need AES instructions and misaligned SSE support")do { if (!(UseAES)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp"
, 3700, "assert(" "UseAES" ") failed", "need AES instructions and misaligned SSE support"
); ::breakpoint(); } } while (0)
;
3701 __masm-> align(CodeEntryAlignment);
3702 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
3703 address start = __masm-> pc();
3704
3705 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
3706 const Register from = c_rarg0; // source array address
3707 const Register to = c_rarg1; // destination array address
3708 const Register key = c_rarg2; // key array address
3709 const Register rvec = c_rarg3; // r byte array initialized from initvector array address
3710 // and left with the results of the last encryption block
3711#ifndef _WIN64
3712 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
3713#else
3714 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
3715 const Register len_reg = r11; // pick the volatile windows register
3716#endif
3717 const Register pos = rax;
3718
3719 // xmm register assignments for the loops below
3720 const XMMRegister xmm_result = xmm0;
3721 const XMMRegister xmm_temp = xmm1;
3722 // keys 0-10 preloaded into xmm2-xmm12
3723 const int XMM_REG_NUM_KEY_FIRST = 2;
3724 const int XMM_REG_NUM_KEY_LAST = 15;
3725 const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
3726 const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
3727 const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
3728 const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
3729 const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);
3730
3731 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
3732
3733#ifdef _WIN64
3734 // on win64, fill len_reg from stack position
3735 __masm-> movl(len_reg, len_mem);
3736#else
3737 __masm-> push(len_reg); // Save
3738#endif
3739
3740 const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
3741 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3742 // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
3743 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
3744 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
3745 offset += 0x10;
3746 }
3747 __masm-> movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
3748
3749 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
3750 __masm-> movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3751 __masm-> cmpl(rax, 44);
3752 __masm-> jcc(Assembler::notEqual, L_key_192_256);
3753
3754 // 128 bit code follows here
3755 __masm-> movptr(pos, 0);
3756 __masm-> align(OptoLoopAlignment);
3757
3758 __masm-> BIND(L_loopTop_128);
3759 __masm-> movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
3760 __masm-> pxor (xmm_result, xmm_temp); // xor with the current r vector
3761 __masm-> pxor (xmm_result, xmm_key0); // do the aes rounds
3762 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
3763 __masm-> aesenc(xmm_result, as_XMMRegister(rnum));
3764 }
3765 __masm-> aesenclast(xmm_result, xmm_key10);
3766 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
3767 // no need to store r to memory until we exit
3768 __masm-> addptr(pos, AESBlockSize);
3769 __masm-> subptr(len_reg, AESBlockSize);
3770 __masm-> jcc(Assembler::notEqual, L_loopTop_128);
3771
3772 __masm-> BIND(L_exit);
3773 __masm-> movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
3774
3775#ifdef _WIN64
3776 __masm-> movl(rax, len_mem);
3777#else
3778 __masm-> pop(rax); // return length
3779#endif
3780 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
3781 __masm-> ret(0);
3782
3783 __masm-> BIND(L_key_192_256);
3784 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
3785 load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
3786 load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
3787 __masm-> cmpl(rax, 52);
3788 __masm-> jcc(Assembler::notEqual, L_key_256);
3789
3790 // 192-bit code follows here (could be changed to use more xmm registers)
3791 __masm-> movptr(pos, 0);
3792 __masm-> align(OptoLoopAlignment);
3793
3794 __masm-> BIND(L_loopTop_192);
3795 __masm-> movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
3796 __masm-> pxor (xmm_result, xmm_temp); // xor with the current r vector
3797 __masm-> pxor (xmm_result, xmm_key0); // do the aes rounds
3798 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
3799 __masm-> aesenc(xmm_result, as_XMMRegister(rnum));
3800 }
3801 __masm-> aesenclast(xmm_result, xmm_key12);
3802 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
3803 // no need to store r to memory until we exit
3804 __masm-> addptr(pos, AESBlockSize);
3805 __masm-> subptr(len_reg, AESBlockSize);
3806 __masm-> jcc(Assembler::notEqual, L_loopTop_192);
3807 __masm-> jmp(L_exit);
3808
3809 __masm-> BIND(L_key_256);
3810 // 256-bit code follows here (could be changed to use more xmm registers)
3811 load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
3812 __masm-> movptr(pos, 0);
3813 __masm-> align(OptoLoopAlignment);
3814
3815 __masm-> BIND(L_loopTop_256);
3816 __masm-> movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
3817 __masm-> pxor (xmm_result, xmm_temp); // xor with the current r vector
3818 __masm-> pxor (xmm_result, xmm_key0); // do the aes rounds
3819 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
3820 __masm-> aesenc(xmm_result, as_XMMRegister(rnum));
3821 }
3822 load_key(xmm_temp, key, 0xe0);
3823 __masm-> aesenclast(xmm_result, xmm_temp);
3824 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
3825 // no need to store r to memory until we exit
3826 __masm-> addptr(pos, AESBlockSize);
3827 __masm-> subptr(len_reg, AESBlockSize);
3828 __masm-> jcc(Assembler::notEqual, L_loopTop_256);
3829 __masm-> jmp(L_exit);
3830
3831 return start;
3832 }
3833
3834 // Safefetch stubs.
3835 void generate_safefetch(const char* name, int size, address* entry,
3836 address* fault_pc, address* continuation_pc) {
3837 // safefetch signatures:
3838 // int SafeFetch32(int* adr, int errValue);
3839 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
3840 //
3841 // arguments:
3842 // c_rarg0 = adr
3843 // c_rarg1 = errValue
3844 //
3845 // result:
3846 // rax = *adr or errValue
3847
3848 StubCodeMark mark(this, "StubRoutines", name);
3849
3850 // Entry point, pc or function descriptor.
3851 *entry = __masm-> pc();
3852
3853 // Load *adr into c_rarg1, may fault.
3854 *fault_pc = __masm-> pc();
3855 switch (size) {
3856 case 4:
3857 // int32_t
3858 __masm-> movl(c_rarg1, Address(c_rarg0, 0));
3859 break;
3860 case 8:
3861 // int64_t
3862 __masm-> movq(c_rarg1, Address(c_rarg0, 0));
3863 break;
3864 default:
3865 ShouldNotReachHere();
3866 }
3867
3868 // return errValue or *adr
3869 *continuation_pc = __masm-> pc();
3870 __masm-> movq(rax, c_rarg1);
3871 __masm-> ret(0);
3872 }
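 // Usage sketch: a SafeFetch stub behaves like a load that cannot crash. If
 // the movl/movq at *fault_pc takes a fault, the VM's signal handler resumes
 // the stub at *continuation_pc with c_rarg1 still holding errValue, so
 //
 //   int v = SafeFetch32(maybe_unmapped, -1);  // -1 instead of a SIGSEGV
 //
 // can probe memory of unknown validity.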
3873
3874 // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
3875 // to hide instruction latency
3876 //
3877 // Arguments:
3878 //
3879 // Inputs:
3880 // c_rarg0 - source byte array address
3881 // c_rarg1 - destination byte array address
3882 // c_rarg2 - K (key) in little endian int array
3883 // c_rarg3 - r vector byte array address
3884 // c_rarg4 - input length
3885 //
3886 // Output:
3887 // rax - input length
3888 //
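 // Unlike encryption, CBC decryption has no serial dependency between
 // blocks: P[i] = D_K(C[i]) ^ C[i-1], and every C[i-1] is already present in
 // the input array. A sketch of one four-wide step (decrypt_block is a
 // hypothetical per-block helper; the generated loop interleaves the aesdec
 // rounds of all four blocks so their latencies overlap):
 //
 //   for (int j = 0; j < 4; j++)   // prev[j] = rvec or the preceding ciphertext block
 //     store16(to + pos + 16*j, decrypt_block(load16(from + pos + 16*j), key) ^ prev[j]);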
3889 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
3890 assert(UseAES, "need AES instructions and misaligned SSE support");
3891 __masm-> align(CodeEntryAlignment);
3892 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
3893 address start = __masm-> pc();
3894
3895 const Register from = c_rarg0; // source array address
3896 const Register to = c_rarg1; // destination array address
3897 const Register key = c_rarg2; // key array address
3898 const Register rvec = c_rarg3; // r byte array initialized from initvector array address
3899 // and left with the results of the last encryption block
3900#ifndef _WIN64
3901 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
3902#else
3903 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
3904 const Register len_reg = r11; // pick the volatile windows register
3905#endif
3906 const Register pos = rax;
3907
3908 const int PARALLEL_FACTOR = 4;
3909 const int ROUNDS[3] = { 10, 12, 14 }; // aes rounds for key128, key192, key256
3910
3911 Label L_exit;
3912 Label L_singleBlock_loopTopHead[3]; // 128, 192, 256
3913 Label L_singleBlock_loopTopHead2[3]; // 128, 192, 256
3914 Label L_singleBlock_loopTop[3]; // 128, 192, 256
3915 Label L_multiBlock_loopTopHead[3]; // 128, 192, 256
3916 Label L_multiBlock_loopTop[3]; // 128, 192, 256
3917
3918 // keys 0-10 preloaded into xmm5-xmm15
3919 const int XMM_REG_NUM_KEY_FIRST = 5;
3920 const int XMM_REG_NUM_KEY_LAST = 15;
3921 const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
3922 const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
3923
3924 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
3925
3926#ifdef _WIN64
3927 // on win64, fill len_reg from stack position
3928 __masm-> movl(len_reg, len_mem);
3929#else
3930 __masm-> push(len_reg); // Save
3931#endif
3932 __masm-> push(rbx);
3933 // the java expanded key ordering is rotated one position from what we want
3934 // so we start from 0x10 here and hit 0x00 last
3935 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
3936 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3937 // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
3938 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
3939 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
3940 offset += 0x10;
3941 }
3942 load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);
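 // xmm5..xmm14 now hold the round keys at offsets 0x10..0xa0 and xmm15 the
 // 0x00 key, so the aesdec loops below can walk the registers in ascending
 // order and finish with aesdeclast on xmm_key_last.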
3943
3944 const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block
3945
3946 // registers holding the four results in the parallelized loop
3947 const XMMRegister xmm_result0 = xmm0;
3948 const XMMRegister xmm_result1 = xmm2;
3949 const XMMRegister xmm_result2 = xmm3;
3950 const XMMRegister xmm_result3 = xmm4;
3951
3952 __masm-> movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec
3953
3954 __masm-> xorptr(pos, pos);
3955
3956 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
3957 __masm-> movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3958 __masm-> cmpl(rbx, 52);
3959 __masm-> jcc(Assembler::equal, L_multiBlock_loopTopHead[1]);
3960 __masm-> cmpl(rbx, 60);
3961 __masm-> jcc(Assembler::equal, L_multiBlock_loopTopHead[2]);
3962
3963#define DoFour(opc, src_reg) \
3964 __masm-> opc(xmm_result0, src_reg); \
3965 __masm-> opc(xmm_result1, src_reg); \
3966 __masm-> opc(xmm_result2, src_reg); \
3967 __masm-> opc(xmm_result3, src_reg); \
3968
3969 for (int k = 0; k < 3; ++k) {
3970 __masm-> BIND(L_multiBlock_loopTopHead[k]);
3971 if (k != 0) {
3972 __masm-> cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left
3973 __masm-> jcc(Assembler::less, L_singleBlock_loopTopHead2[k]);
3974 }
3975 if (k == 1) {
3976 __masm-> subptr(rsp, 6 * wordSize);
3977 __masm-> movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15
3978 load_key(xmm15, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
3979 __masm-> movdqu(Address(rsp, 2 * wordSize), xmm15);
3980 load_key(xmm1, key, 0xc0); // 0xc0;
3981 __masm-> movdqu(Address(rsp, 4 * wordSize), xmm1);
3982 } else if (k == 2) {
3983 __masm-> subptr(rsp, 10 * wordSize);
3984 __masm-> movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15
3985 load_key(xmm15, key, 0xd0); // 0xd0; 256-bit key goes up to 0xe0
3986 __masm-> movdqu(Address(rsp, 6 * wordSize), xmm15);
3987 load_key(xmm1, key, 0xe0); // 0xe0;
3988 __masm-> movdqu(Address(rsp, 8 * wordSize), xmm1);
3989 load_key(xmm15, key, 0xb0); // 0xb0;
3990 __masm-> movdqu(Address(rsp, 2 * wordSize), xmm15);
3991 load_key(xmm1, key, 0xc0); // 0xc0;
3992 __masm-> movdqu(Address(rsp, 4 * wordSize), xmm1);
3993 }
3994 __masm-> align(OptoLoopAlignment);
3995 __masm-> BIND(L_multiBlock_loopTop[k]);
3996 __masm-> cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left
3997 __masm-> jcc(Assembler::less, L_singleBlock_loopTopHead[k]);
3998
3999 if (k != 0) {
4000 __masm-> movdqu(xmm15, Address(rsp, 2 * wordSize));
4001 __masm-> movdqu(xmm1, Address(rsp, 4 * wordSize));
4002 }
4003
4004 __masm-> movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers
4005 __masm-> movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
4006 __masm-> movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
4007 __masm-> movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
4008
4009 DoFour(pxor, xmm_key_first);
4010 if (k == 0) {
4011 for (int rnum = 1; rnum < ROUNDS[k]; rnum++) {
4012 DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4013 }
4014 DoFour(aesdeclast, xmm_key_last);
4015 } else if (k == 1) {
4016 for (int rnum = 1; rnum <= ROUNDS[k]-2; rnum++) {
4017 DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4018 }
4019 __masm-> movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again.
4020 DoFour(aesdec, xmm1); // key : 0xc0
4021 __masm-> movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again
4022 DoFour(aesdeclast, xmm_key_last);
4023 } else if (k == 2) {
4024 for (int rnum = 1; rnum <= ROUNDS[k] - 4; rnum++) {
4025 DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4026 }
4027 DoFour(aesdec, xmm1); // key : 0xc0
4028 __masm-> movdqu(xmm15, Address(rsp, 6 * wordSize));
4029 __masm-> movdqu(xmm1, Address(rsp, 8 * wordSize));
4030 DoFour(aesdec, xmm15); // key : 0xd0
4031 __masm-> movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again.
4032 DoFour(aesdec, xmm1); // key : 0xe0
4033 __masm-> movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again
4034 DoFour(aesdeclast, xmm_key_last);
4035 }
4036
4037 // for each result, xor with the r vector of previous cipher block
4038 __masm-> pxor(xmm_result0, xmm_prev_block_cipher);
4039 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize));
4040 __masm-> pxor(xmm_result1, xmm_prev_block_cipher);
4041 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize));
4042 __masm-> pxor(xmm_result2, xmm_prev_block_cipher);
4043 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize));
4044 __masm-> pxor(xmm_result3, xmm_prev_block_cipher);
4045 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks
4046 if (k != 0) {
4047 __masm-> movdqu(Address(rvec, 0x00), xmm_prev_block_cipher);
4048 }
4049
4050 __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output
4051 __masm-> movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
4052 __masm-> movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
4053 __masm-> movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
4054
4055 __masm-> addptr(pos, PARALLEL_FACTOR * AESBlockSize);
4056 __masm-> subptr(len_reg, PARALLEL_FACTOR * AESBlockSize);
4057 __masm-> jmp(L_multiBlock_loopTop[k]);
4058
4059 // registers used in the non-parallelized loops
4060 // xmm register assignments for the loops below
4061 const XMMRegister xmm_result = xmm0;
4062 const XMMRegister xmm_prev_block_cipher_save = xmm2;
4063 const XMMRegister xmm_key11 = xmm3;
4064 const XMMRegister xmm_key12 = xmm4;
4065 const XMMRegister key_tmp = xmm4;
4066
4067 __masm-> BIND(L_singleBlock_loopTopHead[k]);
4068 if (k == 1) {
4069 __masm-> addptr(rsp, 6 * wordSize);
4070 } else if (k == 2) {
4071 __masm-> addptr(rsp, 10 * wordSize);
4072 }
4073 __masm-> cmpptr(len_reg, 0); // any blocks left??
4074 __masm-> jcc(Assembler::equal, L_exit);
4075 __masm-> BIND(L_singleBlock_loopTopHead2[k]);
4076 if (k == 1) {
4077 load_key(xmm_key11, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
4078 load_key(xmm_key12, key, 0xc0); // 0xc0; 192-bit key goes up to 0xc0
4079 }
4080 if (k == 2) {
4081 load_key(xmm_key11, key, 0xb0); // 0xb0; 256-bit key goes up to 0xe0
4082 }
4083 __masm-> align(OptoLoopAlignment);
4084 __masm-> BIND(L_singleBlock_loopTop[k]);
4085 __masm-> movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
4086 __masm-> movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
4087 __masm-> pxor(xmm_result, xmm_key_first); // do the aes dec rounds
4088 for (int rnum = 1; rnum <= 9 ; rnum++) {
4089 __masm-> aesdec(xmm_result, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4090 }
4091 if (k == 1) {
4092 __masm-> aesdec(xmm_result, xmm_key11);
4093 __masm-> aesdec(xmm_result, xmm_key12);
4094 }
4095 if (k == 2) {
4096 __masm-> aesdec(xmm_result, xmm_key11);
4097 load_key(key_tmp, key, 0xc0);
4098 __masm-> aesdec(xmm_result, key_tmp);
4099 load_key(key_tmp, key, 0xd0);
4100 __masm-> aesdec(xmm_result, key_tmp);
4101 load_key(key_tmp, key, 0xe0);
4102 __masm-> aesdec(xmm_result, key_tmp);
4103 }
4104
4105 __masm-> aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0
4106 __masm-> pxor(xmm_result, xmm_prev_block_cipher); // xor with the current r vector
4107 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
4108 // no need to store r to memory until we exit
4109 __masm-> movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
4110 __masm-> addptr(pos, AESBlockSize);
4111 __masm-> subptr(len_reg, AESBlockSize);
4112 __masm-> jcc(Assembler::notEqual, L_singleBlock_loopTop[k]);
4113 if (k != 2) {
4114 __masm-> jmp(L_exit);
4115 }
4116 } //for 128/192/256
4117
4118 __masm-> BIND(L_exit);
4119 __masm-> movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object
4120 __masm-> pop(rbx);
4121#ifdef _WIN64
4122 __masm-> movl(rax, len_mem);
4123#else
4124 __masm-> pop(rax); // return length
4125#endif
4126 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4127 __masm-> ret(0);
4128 return start;
4129}
4130
4131 address generate_electronicCodeBook_encryptAESCrypt() {
4132 __masm-> align(CodeEntryAlignment);
4133 StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_encryptAESCrypt");
4134 address start = __masm-> pc();
4135 const Register from = c_rarg0; // source array address
4136 const Register to = c_rarg1; // destination array address
4137 const Register key = c_rarg2; // key array address
4138 const Register len = c_rarg3; // src len (must be multiple of blocksize 16)
4139 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
4140 __masm-> aesecb_encrypt(from, to, key, len);
4141 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4142 __masm-> ret(0);
4143 return start;
4144 }
4145
4146 address generate_electronicCodeBook_decryptAESCrypt() {
4147 __masm-> align(CodeEntryAlignment);
4148 StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_decryptAESCrypt");
4149 address start = __masm-> pc();
4150 const Register from = c_rarg0; // source array address
4151 const Register to = c_rarg1; // destination array address
4152 const Register key = c_rarg2; // key array address
4153 const Register len = c_rarg3; // src len (must be multiple of blocksize 16)
4154 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
4155 __masm-> aesecb_decrypt(from, to, key, len);
4156 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4157 __masm-> ret(0);
4158 return start;
4159 }
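 // Both ECB stubs delegate to macroassembler helpers; in ECB mode each
 // 16-byte block is independent, out[i] = E_K(in[i]) on encryption and
 // out[i] = D_K(in[i]) on decryption, with no chaining state to carry.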
4160
4161 // ofs and limit are used for multi-block byte arrays.
4162 // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
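 // When multi_block is true, the generated code keeps compressing successive
 // 64-byte chunks of b, advancing ofs until it passes limit, and the final
 // ofs is the stub's return value; otherwise it compresses exactly one chunk
 // at ofs into state.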
4163 address generate_md5_implCompress(bool multi_block, const char *name) {
4164 __masm-> align(CodeEntryAlignment);
4165 StubCodeMark mark(this, "StubRoutines", name);
4166 address start = __masm-> pc();
4167
4168 const Register buf_param = r15;
4169 const Address state_param(rsp, 0 * wordSize);
4170 const Address ofs_param (rsp, 1 * wordSize );
4171 const Address limit_param(rsp, 1 * wordSize + 4);
4172
4173 __masm-> enter();
4174 __masm-> push(rbx);
4175 __masm-> push(rdi);
4176 __masm-> push(rsi);
4177 __masm-> push(r15);
4178 __masm-> subptr(rsp, 2 * wordSize);
4179
4180 __masm-> movptr(buf_param, c_rarg0);
4181 __masm-> movptr(state_param, c_rarg1);
4182 if (multi_block) {
4183 __masm-> movl(ofs_param, c_rarg2);
4184 __masm-> movl(limit_param, c_rarg3);
4185 }
4186 __masm-> fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);
4187
4188 __masm-> addptr(rsp, 2 * wordSize);
4189 __masm-> pop(r15);
4190 __masm-> pop(rsi);
4191 __masm-> pop(rdi);
4192 __masm-> pop(rbx);
4193 __masm-> leave();
4194 __masm-> ret(0);
4195 return start;
4196 }
4197
4198 address generate_upper_word_mask() {
4199 __masm-> align64();
4200 StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
4201 address start = __masm-> pc();
4202 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4203 __masm-> emit_data64(0xFFFFFFFF00000000, relocInfo::none);
4204 return start;
4205 }
4206
4207 address generate_shuffle_byte_flip_mask() {
4208 __masm-> align64();
4209 StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
4210 address start = __masm-> pc();
4211 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4212 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4213 return start;
4214 }
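 // Used as a pshufb control, the two qwords above select source bytes 15
 // down to 0, i.e. they reverse all 16 bytes of an XMM register (a full
 // big-endian byte flip of the message words).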
4215
4216 // ofs and limit are used for multi-block byte arrays.
4217 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
4218 address generate_sha1_implCompress(bool multi_block, const char *name) {
4219 __masm-> align(CodeEntryAlignment);
4220 StubCodeMark mark(this, "StubRoutines", name);
4221 address start = __masm-> pc();
4222
4223 Register buf = c_rarg0;
4224 Register state = c_rarg1;
4225 Register ofs = c_rarg2;
4226 Register limit = c_rarg3;
4227
4228 const XMMRegister abcd = xmm0;
4229 const XMMRegister e0 = xmm1;
4230 const XMMRegister e1 = xmm2;
4231 const XMMRegister msg0 = xmm3;
4232
4233 const XMMRegister msg1 = xmm4;
4234 const XMMRegister msg2 = xmm5;
4235 const XMMRegister msg3 = xmm6;
4236 const XMMRegister shuf_mask = xmm7;
4237
4238 __masm-> enter();
4239
4240 __masm-> subptr(rsp, 4 * wordSize);
4241
4242 __masm-> fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
4243 buf, state, ofs, limit, rsp, multi_block);
4244
4245 __masm-> addptr(rsp, 4 * wordSize);
4246
4247 __masm-> leave();
4248 __masm-> ret(0);
4249 return start;
4250 }
4251
4252 address generate_pshuffle_byte_flip_mask() {
4253 __masm-> align64();
4254 StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
4255 address start = __masm-> pc();
4256 __masm-> emit_data64(0x0405060700010203, relocInfo::none);
4257 __masm-> emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
4258
4259 if (VM_Version::supports_avx2()) {
4260 __masm-> emit_data64(0x0405060700010203, relocInfo::none); // second copy
4261 __masm-> emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
4262 // _SHUF_00BA
4263 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4264 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4265 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4266 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4267 // _SHUF_DC00
4268 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4269 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4270 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4271 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4272 }
4273
4274 return start;
4275 }
4276
4277 // Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
4278 address generate_pshuffle_byte_flip_mask_sha512() {
4279 __masm-> align32();
4280 StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512");
4281 address start = __masm-> pc();
4282 if (VM_Version::supports_avx2()) {
4283 __masm-> emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK
4284 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4285 __masm-> emit_data64(0x1011121314151617, relocInfo::none);
4286 __masm-> emit_data64(0x18191a1b1c1d1e1f, relocInfo::none);
4287 __masm-> emit_data64(0x0000000000000000, relocInfo::none); //MASK_YMM_LO
4288 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4289 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4290 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4291 }
4292
4293 return start;
4294 }
4295
4296// ofs and limit are used for multi-block byte arrays.
4297// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
4298 address generate_sha256_implCompress(bool multi_block, const char *name) {
4299 assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), "");
4300 __masm-> align(CodeEntryAlignment);
4301 StubCodeMark mark(this, "StubRoutines", name);
4302 address start = __masm-> pc();
4303
4304 Register buf = c_rarg0;
4305 Register state = c_rarg1;
4306 Register ofs = c_rarg2;
4307 Register limit = c_rarg3;
4308
4309 const XMMRegister msg = xmm0;
4310 const XMMRegister state0 = xmm1;
4311 const XMMRegister state1 = xmm2;
4312 const XMMRegister msgtmp0 = xmm3;
4313
4314 const XMMRegister msgtmp1 = xmm4;
4315 const XMMRegister msgtmp2 = xmm5;
4316 const XMMRegister msgtmp3 = xmm6;
4317 const XMMRegister msgtmp4 = xmm7;
4318
4319 const XMMRegister shuf_mask = xmm8;
4320
4321 __masm-> enter();
4322
4323 __masm-> subptr(rsp, 4 * wordSize);
4324
4325 if (VM_Version::supports_sha()) {
4326 __masm-> fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
4327 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
4328 } else if (VM_Version::supports_avx2()) {
4329 __masm-> sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
4330 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
4331 }
4332 __masm-> addptr(rsp, 4 * wordSize);
4333 __masm-> vzeroupper();
4334 __masm-> leave();
4335 __masm-> ret(0);
4336 return start;
4337 }
4338
4339 address generate_sha512_implCompress(bool multi_block, const char *name) {
4340 assert(VM_Version::supports_avx2(), "");
4341 assert(VM_Version::supports_bmi2(), "");
4342 __masm-> align(CodeEntryAlignment);
4343 StubCodeMark mark(this, "StubRoutines", name);
4344 address start = __masm-> pc();
4345
4346 Register buf = c_rarg0;
4347 Register state = c_rarg1;
4348 Register ofs = c_rarg2;
4349 Register limit = c_rarg3;
4350
4351 const XMMRegister msg = xmm0;
4352 const XMMRegister state0 = xmm1;
4353 const XMMRegister state1 = xmm2;
4354 const XMMRegister msgtmp0 = xmm3;
4355 const XMMRegister msgtmp1 = xmm4;
4356 const XMMRegister msgtmp2 = xmm5;
4357 const XMMRegister msgtmp3 = xmm6;
4358 const XMMRegister msgtmp4 = xmm7;
4359
4360 const XMMRegister shuf_mask = xmm8;
4361
4362 __masm-> enter();
4363
4364 __masm-> sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
4365 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
4366
4367 __masm-> vzeroupper();
4368 __masm-> leave();
4369 __masm-> ret(0);
4370 return start;
4371 }
4372
4373 address ghash_polynomial512_addr() {
4374 __masm-> align(CodeEntryAlignment);
4375 StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr");
4376 address start = __masm-> pc();
4377 __masm-> emit_data64(0x00000001C2000000, relocInfo::none); // POLY for reduction
4378 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4379 __masm-> emit_data64(0x00000001C2000000, relocInfo::none);
4380 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4381 __masm-> emit_data64(0x00000001C2000000, relocInfo::none);
4382 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4383 __masm-> emit_data64(0x00000001C2000000, relocInfo::none);
4384 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4385 __masm-> emit_data64(0x0000000000000001, relocInfo::none); // POLY
4386 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4387 __masm-> emit_data64(0x0000000000000001, relocInfo::none); // TWOONE
4388 __masm-> emit_data64(0x0000000100000000, relocInfo::none);
4389 return start;
4390}
4391
4392 // Vector AES Galois Counter Mode implementation. Parameters:
4393 // Windows regs | Linux regs
4394 // in = c_rarg0 (rcx) | c_rarg0 (rsi)
4395 // len = c_rarg1 (rdx) | c_rarg1 (rdi)
4396 // ct = c_rarg2 (r8) | c_rarg2 (rdx)
4397 // out = c_rarg3 (r9) | c_rarg3 (rcx)
4398 // key = r10 | c_rarg4 (r8)
4399 // state = r13 | c_rarg5 (r9)
4400 // subkeyHtbl = r14 | r11
4401 // counter = rsi | r12
4402 // return - number of processed bytes
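 // Per 16-byte block, AES-GCM is counter-mode encryption followed by GHASH
 // authentication over the ciphertext. A sketch of the data flow (gf128_mul
 // is a hypothetical carry-less multiply with reduction by the polynomial
 // emitted in ghash_polynomial512_addr() above):
 //
 //   ct[i] = in[i] ^ E_K(counter + i);           // CTR keystream
 //   state = gf128_mul(state ^ ct[i], subkeyH);  // GHASH accumulation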
4403 address generate_galoisCounterMode_AESCrypt() {
4404 __masm-> align(CodeEntryAlignment);
4405 StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt");
4406 address start = __masm-> pc();
4407 const Register in = c_rarg0;
4408 const Register len = c_rarg1;
4409 const Register ct = c_rarg2;
4410 const Register out = c_rarg3;
4411 // and updated with the incremented counter in the end
4412#ifndef _WIN64
4413 const Register key = c_rarg4;
4414 const Register state = c_rarg5;
4415 const Address subkeyH_mem(rbp, 2 * wordSize);
4416 const Register subkeyHtbl = r11;
4417 const Address avx512_subkeyH_mem(rbp, 3 * wordSize);
4418 const Register avx512_subkeyHtbl = r13;
4419 const Address counter_mem(rbp, 4 * wordSize);
4420 const Register counter = r12;
4421#else
4422 const Address key_mem(rbp, 6 * wordSize);
4423 const Register key = r10;
4424 const Address state_mem(rbp, 7 * wordSize);
4425 const Register state = r13;
4426 const Address subkeyH_mem(rbp, 8 * wordSize);
4427 const Register subkeyHtbl = r14;
4428 const Address avx512_subkeyH_mem(rbp, 9 * wordSize);
4429 const Register avx512_subkeyHtbl = r12;
4430 const Address counter_mem(rbp, 10 * wordSize);
4431 const Register counter = rsi;
4432#endif
4433 __masm-> enter();
4434 // Save state before entering routine
4435 __masm-> push(r12);
4436 __masm-> push(r13);
4437 __masm-> push(r14);
4438 __masm-> push(r15);
4439 __masm-> push(rbx);
4440#ifdef _WIN64
4441 // on win64, fill key and state from the stack positions
4442 __masm-> push(rsi);
4443 __masm-> movptr(key, key_mem);
4444 __masm-> movptr(state, state_mem);
4445#endif
4446 __masm-> movptr(subkeyHtbl, subkeyH_mem);
4447 __masm-> movptr(avx512_subkeyHtbl, avx512_subkeyH_mem);
4448 __masm-> movptr(counter, counter_mem);
4449
4450 __masm-> aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter);
4451
4452 // Restore state before leaving routine
4453#ifdef _WIN64
4454 __masm-> pop(rsi);
4455#endif
4456 __masm-> pop(rbx);
4457 __masm-> pop(r15);
4458 __masm-> pop(r14);
4459 __masm-> pop(r13);
4460 __masm-> pop(r12);
4461
4462 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4463 __masm-> ret(0);
4464 return start;
4465 }
4466
4467 // This mask is used for incrementing the counter value (linc0, linc4, etc.)
4468 address counter_mask_addr() {
4469 __masm-> align64();
4470 StubCodeMark mark(this, "StubRoutines", "counter_mask_addr");
4471 address start = __masm-> pc();
4472 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);//lbswapmask
4473 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4474 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4475 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4476 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4477 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4478 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4479 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4480 __masm-> emit_data64(0x0000000000000000, relocInfo::none);//linc0 = counter_mask_addr+64
4481 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4482 __masm-> emit_data64(0x0000000000000001, relocInfo::none);//counter_mask_addr() + 80
4483 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4484 __masm-> emit_data64(0x0000000000000002, relocInfo::none);
4485 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4486 __masm-> emit_data64(0x0000000000000003, relocInfo::none);
4487 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4488 __masm-> emit_data64(0x0000000000000004, relocInfo::none);//linc4 = counter_mask_addr() + 128
4489 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4490 __masm-> emit_data64(0x0000000000000004, relocInfo::none);
4491 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4492 __masm-> emit_data64(0x0000000000000004, relocInfo::none);
4493 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4494 __masm-> emit_data64(0x0000000000000004, relocInfo::none);
4495 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4496 __masm-> emit_data64(0x0000000000000008, relocInfo::none);//linc8 = counter_mask_addr() + 192
4497 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4498 __masm-> emit_data64(0x0000000000000008, relocInfo::none);
4499 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4500 __masm-> emit_data64(0x0000000000000008, relocInfo::none);
4501 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4502 __masm-> emit_data64(0x0000000000000008, relocInfo::none);
4503 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4504 __masm-> emit_data64(0x0000000000000020, relocInfo::none);//linc32 = counter_mask_addr() + 256
4505 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4506 __masm-> emit_data64(0x0000000000000020, relocInfo::none);
4507 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4508 __masm-> emit_data64(0x0000000000000020, relocInfo::none);
4509 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4510 __masm-> emit_data64(0x0000000000000020, relocInfo::none);
4511 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4512 __masm-> emit_data64(0x0000000000000010, relocInfo::none);//linc16 = counter_mask_addr() + 320
4513 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4514 __masm-> emit_data64(0x0000000000000010, relocInfo::none);
4515 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4516 __masm-> emit_data64(0x0000000000000010, relocInfo::none);
4517 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4518 __masm-> emit_data64(0x0000000000000010, relocInfo::none);
4519 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4520 return start;
4521 }
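 // Layout recap: the first 64 bytes form a byte-swap mask replicated across
 // the 512-bit lanes; the rows that follow (per-lane increments 0..3, then
 // broadcast 4, 8, 32 and 16 at the offsets noted in the comments) are the
 // constants the vectorized CTR loop adds to its packed counters.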
4522
4523 // Vector AES Counter implementation
4524 address generate_counterMode_VectorAESCrypt() {
4525 __masm-> align(CodeEntryAlignment);
4526 StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
4527 address start = __masm-> pc();
4528 const Register from = c_rarg0; // source array address
4529 const Register to = c_rarg1; // destination array address
4530 const Register key = c_rarg2; // key array address r8
4531 const Register counter = c_rarg3; // counter byte array initialized from counter array address
4532 // and updated with the incremented counter in the end
4533#ifndef _WIN64
4534 const Register len_reg = c_rarg4;
4535 const Register saved_encCounter_start = c_rarg5;
4536 const Register used_addr = r10;
4537 const Address used_mem(rbp, 2 * wordSize);
4538 const Register used = r11;
4539#else
4540 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
4541 const Address saved_encCounter_mem(rbp, 7 * wordSize); // saved encrypted counter is on stack on Win64
4542 const Address used_mem(rbp, 8 * wordSize); // used length is on stack on Win64
4543 const Register len_reg = r10; // pick the first volatile windows register
4544 const Register saved_encCounter_start = r11;
4545 const Register used_addr = r13;
4546 const Register used = r14;
4547#endif
4548 __masm-> enter();
4549 // Save state before entering routine
4550 __masm-> push(r12);
4551 __masm-> push(r13);
4552 __masm-> push(r14);
4553 __masm-> push(r15);
4554#ifdef _WIN64
4555 // on win64, fill len_reg from stack position
4556 __masm-> movl(len_reg, len_mem);
4557 __masm-> movptr(saved_encCounter_start, saved_encCounter_mem);
4558 __masm-> movptr(used_addr, used_mem);
4559 __masm-> movl(used, Address(used_addr, 0));
4560#else
4561 __masm-> push(len_reg); // Save
4562 __masm-> movptr(used_addr, used_mem);
4563 __masm-> movl(used, Address(used_addr, 0));
4564#endif
4565 __masm-> push(rbx);
4566 __masm-> aesctr_encrypt(from, to, key, counter, len_reg, used, used_addr, saved_encCounter_start);
4567 // Restore state before leaving routine
4568 __masm-> pop(rbx);
4569#ifdef _WIN64
4570 __masm-> movl(rax, len_mem); // return length
4571#else
4572 __masm-> pop(rax); // return length
4573#endif
4574 __masm-> pop(r15);
4575 __masm-> pop(r14);
4576 __masm-> pop(r13);
4577 __masm-> pop(r12);
4578
4579 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4580 __masm-> ret(0);
4581 return start;
4582 }
4583
4584 // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
4585 // to hide instruction latency
4586 //
4587 // Arguments:
4588 //
4589 // Inputs:
4590 // c_rarg0 - source byte array address
4591 // c_rarg1 - destination byte array address
4592 // c_rarg2 - K (key) in little endian int array
4593 // c_rarg3 - counter vector byte array address
4594 // Linux
4595 // c_rarg4 - input length
4596 // c_rarg5 - saved encryptedCounter start
4597 // rbp + 6 * wordSize - saved used length
4598 // Windows
4599 // rbp + 6 * wordSize - input length
4600 // rbp + 7 * wordSize - saved encryptedCounter start
4601 // rbp + 8 * wordSize - saved used length
4602 //
4603 // Output:
4604 // rax - input length
4605 //
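 // CTR blocks are independent, so the stub keeps six keystream blocks in
 // flight to hide aesenc latency. One block of the computation, in scalar
 // form (bswap stands in for the counter shuffle; not the actual schedule):
 //
 //   out[pos] = in[pos] ^ E_K(bswap(counter)); counter += 1;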
4606 address generate_counterMode_AESCrypt_Parallel() {
4607 assert(UseAES, "need AES instructions and misaligned SSE support");
4608 __masm-> align(CodeEntryAlignment);
4609 StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
4610 address start = __masm-> pc();
4611 const Register from = c_rarg0; // source array address
4612 const Register to = c_rarg1; // destination array address
4613 const Register key = c_rarg2; // key array address
4614 const Register counter = c_rarg3; // counter byte array initialized from counter array address
4615 // and updated with the incremented counter in the end
4616#ifndef _WIN64
4617 const Register len_reg = c_rarg4;
4618 const Register saved_encCounter_start = c_rarg5;
4619 const Register used_addr = r10;
4620 const Address used_mem(rbp, 2 * wordSize);
4621 const Register used = r11;
4622#else
4623 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
4624 const Address saved_encCounter_mem(rbp, 7 * wordSize); // saved encrypted counter is on stack on Win64
4625 const Address used_mem(rbp, 8 * wordSize); // used length is on stack on Win64
4626 const Register len_reg = r10; // pick the first volatile windows register
4627 const Register saved_encCounter_start = r11;
4628 const Register used_addr = r13;
4629 const Register used = r14;
4630#endif
4631 const Register pos = rax;
4632
4633 const int PARALLEL_FACTOR = 6;
4634 const XMMRegister xmm_counter_shuf_mask = xmm0;
4635 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
4636 const XMMRegister xmm_curr_counter = xmm2;
4637
4638 const XMMRegister xmm_key_tmp0 = xmm3;
4639 const XMMRegister xmm_key_tmp1 = xmm4;
4640
4641 // registers holding the four results in the parallelized loop
4642 const XMMRegister xmm_result0 = xmm5;
4643 const XMMRegister xmm_result1 = xmm6;
4644 const XMMRegister xmm_result2 = xmm7;
4645 const XMMRegister xmm_result3 = xmm8;
4646 const XMMRegister xmm_result4 = xmm9;
4647 const XMMRegister xmm_result5 = xmm10;
4648
4649 const XMMRegister xmm_from0 = xmm11;
4650 const XMMRegister xmm_from1 = xmm12;
4651 const XMMRegister xmm_from2 = xmm13;
4652 const XMMRegister xmm_from3 = xmm14; //the last one is xmm14. we have to preserve it on WIN64.
4653 const XMMRegister xmm_from4 = xmm3; // reuse xmm3-xmm4; xmm_key_tmp0/1 are free once the input text is loaded
4654 const XMMRegister xmm_from5 = xmm4;
4655
4656 //for key_128, key_192, key_256
4657 const int rounds[3] = {10, 12, 14};
4658 Label L_exit_preLoop, L_preLoop_start;
4659 Label L_multiBlock_loopTop[3];
4660 Label L_singleBlockLoopTop[3];
4661 Label L__incCounter[3][6]; //for 6 blocks
4662 Label L__incCounter_single[3]; //for single block, key128, key192, key256
4663 Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
4664 Label L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3];
4665
4666 Label L_exit;
4667
4668 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
4669
4670#ifdef _WIN64
4671 // allocate spill slots for r13, r14
4672 enum {
4673 saved_r13_offset,
4674 saved_r14_offset
4675 };
4676 __masm-> subptr(rsp, 2 * wordSize);
4677 __masm-> movptr(Address(rsp, saved_r13_offset * wordSize), r13);
4678 __masm-> movptr(Address(rsp, saved_r14_offset * wordSize), r14);
4679
4680 // on win64, fill len_reg from stack position
4681 __masm-> movl(len_reg, len_mem);
4682 __masm-> movptr(saved_encCounter_start, saved_encCounter_mem);
4683 __masm-> movptr(used_addr, used_mem);
4684 __masm-> movl(used, Address(used_addr, 0));
4685#else
4686 __masm-> push(len_reg); // Save
4687 __masm-> movptr(used_addr, used_mem);
4688 __masm-> movl(used, Address(used_addr, 0));
4689#endif
4690
4691 __masm-> push(rbx); // Save RBX
4692 __masm-> movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter
4693 __masm-> movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch
4694 __masm-> pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled
4695 __masm-> movptr(pos, 0);
4696
4697 // Use the partially used encrypted counter from the last invocation
4698 __masm-> BIND(L_preLoop_start);
4699 __masm-> cmpptr(used, 16);
4700 __masm-> jcc(Assembler::aboveEqual, L_exit_preLoop);
4701 __masm-> cmpptr(len_reg, 0);
4702 __masm-> jcc(Assembler::lessEqual, L_exit_preLoop);
4703 __masm-> movb(rbx, Address(saved_encCounter_start, used));
4704 __masm-> xorb(rbx, Address(from, pos));
4705 __masm-> movb(Address(to, pos), rbx);
4706 __masm-> addptr(pos, 1);
4707 __masm-> addptr(used, 1);
4708 __masm-> subptr(len_reg, 1);
4709
4710 __masm-> jmp(L_preLoop_start);
4711
4712 __masm-> BIND(L_exit_preLoop);
4713 __masm-> movl(Address(used_addr, 0), used);
4714
4715 // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
4716 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch
4717 __masm-> movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
4718 __masm-> cmpl(rbx, 52);
4719 __masm-> jcc(Assembler::equal, L_multiBlock_loopTop[1]);
4720 __masm-> cmpl(rbx, 60);
4721 __masm-> jcc(Assembler::equal, L_multiBlock_loopTop[2]);
4722
4723#define CTR_DoSix(opc, src_reg) \
4724 __masm-> opc(xmm_result0, src_reg); \
4725 __masm-> opc(xmm_result1, src_reg); \
4726 __masm-> opc(xmm_result2, src_reg); \
4727 __masm-> opc(xmm_result3, src_reg); \
4728 __masm-> opc(xmm_result4, src_reg); \
4729 __masm-> opc(xmm_result5, src_reg);
4730
4731 // k == 0 : generate code for key_128
4732 // k == 1 : generate code for key_192
4733 // k == 2 : generate code for key_256
4734 for (int k = 0; k < 3; ++k) {
4735 //multi blocks starts here
4736 __masm-> align(OptoLoopAlignment);
4737 __masm-> BIND(L_multiBlock_loopTop[k]);
4738 __masm-> cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
4739 __masm-> jcc(Assembler::less, L_singleBlockLoopTop[k]);
4740 load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
4741
4742 //load, then increase counters
4743 CTR_DoSix(movdqa, xmm_curr_counter);
4744 inc_counter(rbx, xmm_result1, 0x01, L__incCounter[k][0]);
4745 inc_counter(rbx, xmm_result2, 0x02, L__incCounter[k][1]);
4746 inc_counter(rbx, xmm_result3, 0x03, L__incCounter[k][2]);
4747 inc_counter(rbx, xmm_result4, 0x04, L__incCounter[k][3]);
4748 inc_counter(rbx, xmm_result5, 0x05, L__incCounter[k][4]);
4749 inc_counter(rbx, xmm_curr_counter, 0x06, L__incCounter[k][5]);
4750 CTR_DoSix(pshufb, xmm_counter_shuf_mask); // after incrementing, shuffle the counters back for PXOR
4751 CTR_DoSix(pxor, xmm_key_tmp0); // PXOR with the round 0 key
4752
4753 //load two ROUND_KEYs at a time
4754 for (int i = 1; i < rounds[k]; ) {
4755 load_key(xmm_key_tmp1, key, (0x10 * i), xmm_key_shuf_mask);
4756 load_key(xmm_key_tmp0, key, (0x10 * (i+1)), xmm_key_shuf_mask);
4757 CTR_DoSix(aesenc, xmm_key_tmp1);
4758 i++;
4759 if (i != rounds[k]) {
4760 CTR_DoSix(aesenc, xmm_key_tmp0);
4761 } else {
4762 CTR_DoSix(aesenclast, xmm_key_tmp0);
4763 }
4764 i++;
4765 }
4766
4767 // get next PARALLEL_FACTOR blocks into xmm_result registers
4768 __masm-> movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
4769 __masm-> movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
4770 __masm-> movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
4771 __masm-> movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
4772 __masm-> movdqu(xmm_from4, Address(from, pos, Address::times_1, 4 * AESBlockSize));
4773 __masm-> movdqu(xmm_from5, Address(from, pos, Address::times_1, 5 * AESBlockSize));
4774
4775 __masm-> pxor(xmm_result0, xmm_from0);
4776 __masm-> pxor(xmm_result1, xmm_from1);
4777 __masm-> pxor(xmm_result2, xmm_from2);
4778 __masm-> pxor(xmm_result3, xmm_from3);
4779 __masm-> pxor(xmm_result4, xmm_from4);
4780 __masm-> pxor(xmm_result5, xmm_from5);
4781
4782 // store 6 results into the next 96 bytes of output
4783 __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
4784 __masm-> movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
4785 __masm-> movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
4786 __masm-> movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
4787 __masm-> movdqu(Address(to, pos, Address::times_1, 4 * AESBlockSize), xmm_result4);
4788 __masm-> movdqu(Address(to, pos, Address::times_1, 5 * AESBlockSize), xmm_result5);
4789
4790 __masm-> addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text
4791 __masm-> subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length
4792 __masm-> jmp(L_multiBlock_loopTop[k]);
4793
4794 // singleBlock starts here
4795 __masm-> align(OptoLoopAlignment);
4796 __masm-> BIND(L_singleBlockLoopTop[k]);
4797 __masm-> cmpptr(len_reg, 0);
4798 __masm-> jcc(Assembler::lessEqual, L_exit);
4799 load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
4800 __masm-> movdqa(xmm_result0, xmm_curr_counter);
4801 inc_counter(rbx, xmm_curr_counter, 0x01, L__incCounter_single[k]);
4802 __masm-> pshufb(xmm_result0, xmm_counter_shuf_mask);
4803 __masm-> pxor(xmm_result0, xmm_key_tmp0);
4804 for (int i = 1; i < rounds[k]; i++) {
4805 load_key(xmm_key_tmp0, key, (0x10 * i), xmm_key_shuf_mask);
4806 __masm-> aesenc(xmm_result0, xmm_key_tmp0);
4807 }
4808 load_key(xmm_key_tmp0, key, (rounds[k] * 0x10), xmm_key_shuf_mask);
4809 __masm-> aesenclast(xmm_result0, xmm_key_tmp0);
4810 __masm-> cmpptr(len_reg, AESBlockSize);
4811 __masm-> jcc(Assembler::less, L_processTail_insr[k]);
4812 __masm-> movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
4813 __masm-> pxor(xmm_result0, xmm_from0);
4814 __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
4815 __masm-> addptr(pos, AESBlockSize);
4816 __masm-> subptr(len_reg, AESBlockSize);
4817 __masm-> jmp(L_singleBlockLoopTop[k]);
4818 __masm-> BIND(L_processTail_insr[k]); // Process the tail part of the input array
4819 __masm-> addptr(pos, len_reg); // 1. Insert bytes from src array into xmm_from0 register
4820 __masm-> testptr(len_reg, 8);
4821 __masm-> jcc(Assembler::zero, L_processTail_4_insr[k]);
4822 __masm-> subptr(pos,8);
4823 __masm-> pinsrq(xmm_from0, Address(from, pos), 0);
4824 __masm-> BIND(L_processTail_4_insr[k]);
4825 __masm-> testptr(len_reg, 4);
4826 __masm-> jcc(Assembler::zero, L_processTail_2_insr[k]);
4827 __masm-> subptr(pos,4);
4828 __masm-> pslldq(xmm_from0, 4);
4829 __masm-> pinsrd(xmm_from0, Address(from, pos), 0);
4830 __masm-> BIND(L_processTail_2_insr[k]);
4831 __masm-> testptr(len_reg, 2);
4832 __masm-> jcc(Assembler::zero, L_processTail_1_insr[k]);
4833 __masm-> subptr(pos, 2);
4834 __masm-> pslldq(xmm_from0, 2);
4835 __masm-> pinsrw(xmm_from0, Address(from, pos), 0);
4836 __masm-> BIND(L_processTail_1_insr[k]);
4837 __masm-> testptr(len_reg, 1);
4838 __masm-> jcc(Assembler::zero, L_processTail_exit_insr[k]);
4839 __masm-> subptr(pos, 1);
4840 __masm-> pslldq(xmm_from0, 1);
4841 __masm-> pinsrb(xmm_from0, Address(from, pos), 0);
4842 __masm-> BIND(L_processTail_exit_insr[k]);
4843
4844 __masm-> movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes.
4845 __masm-> pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation.
4846
4847 __masm-> testptr(len_reg, 8);
4848 __masm-> jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. array
4849 __masm-> pextrq(Address(to, pos), xmm_result0, 0);
4850 __masm-> psrldq(xmm_result0, 8);
4851 __masm-> addptr(pos, 8);
4852 __masm-> BIND(L_processTail_4_extr[k]);
4853 __masm-> testptr(len_reg, 4);
4854 __masm-> jcc(Assembler::zero, L_processTail_2_extr[k]);
4855 __masm-> pextrd(Address(to, pos), xmm_result0, 0);
4856 __masm-> psrldq(xmm_result0, 4);
4857 __masm-> addptr(pos, 4);
4858 __masm-> BIND(L_processTail_2_extr[k]);
4859 __masm-> testptr(len_reg, 2);
4860 __masm-> jcc(Assembler::zero, L_processTail_1_extr[k]);
4861 __masm-> pextrw(Address(to, pos), xmm_result0, 0);
4862 __masm-> psrldq(xmm_result0, 2);
4863 __masm-> addptr(pos, 2);
4864 __masm-> BIND(L_processTail_1_extr[k]);
4865 __masm-> testptr(len_reg, 1);
4866 __masm-> jcc(Assembler::zero, L_processTail_exit_extr[k]);
4867 __masm-> pextrb(Address(to, pos), xmm_result0, 0);
4868
4869 __masm-> BIND(L_processTail_exit_extr[k]);
4870 __masm-> movl(Address(used_addr, 0), len_reg);
4871 __masm-> jmp(L_exit);
4872
4873 }
4874
4875 __masm-> BIND(L_exit);
4876 __masm-> pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
4877 __masm-> movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
4878 __masm-> pop(rbx); // pop the saved RBX.
4879#ifdef _WIN64
4880 __masm-> movl(rax, len_mem);
4881 __masm-> movptr(r13, Address(rsp, saved_r13_offset * wordSize));
4882 __masm-> movptr(r14, Address(rsp, saved_r14_offset * wordSize));
4883 __masm-> addptr(rsp, 2 * wordSize);
4884#else
4885 __masm-> pop(rax); // return 'len'
4886#endif
4887 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4888 __masm-> ret(0);
4889 return start;
4890 }
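Editorial note on the tail path above: CTR mode turns AES into a stream cipher, so a partial final block is handled by encrypting the counter once, XOR-ing only 'len' key-stream bytes into the output, and recording how many bytes were consumed so the next call can resume mid-block. A minimal scalar sketch of that contract (the block cipher is caller-supplied; none of these names come from the stub):

    #include <cstdint>
    #include <cstddef>
    #include <functional>

    // One 16-byte block encryption; AES key-schedule details are out of scope here.
    using BlockCipher = std::function<void(const uint8_t in[16], uint8_t out[16])>;

    // Encrypt a trailing partial block (len < 16) in CTR mode.
    void ctr_tail(const uint8_t* src, uint8_t* dst, size_t len,
                  const uint8_t counter[16], uint8_t saved_keystream[16],
                  uint32_t* used, const BlockCipher& encrypt) {
      encrypt(counter, saved_keystream);        // E(counter), kept for the next call
      for (size_t i = 0; i < len; i++)
        dst[i] = src[i] ^ saved_keystream[i];   // key stream XOR input
      *used = (uint32_t)len;                    // mirrors movl(Address(used_addr, 0), len_reg)
    }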
4891
4892void roundDec(XMMRegister xmm_reg) {
4893 __masm-> vaesdec(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit);
4894 __masm-> vaesdec(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit);
4895 __masm-> vaesdec(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit);
4896 __masm-> vaesdec(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit);
4897 __masm-> vaesdec(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit);
4898 __masm-> vaesdec(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit);
4899 __masm-> vaesdec(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit);
4900 __masm-> vaesdec(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit);
4901}
4902
4903void roundDeclast(XMMRegister xmm_reg) {
4904 __masm-> vaesdeclast(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit);
4905 __masm-> vaesdeclast(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit);
4906 __masm-> vaesdeclast(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit);
4907 __masm-> vaesdeclast(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit);
4908 __masm-> vaesdeclast(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit);
4909 __masm-> vaesdeclast(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit);
4910 __masm-> vaesdeclast(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit);
4911 __masm-> vaesdeclast(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit);
4912}
4913
4914 void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = NULL) {
4915 __masm-> movdqu(xmmdst, Address(key, offset));
4916 if (xmm_shuf_mask != NULL) {
4917 __masm-> pshufb(xmmdst, xmm_shuf_mask);
4918 } else {
4919 __masm-> pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
4920 }
4921 __masm-> evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
4922
4923 }
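Editorial note: with identical source operands and an immediate of 0x0, evshufi64x2 broadcasts the low 128-bit lane to all four lanes of a ZMM register, so ev_load_key leaves one 16-byte round key replicated across 512 bits. A plain-C++ model of that broadcast (illustrative only):

    #include <cstdint>
    #include <cstring>

    // Replicate a 16-byte round key into a 64-byte (512-bit) register image.
    void broadcast_128_to_512(const uint8_t key16[16], uint8_t zmm[64]) {
      for (int lane = 0; lane < 4; lane++)
        std::memcpy(zmm + 16 * lane, key16, 16);
    }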
4924
4925address generate_cipherBlockChaining_decryptVectorAESCrypt() {
4926 assert(VM_Version::supports_avx512_vaes(), "need AES instructions and misaligned SSE support");
4927 __masm-> align(CodeEntryAlignment);
4928 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
4929 address start = __masm-> pc();
4930
4931 const Register from = c_rarg0; // source array address
4932 const Register to = c_rarg1; // destination array address
4933 const Register key = c_rarg2; // key array address
4934 const Register rvec = c_rarg3; // r byte array initialized from initvector array address
4935 // and left with the results of the last encryption block
4936#ifndef _WIN64
4937 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
4938#else
4939 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
4940 const Register len_reg = r11; // pick the volatile windows register
4941#endif
4942
4943 Label Loop, Loop1, L_128, L_256, L_192, KEY_192, KEY_256, Loop2, Lcbc_dec_rem_loop,
4944 Lcbc_dec_rem_last, Lcbc_dec_ret, Lcbc_dec_rem, Lcbc_exit;
4945
4946 __masm-> enter();
4947
4948#ifdef _WIN64
4949 // on win64, fill len_reg from stack position
4950 __masm-> movl(len_reg, len_mem);
4951#else
4952 __masm-> push(len_reg); // Save
4953#endif
4954 __masm-> push(rbx);
4955 __masm-> vzeroupper();
4956
4957 // Temporary variable declaration for swapping key bytes
4958 const XMMRegister xmm_key_shuf_mask = xmm1;
4959 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
4960
4961 // Calculate number of rounds from key size: 44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds
4962 const Register rounds = rbx;
4963 __masm-> movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
4964
4965 const XMMRegister IV = xmm0;
4966 // Load IV and broadcast value to 512-bits
4967 __masm-> evbroadcasti64x2(IV, Address(rvec, 0), Assembler::AVX_512bit);
4968
4969 // Temporary variables for storing round keys
4970 const XMMRegister RK0 = xmm30;
4971 const XMMRegister RK1 = xmm9;
4972 const XMMRegister RK2 = xmm18;
4973 const XMMRegister RK3 = xmm19;
4974 const XMMRegister RK4 = xmm20;
4975 const XMMRegister RK5 = xmm21;
4976 const XMMRegister RK6 = xmm22;
4977 const XMMRegister RK7 = xmm23;
4978 const XMMRegister RK8 = xmm24;
4979 const XMMRegister RK9 = xmm25;
4980 const XMMRegister RK10 = xmm26;
4981
4982 // Load and shuffle key
4983 // the Java expanded key ordering is rotated one position from what we want
4984 // so we start from 1*16 here and hit 0*16 last
4985 ev_load_key(RK1, key, 1 * 16, xmm_key_shuf_mask);
4986 ev_load_key(RK2, key, 2 * 16, xmm_key_shuf_mask);
4987 ev_load_key(RK3, key, 3 * 16, xmm_key_shuf_mask);
4988 ev_load_key(RK4, key, 4 * 16, xmm_key_shuf_mask);
4989 ev_load_key(RK5, key, 5 * 16, xmm_key_shuf_mask);
4990 ev_load_key(RK6, key, 6 * 16, xmm_key_shuf_mask);
4991 ev_load_key(RK7, key, 7 * 16, xmm_key_shuf_mask);
4992 ev_load_key(RK8, key, 8 * 16, xmm_key_shuf_mask);
4993 ev_load_key(RK9, key, 9 * 16, xmm_key_shuf_mask);
4994 ev_load_key(RK10, key, 10 * 16, xmm_key_shuf_mask);
4995 ev_load_key(RK0, key, 0*16, xmm_key_shuf_mask);
4996
4997 // Variables for storing source cipher text
4998 const XMMRegister S0 = xmm10;
4999 const XMMRegister S1 = xmm11;
5000 const XMMRegister S2 = xmm12;
5001 const XMMRegister S3 = xmm13;
5002 const XMMRegister S4 = xmm14;
5003 const XMMRegister S5 = xmm15;
5004 const XMMRegister S6 = xmm16;
5005 const XMMRegister S7 = xmm17;
5006
5007 // Variables for storing decrypted text
5008 const XMMRegister B0 = xmm1;
5009 const XMMRegister B1 = xmm2;
5010 const XMMRegister B2 = xmm3;
5011 const XMMRegister B3 = xmm4;
5012 const XMMRegister B4 = xmm5;
5013 const XMMRegister B5 = xmm6;
5014 const XMMRegister B6 = xmm7;
5015 const XMMRegister B7 = xmm8;
5016
5017 __masm-> cmpl(rounds, 44);
5018 __masm-> jcc(Assembler::greater, KEY_192);
5019 __masm-> jmp(Loop);
5020
5021 __masm-> BIND(KEY_192);
5022 const XMMRegister RK11 = xmm27;
5023 const XMMRegister RK12 = xmm28;
5024 ev_load_key(RK11, key, 11*16, xmm_key_shuf_mask);
5025 ev_load_key(RK12, key, 12*16, xmm_key_shuf_mask);
5026
5027 __masm-> cmpl(rounds, 52);
5028 __masm-> jcc(Assembler::greater, KEY_256);
5029 __masm-> jmp(Loop);
5030
5031 __masm-> BIND(KEY_256);
5032 const XMMRegister RK13 = xmm29;
5033 const XMMRegister RK14 = xmm31;
5034 ev_load_key(RK13, key, 13*16, xmm_key_shuf_mask);
5035 ev_load_key(RK14, key, 14*16, xmm_key_shuf_mask);
5036
5037 __masm-> BIND(Loop);
5038 __masm-> cmpl(len_reg, 512);
5039 __masm-> jcc(Assembler::below, Lcbc_dec_rem);
5040 __masm-> BIND(Loop1);
5041 __masm-> subl(len_reg, 512);
5042 __masm-> evmovdquq(S0, Address(from, 0 * 64), Assembler::AVX_512bit);
5043 __masm-> evmovdquq(S1, Address(from, 1 * 64), Assembler::AVX_512bit);
5044 __masm-> evmovdquq(S2, Address(from, 2 * 64), Assembler::AVX_512bit);
5045 __masm-> evmovdquq(S3, Address(from, 3 * 64), Assembler::AVX_512bit);
5046 __masm-> evmovdquq(S4, Address(from, 4 * 64), Assembler::AVX_512bit);
5047 __masm-> evmovdquq(S5, Address(from, 5 * 64), Assembler::AVX_512bit);
5048 __masm-> evmovdquq(S6, Address(from, 6 * 64), Assembler::AVX_512bit);
5049 __masm-> evmovdquq(S7, Address(from, 7 * 64), Assembler::AVX_512bit);
5050 __masm-> leaq(from, Address(from, 8 * 64));
5051
5052 __masm-> evpxorq(B0, S0, RK1, Assembler::AVX_512bit);
5053 __masm-> evpxorq(B1, S1, RK1, Assembler::AVX_512bit);
5054 __masm-> evpxorq(B2, S2, RK1, Assembler::AVX_512bit);
5055 __masm-> evpxorq(B3, S3, RK1, Assembler::AVX_512bit);
5056 __masm-> evpxorq(B4, S4, RK1, Assembler::AVX_512bit);
5057 __masm-> evpxorq(B5, S5, RK1, Assembler::AVX_512bit);
5058 __masm-> evpxorq(B6, S6, RK1, Assembler::AVX_512bit);
5059 __masm-> evpxorq(B7, S7, RK1, Assembler::AVX_512bit);
5060
5061 __masm-> evalignq(IV, S0, IV, 0x06);
5062 __masm-> evalignq(S0, S1, S0, 0x06);
5063 __masm-> evalignq(S1, S2, S1, 0x06);
5064 __masm-> evalignq(S2, S3, S2, 0x06);
5065 __masm-> evalignq(S3, S4, S3, 0x06);
5066 __masm-> evalignq(S4, S5, S4, 0x06);
5067 __masm-> evalignq(S5, S6, S5, 0x06);
5068 __masm-> evalignq(S6, S7, S6, 0x06);
5069
5070 roundDec(RK2);
5071 roundDec(RK3);
5072 roundDec(RK4);
5073 roundDec(RK5);
5074 roundDec(RK6);
5075 roundDec(RK7);
5076 roundDec(RK8);
5077 roundDec(RK9);
5078 roundDec(RK10);
5079
5080 __masm-> cmpl(rounds, 44);
5081 __masm-> jcc(Assembler::belowEqual, L_128);
5082 roundDec(RK11);
5083 roundDec(RK12);
5084
5085 __masm-> cmpl(rounds, 52);
5086 __masm-> jcc(Assembler::belowEqual, L_192);
5087 roundDec(RK13);
5088 roundDec(RK14);
5089
5090 __masm-> BIND(L_256);
5091 roundDeclast(RK0);
5092 __masm-> jmp(Loop2);
5093
5094 __masm-> BIND(L_128);
5095 roundDeclast(RK0);
5096 __masm-> jmp(Loop2);
5097
5098 __masm-> BIND(L_192);
5099 roundDeclast(RK0);
5100
5101 __masm-> BIND(Loop2);
5102 __masm-> evpxorq(B0, B0, IV, Assembler::AVX_512bit);
5103 __masm-> evpxorq(B1, B1, S0, Assembler::AVX_512bit);
5104 __masm-> evpxorq(B2, B2, S1, Assembler::AVX_512bit);
5105 __masm-> evpxorq(B3, B3, S2, Assembler::AVX_512bit);
5106 __masm-> evpxorq(B4, B4, S3, Assembler::AVX_512bit);
5107 __masm-> evpxorq(B5, B5, S4, Assembler::AVX_512bit);
5108 __masm-> evpxorq(B6, B6, S5, Assembler::AVX_512bit);
5109 __masm-> evpxorq(B7, B7, S6, Assembler::AVX_512bit);
5110 __masm-> evmovdquq(IV, S7, Assembler::AVX_512bit);
5111
5112 __masm-> evmovdquq(Address(to, 0 * 64), B0, Assembler::AVX_512bit);
5113 __masm-> evmovdquq(Address(to, 1 * 64), B1, Assembler::AVX_512bit);
5114 __masm-> evmovdquq(Address(to, 2 * 64), B2, Assembler::AVX_512bit);
5115 __masm-> evmovdquq(Address(to, 3 * 64), B3, Assembler::AVX_512bit);
5116 __masm-> evmovdquq(Address(to, 4 * 64), B4, Assembler::AVX_512bit);
5117 __masm-> evmovdquq(Address(to, 5 * 64), B5, Assembler::AVX_512bit);
5118 __masm-> evmovdquq(Address(to, 6 * 64), B6, Assembler::AVX_512bit);
5119 __masm-> evmovdquq(Address(to, 7 * 64), B7, Assembler::AVX_512bit);
5120 __masm-> leaq(to, Address(to, 8 * 64));
5121 __masm-> jmp(Loop);
5122
5123 __masm-> BIND(Lcbc_dec_rem);
5124 __masm-> evshufi64x2(IV, IV, IV, 0x03, Assembler::AVX_512bit);
5125
5126 __masm-> BIND(Lcbc_dec_rem_loop);
5127 __masm-> subl(len_reg, 16);
5128 __masm-> jcc(Assembler::carrySet, Lcbc_dec_ret);
5129
5130 __masm-> movdqu(S0, Address(from, 0));
5131 __masm-> evpxorq(B0, S0, RK1, Assembler::AVX_512bit);
5132 __masm-> vaesdec(B0, B0, RK2, Assembler::AVX_512bit);
5133 __masm-> vaesdec(B0, B0, RK3, Assembler::AVX_512bit);
5134 __masm-> vaesdec(B0, B0, RK4, Assembler::AVX_512bit);
5135 __masm-> vaesdec(B0, B0, RK5, Assembler::AVX_512bit);
5136 __masm-> vaesdec(B0, B0, RK6, Assembler::AVX_512bit);
5137 __masm-> vaesdec(B0, B0, RK7, Assembler::AVX_512bit);
5138 __masm-> vaesdec(B0, B0, RK8, Assembler::AVX_512bit);
5139 __masm-> vaesdec(B0, B0, RK9, Assembler::AVX_512bit);
5140 __masm-> vaesdec(B0, B0, RK10, Assembler::AVX_512bit);
5141 __masm-> cmpl(rounds, 44);
5142 __masm-> jcc(Assembler::belowEqual, Lcbc_dec_rem_last);
5143
5144 __masm-> vaesdec(B0, B0, RK11, Assembler::AVX_512bit);
5145 __masm-> vaesdec(B0, B0, RK12, Assembler::AVX_512bit);
5146 __masm-> cmpl(rounds, 52);
5147 __masm-> jcc(Assembler::belowEqual, Lcbc_dec_rem_last);
5148
5149 __masm-> vaesdec(B0, B0, RK13, Assembler::AVX_512bit);
5150 __masm-> vaesdec(B0, B0, RK14, Assembler::AVX_512bit);
5151
5152 __masm-> BIND(Lcbc_dec_rem_last);
5153 __masm-> vaesdeclast(B0, B0, RK0, Assembler::AVX_512bit);
5154
5155 __masm-> evpxorq(B0, B0, IV, Assembler::AVX_512bit);
5156 __masm-> evmovdquq(IV, S0, Assembler::AVX_512bit);
5157 __masm-> movdqu(Address(to, 0), B0);
5158 __masm-> leaq(from, Address(from, 16));
5159 __masm-> leaq(to, Address(to, 16));
5160 __masm-> jmp(Lcbc_dec_rem_loop);
5161
5162 __masm-> BIND(Lcbc_dec_ret);
5163 __masm-> movdqu(Address(rvec, 0), IV);
5164
5165 // Zero out the round keys
5166 __masm-> evpxorq(RK0, RK0, RK0, Assembler::AVX_512bit);
5167 __masm-> evpxorq(RK1, RK1, RK1, Assembler::AVX_512bit);
5168 __masm-> evpxorq(RK2, RK2, RK2, Assembler::AVX_512bit);
5169 __masm-> evpxorq(RK3, RK3, RK3, Assembler::AVX_512bit);
5170 __masm-> evpxorq(RK4, RK4, RK4, Assembler::AVX_512bit);
5171 __masm-> evpxorq(RK5, RK5, RK5, Assembler::AVX_512bit);
5172 __masm-> evpxorq(RK6, RK6, RK6, Assembler::AVX_512bit);
5173 __masm-> evpxorq(RK7, RK7, RK7, Assembler::AVX_512bit);
5174 __masm-> evpxorq(RK8, RK8, RK8, Assembler::AVX_512bit);
5175 __masm-> evpxorq(RK9, RK9, RK9, Assembler::AVX_512bit);
5176 __masm-> evpxorq(RK10, RK10, RK10, Assembler::AVX_512bit);
5177 __masm-> cmpl(rounds, 44);
5178 __masm-> jcc(Assembler::belowEqual, Lcbc_exit);
5179 __masm-> evpxorq(RK11, RK11, RK11, Assembler::AVX_512bit);
5180 __masm-> evpxorq(RK12, RK12, RK12, Assembler::AVX_512bit);
5181 __masm-> cmpl(rounds, 52);
5182 __masm-> jcc(Assembler::belowEqual, Lcbc_exit);
5183 __masm-> evpxorq(RK13, RK13, RK13, Assembler::AVX_512bit);
5184 __masm-> evpxorq(RK14, RK14, RK14, Assembler::AVX_512bit);
5185
5186 __masm-> BIND(Lcbc_exit);
5187 __masm-> pop(rbx);
5188#ifdef _WIN64
5189 __masm-> movl(rax, len_mem);
5190#else
5191 __masm-> pop(rax); // return length
5192#endif
5193 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
5194 __masm-> ret(0);
5195 return start;
5196}
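Editorial cross-check on the data flow above: CBC decryption computes P[i] = D(C[i]) XOR C[i-1] with C[-1] = IV, which is what the XOR-with-shifted-ciphertext step implements 32 blocks at a time (the evalignq chain supplies each block's predecessor). A scalar reference with the block cipher abstracted out (names here are illustrative, not from the stub):

    #include <cstdint>
    #include <cstddef>
    #include <cstring>
    #include <functional>

    using BlockDecrypt = std::function<void(const uint8_t in[16], uint8_t out[16])>;

    // Scalar CBC decryption; len must be a multiple of 16, as the stub requires.
    void cbc_decrypt_ref(const uint8_t* in, uint8_t* out, size_t len,
                         uint8_t iv[16], const BlockDecrypt& decrypt) {
      uint8_t prev[16], tmp[16], saved[16];
      std::memcpy(prev, iv, 16);
      for (size_t off = 0; off < len; off += 16) {
        std::memcpy(saved, in + off, 16);   // keep C[i] before it is overwritten
        decrypt(in + off, tmp);             // the vaesdec ... vaesdeclast rounds
        for (int i = 0; i < 16; i++)
          out[off + i] = tmp[i] ^ prev[i];  // XOR with C[i-1] (or the IV)
        std::memcpy(prev, saved, 16);
      }
      std::memcpy(iv, prev, 16);            // write back, as movdqu(Address(rvec, 0), IV)
    }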
5197
5198// Polynomial x^128+x^127+x^126+x^121+1
5199address ghash_polynomial_addr() {
5200 __masm-> align(CodeEntryAlignment);
5201 StubCodeMark mark(this, "StubRoutines", "_ghash_poly_addr");
5202 address start = __masm-> pc();
5203 __masm-> emit_data64(0x0000000000000001, relocInfo::none);
5204 __masm-> emit_data64(0xc200000000000000, relocInfo::none);
5205 return start;
5206}
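Editorial note: the two quadwords above form the 128-bit constant 0xc2000000000000000000000000000001 used for GHASH reduction in the bit-reflected representation; the set bits of the high quadword sit at positions 121, 126 and 127, matching the x^121, x^126 and x^127 terms of the polynomial named in the comment. A quick check of those bit positions (illustrative only):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t lo = 0x0000000000000001ULL, hi = 0xc200000000000000ULL;
      for (int b = 0; b < 64; b++) {
        if (lo >> b & 1) std::printf("bit %d set\n", b);       // prints bit 0
        if (hi >> b & 1) std::printf("bit %d set\n", b + 64);  // prints 121, 126, 127
      }
      return 0;
    }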
5207
5208address ghash_shufflemask_addr() {
5209 __masm-> align(CodeEntryAlignment);
5210 StubCodeMark mark(this, "StubRoutines", "_ghash_shuffmask_addr");
5211 address start = __masm-> pc();
5212 __masm-> emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none);
5213 __masm-> emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none);
5214 return start;
5215}
5216
5217// Ghash single and multi block operations using AVX instructions
5218address generate_avx_ghash_processBlocks() {
5219 __masm-> align(CodeEntryAlignment);
5220
5221 StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
5222 address start = __masm-> pc();
5223
5224 // arguments
5225 const Register state = c_rarg0;
5226 const Register htbl = c_rarg1;
5227 const Register data = c_rarg2;
5228 const Register blocks = c_rarg3;
5229 __masm-> enter();
5230 // Save state before entering routine
5231 __masm-> avx_ghash(state, htbl, data, blocks);
5232 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
5233 __masm-> ret(0);
5234 return start;
5235}
5236
5237 // byte swap x86 long
5238 address generate_ghash_long_swap_mask() {
5239 __masm-> align(CodeEntryAlignment);
5240 StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
5241 address start = __masm-> pc();
5242 __masm-> emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );
5243 __masm-> emit_data64(0x0706050403020100, relocInfo::none );
5244 return start;
5245 }
5246
5247 // byte swap x86 byte array
5248 address generate_ghash_byte_swap_mask() {
5249 __masm-> align(CodeEntryAlignment);
5250 StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
5251 address start = __masm-> pc();
5252 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );
5253 __masm-> emit_data64(0x0001020304050607, relocInfo::none );
5254 return start;
5255 }
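Editorial note: both swap masks above are operands for pshufb, which rewrites each destination byte as a mask-selected byte of the source (or zero when the mask byte has its top bit set). A scalar model of the 128-bit form (illustrative only):

    #include <cstdint>

    // dst[i] = src[mask[i] & 0x0f], or 0 if the mask byte's top bit is set.
    void pshufb_ref(const uint8_t src[16], const uint8_t mask[16], uint8_t dst[16]) {
      for (int i = 0; i < 16; i++)
        dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
    }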
5256
5257 /* Single and multi-block ghash operations */
5258 address generate_ghash_processBlocks() {
5259 __masm-> align(CodeEntryAlignment);
5260 Label L_ghash_loop, L_exit;
5261 StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
5262 address start = __masm-> pc();
5263
5264 const Register state = c_rarg0;
5265 const Register subkeyH = c_rarg1;
5266 const Register data = c_rarg2;
5267 const Register blocks = c_rarg3;
5268
5269 const XMMRegister xmm_temp0 = xmm0;
5270 const XMMRegister xmm_temp1 = xmm1;
5271 const XMMRegister xmm_temp2 = xmm2;
5272 const XMMRegister xmm_temp3 = xmm3;
5273 const XMMRegister xmm_temp4 = xmm4;
5274 const XMMRegister xmm_temp5 = xmm5;
5275 const XMMRegister xmm_temp6 = xmm6;
5276 const XMMRegister xmm_temp7 = xmm7;
5277 const XMMRegister xmm_temp8 = xmm8;
5278 const XMMRegister xmm_temp9 = xmm9;
5279 const XMMRegister xmm_temp10 = xmm10;
5280
5281 __masm-> enter();
5282
5283 __masm-> movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
5284
5285 __masm-> movdqu(xmm_temp0, Address(state, 0));
5286 __masm-> pshufb(xmm_temp0, xmm_temp10);
5287
5288
5289 __masm-> BIND(L_ghash_loop);
5290 __masm-> movdqu(xmm_temp2, Address(data, 0));
5291 __masm-> pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
5292
5293 __masm-> movdqu(xmm_temp1, Address(subkeyH, 0));
5294 __masm-> pshufb(xmm_temp1, xmm_temp10);
5295
5296 __masm-> pxor(xmm_temp0, xmm_temp2);
5297
5298 //
5299 // Multiply with the hash key
5300 //
5301 __masm-> movdqu(xmm_temp3, xmm_temp0);
5302 __masm-> pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
5303 __masm-> movdqu(xmm_temp4, xmm_temp0);
5304 __masm-> pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
5305
5306 __masm-> movdqu(xmm_temp5, xmm_temp0);
5307 __masm-> pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
5308 __masm-> movdqu(xmm_temp6, xmm_temp0);
5309 __masm-> pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
5310
5311 __masm-> pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
5312
5313 __masm-> movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
5314 __masm-> psrldq(xmm_temp4, 8); // shift xmm4 right by 64 bits
5315 __masm-> pslldq(xmm_temp5, 8); // shift xmm5 left by 64 bits
5316 __masm-> pxor(xmm_temp3, xmm_temp5);
5317 __masm-> pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
5318 // of the carry-less multiplication of
5319 // xmm0 by xmm1.
5320
5321 // We shift the result of the multiplication by one bit position
5322 // to the left to cope for the fact that the bits are reversed.
5323 __masm-> movdqu(xmm_temp7, xmm_temp3);
5324 __masm-> movdqu(xmm_temp8, xmm_temp6);
5325 __masm-> pslld(xmm_temp3, 1);
5326 __masm-> pslld(xmm_temp6, 1);
5327 __masm-> psrld(xmm_temp7, 31);
5328 __masm-> psrld(xmm_temp8, 31);
5329 __masm-> movdqu(xmm_temp9, xmm_temp7);
5330 __masm-> pslldq(xmm_temp8, 4);
5331 __masm-> pslldq(xmm_temp7, 4);
5332 __masm-> psrldq(xmm_temp9, 12);
5333 __masm-> por(xmm_temp3, xmm_temp7);
5334 __masm-> por(xmm_temp6, xmm_temp8);
5335 __masm-> por(xmm_temp6, xmm_temp9);
5336
5337 //
5338 // First phase of the reduction
5339 //
5340 // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
5341 // independently.
5342 __masm-> movdqu(xmm_temp7, xmm_temp3);
5343 __masm-> movdqu(xmm_temp8, xmm_temp3);
5344 __masm-> movdqu(xmm_temp9, xmm_temp3);
5345 __masm-> pslld(xmm_temp7, 31); // packed left shift by 31 bits
5346 __masm-> pslld(xmm_temp8, 30); // packed left shift by 30 bits
5347 __masm-> pslld(xmm_temp9, 25); // packed left shift by 25 bits
5348 __masm-> pxor(xmm_temp7, xmm_temp8); // xor the shifted versions
5349 __masm-> pxor(xmm_temp7, xmm_temp9);
5350 __masm-> movdqu(xmm_temp8, xmm_temp7);
5351 __masm-> pslldq(xmm_temp7, 12);
5352 __masm-> psrldq(xmm_temp8, 4);
5353 __masm-> pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
5354
5355 //
5356 // Second phase of the reduction
5357 //
5358 // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
5359 // shift operations.
5360 __masm-> movdqu(xmm_temp2, xmm_temp3);
5361 __masm-> movdqu(xmm_temp4, xmm_temp3);
5362 __masm-> movdqu(xmm_temp5, xmm_temp3);
5363 __masm-> psrld(xmm_temp2, 1); // packed right shift by 1 bit
5364 __masm-> psrld(xmm_temp4, 2); // packed right shift by 2 bits
5365 __masm-> psrld(xmm_temp5, 7); // packed right shift by 7 bits
5366 __masm-> pxor(xmm_temp2, xmm_temp4); // xor the shifted versions
5367 __masm-> pxor(xmm_temp2, xmm_temp5);
5368 __masm-> pxor(xmm_temp2, xmm_temp8);
5369 __masm-> pxor(xmm_temp3, xmm_temp2);
5370 __masm-> pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
5371
5372 __masm-> decrement(blocks);
5373 __masm-> jcc(Assembler::zero, L_exit);
5374 __masm-> movdqu(xmm_temp0, xmm_temp6);
5375 __masm-> addptr(data, 16);
5376 __masm-> jmp(L_ghash_loop);
5377
5378 __masm-> BIND(L_exit);
5379 __masm-> pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result
5380 __masm-> movdqu(Address(state, 0), xmm_temp6); // store the result
5381 __masm-> leave();
5382 __masm-> ret(0);
5383 return start;
5384 }
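Editorial note: the four pclmulqdq results above are the schoolbook decomposition of a 128x128-bit carry-less product: with a = a1:a0 and b = b1:b0, a*b = (a1*b1 << 128) XOR ((a0*b1 XOR a1*b0) << 64) XOR a0*b0, which is why xmm4 (a0*b1 + a1*b0) is split and folded into the <xmm6:xmm3> pair. A software model of the 64x64 primitive (illustrative only):

    #include <cstdint>

    // Carry-less multiply: 64x64 -> 128 bits, returned as hi:lo.
    void clmul64(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo) {
      uint64_t h = 0, l = 0;
      for (int i = 0; i < 64; i++) {
        if (b >> i & 1) {
          l ^= a << i;
          if (i) h ^= a >> (64 - i);
        }
      }
      *hi = h;
      *lo = l;
    }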
5385
5386 address base64_shuffle_addr()
5387 {
5388 __masm-> align64();
5389 StubCodeMark mark(this, "StubRoutines", "shuffle_base64");
5390 address start = __masm-> pc();
5391 assert(((unsigned long long)start & 0x3f) == 0,
5392 "Alignment problem (0x%08llx)", (unsigned long long)start);
5393 __masm-> emit_data64(0x0405030401020001, relocInfo::none);
5394 __masm-> emit_data64(0x0a0b090a07080607, relocInfo::none);
5395 __masm-> emit_data64(0x10110f100d0e0c0d, relocInfo::none);
5396 __masm-> emit_data64(0x1617151613141213, relocInfo::none);
5397 __masm-> emit_data64(0x1c1d1b1c191a1819, relocInfo::none);
5398 __masm-> emit_data64(0x222321221f201e1f, relocInfo::none);
5399 __masm-> emit_data64(0x2829272825262425, relocInfo::none);
5400 __masm-> emit_data64(0x2e2f2d2e2b2c2a2b, relocInfo::none);
5401 return start;
5402 }
5403
5404 address base64_avx2_shuffle_addr()
5405 {
5406 __masm-> align32();
5407 StubCodeMark mark(this, "StubRoutines", "avx2_shuffle_base64");
5408 address start = __masm-> pc();
5409 __masm-> emit_data64(0x0809070805060405, relocInfo::none);
5410 __masm-> emit_data64(0x0e0f0d0e0b0c0a0b, relocInfo::none);
5411 __masm-> emit_data64(0x0405030401020001, relocInfo::none);
5412 __masm-> emit_data64(0x0a0b090a07080607, relocInfo::none);
5413 return start;
5414 }
5415
5416 address base64_avx2_input_mask_addr()
5417 {
5418 __masm-> align32();
5419 StubCodeMark mark(this, "StubRoutines", "avx2_input_mask_base64");
5420 address start = __masm-> pc();
5421 __masm-> emit_data64(0x8000000000000000, relocInfo::none);
5422 __masm-> emit_data64(0x8000000080000000, relocInfo::none);
5423 __masm-> emit_data64(0x8000000080000000, relocInfo::none);
5424 __masm-> emit_data64(0x8000000080000000, relocInfo::none);
5425 return start;
5426 }
5427
5428 address base64_avx2_lut_addr()
5429 {
5430 __masm-> align32();
5431 StubCodeMark mark(this, "StubRoutines", "avx2_lut_base64");
5432 address start = __masm-> pc();
5433 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5434 __masm-> emit_data64(0x0000f0edfcfcfcfc, relocInfo::none);
5435 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5436 __masm-> emit_data64(0x0000f0edfcfcfcfc, relocInfo::none);
5437
5438 // URL LUT
5439 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5440 __masm-> emit_data64(0x000020effcfcfcfc, relocInfo::none);
5441 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5442 __masm-> emit_data64(0x000020effcfcfcfc, relocInfo::none);
5443 return start;
5444 }
5445
5446 address base64_encoding_table_addr()
5447 {
5448 __masm-> align64();
5449 StubCodeMark mark(this, "StubRoutines", "encoding_table_base64");
5450 address start = __masm-> pc();
5451 assert(((unsigned long long)start & 0x3f) == 0, "Alignment problem (0x%08llx)", (unsigned long long)start);
5452 __masm-> emit_data64(0x4847464544434241, relocInfo::none);
5453 __masm-> emit_data64(0x504f4e4d4c4b4a49, relocInfo::none);
5454 __masm-> emit_data64(0x5857565554535251, relocInfo::none);
5455 __masm-> emit_data64(0x6665646362615a59, relocInfo::none);
5456 __masm-> emit_data64(0x6e6d6c6b6a696867, relocInfo::none);
5457 __masm-> emit_data64(0x767574737271706f, relocInfo::none);
5458 __masm-> emit_data64(0x333231307a797877, relocInfo::none);
5459 __masm-> emit_data64(0x2f2b393837363534, relocInfo::none);
5460
5461 // URL table
5462 __masm-> emit_data64(0x4847464544434241, relocInfo::none);
5463 __masm-> emit_data64(0x504f4e4d4c4b4a49, relocInfo::none);
5464 __masm-> emit_data64(0x5857565554535251, relocInfo::none);
5465 __masm-> emit_data64(0x6665646362615a59, relocInfo::none);
5466 __masm-> emit_data64(0x6e6d6c6b6a696867, relocInfo::none);
5467 __masm-> emit_data64(0x767574737271706f, relocInfo::none);
5468 __masm-> emit_data64(0x333231307a797877, relocInfo::none);
5469 __masm-> emit_data64(0x5f2d393837363534, relocInfo::none);
5470 return start;
5471 }
5472
5473 // Code for generating Base64 encoding.
5474 // Intrinsic function prototype in Base64.java:
5475 // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp,
5476 // boolean isURL) {
5477 address generate_base64_encodeBlock()
5478 {
5479 __masm-> align(CodeEntryAlignment);
5480 StubCodeMark mark(this, "StubRoutines", "implEncode");
5481 address start = __masm-> pc();
5482 __masm-> enter();
5483
5484 // Save callee-saved registers before using them
5485 __masm-> push(r12);
5486 __masm-> push(r13);
5487 __masm-> push(r14);
5488 __masm-> push(r15);
5489
5490 // arguments
5491 const Register source = c_rarg0; // Source Array
5492 const Register start_offset = c_rarg1; // start offset
5493 const Register end_offset = c_rarg2; // end offset
5494 const Register dest = c_rarg3; // destination array
5495
5496#ifndef _WIN64
5497 const Register dp = c_rarg4; // Position for writing to dest array
5498 const Register isURL = c_rarg5; // Base64 or URL character set
5499#else
5500 const Address dp_mem(rbp, 6 * wordSize); // dp is on the stack on Win64
5501 const Address isURL_mem(rbp, 7 * wordSize);
5502 const Register isURL = r10; // pick the volatile windows register
5503 const Register dp = r12;
5504 __masm-> movl(dp, dp_mem);
5505 __masm-> movl(isURL, isURL_mem);
5506#endif
5507
5508 const Register length = r14;
5509 const Register encode_table = r13;
5510 Label L_process3, L_exit, L_processdata, L_vbmiLoop, L_not512, L_32byteLoop;
5511
5512 // calculate length from offsets
5513 __masm-> movl(length, end_offset);
5514 __masm-> subl(length, start_offset);
5515 __masm-> cmpl(length, 0);
5516 __masm-> jcc(Assembler::lessEqual, L_exit);
5517
5518 // Code for 512-bit VBMI encoding. Encodes 48 input bytes into 64
5519 // output bytes. We read 64 input bytes and ignore the last 16, so be
5520 // sure not to read past the end of the input buffer.
5521 if (VM_Version::supports_avx512_vbmi()) {
5522 __masm-> cmpl(length, 64); // Do not overrun input buffer.
5523 __masm-> jcc(Assembler::below, L_not512);
5524
5525 __masm-> shll(isURL, 6); // index into decode table based on isURL
5526 __masm-> lea(encode_table, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr()));
5527 __masm-> addptr(encode_table, isURL);
5528 __masm-> shrl(isURL, 6); // restore isURL
5529
5530 __masm-> mov64(rax, 0x3036242a1016040aull); // Shifts
5531 __masm-> evmovdquq(xmm3, ExternalAddress(StubRoutines::x86::base64_shuffle_addr()), Assembler::AVX_512bit, r15);
5532 __masm-> evmovdquq(xmm2, Address(encode_table, 0), Assembler::AVX_512bit);
5533 __masm-> evpbroadcastq(xmm1, rax, Assembler::AVX_512bit);
5534
5535 __masm-> align32();
5536 __masm-> BIND(L_vbmiLoop);
5537
5538 __masm-> vpermb(xmm0, xmm3, Address(source, start_offset), Assembler::AVX_512bit);
5539 __masm-> subl(length, 48);
5540
5541 // Put the input bytes into the proper lanes for writing, then
5542 // encode them.
5543 __masm-> evpmultishiftqb(xmm0, xmm1, xmm0, Assembler::AVX_512bit);
5544 __masm-> vpermb(xmm0, xmm0, xmm2, Assembler::AVX_512bit);
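 // Editorial note: vpmultishiftqb extracts, for every output byte, an 8-bit
 // field starting at the per-byte bit offset held in xmm1 (the constant
 // 0x3036242a1016040a encodes offsets 10, 4, 22, 16, 42, 36, 54, 48 within
 // each quadword); vpermb then indexes the 64-byte encode table in xmm2 with
 // the low 6 bits of each extracted byte, completing the translation.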
5545
5546 // Write to destination
5547 __masm-> evmovdquq(Address(dest, dp), xmm0, Assembler::AVX_512bit);
5548
5549 __masm-> addptr(dest, 64);
5550 __masm-> addptr(source, 48);
5551 __masm-> cmpl(length, 64);
5552 __masm-> jcc(Assembler::aboveEqual, L_vbmiLoop);
5553
5554 __masm-> vzeroupper();
5555 }
5556
5557 __masm-> BIND(L_not512);
5558 if (VM_Version::supports_avx2()
5559 && VM_Version::supports_avx512vlbw()) {
5560 /*
5561 ** This AVX2 encoder is based on the paper at:
5562 ** https://dl.acm.org/doi/10.1145/3132709
5563 **
5564 ** We use AVX2 SIMD instructions to encode 24 bytes into 32
5565 ** output bytes.
5566 **
5567 */
5568 // Lengths under 32 bytes are done with scalar routine
5569 __masm-> cmpl(length, 31);
5570 __masm-> jcc(Assembler::belowEqual, L_process3);
5571
5572 // Set up supporting constant table data
5573 __masm-> vmovdqu(xmm9, ExternalAddress(StubRoutines::x86::base64_avx2_shuffle_addr()), rax);
5574 // 6-bit mask for 2nd and 4th (and multiples) 6-bit values
5575 __masm-> movl(rax, 0x0fc0fc00);
5576 __masm-> vmovdqu(xmm1, ExternalAddress(StubRoutines::x86::base64_avx2_input_mask_addr()), rax);
5577 __masm-> evpbroadcastd(xmm8, rax, Assembler::AVX_256bit);
5578
5579 // Multiplication constant for "shifting" right by 6 and 10
5580 // bits
5581 __masm-> movl(rax, 0x04000040);
5582
5583 __masm-> subl(length, 24);
5584 __masm-> evpbroadcastd(xmm7, rax, Assembler::AVX_256bit);
5585
5586 // For the first load, we mask off reading of the first 4
5587 // bytes into the register. This is so we can get 4 3-byte
5588 // chunks into each lane of the register, avoiding having to
5589 // handle end conditions. We then shuffle these bytes into a
5590 // specific order so that manipulation is easier.
5591 //
5592 // The initial read loads the XMM register like this:
5593 //
5594 // Lower 128-bit lane:
5595 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5596 // | XX | XX | XX | XX | A0 | A1 | A2 | B0 | B1 | B2 | C0 | C1 | C2 | D0 | D1 | D2 |
5598 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5599 //
5600 // Upper 128-bit lane:
5601 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5602 // | E0 | E1 | E2 | F0 | F1 | F2 | G0 | G1 | G2 | H0 | H1 | H2 | XX | XX | XX | XX |
5604 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5605 //
5606 // Where A0 is the first input byte, B0 is the fourth, etc.
5607 // The alphabetical significance denotes the 3 bytes to be
5608 // consumed and encoded into 4 bytes.
5609 //
5610 // We then shuffle the register so each 32-bit word contains
5611 // the sequence:
5612 // A1 A0 A2 A1, B1, B0, B2, B1, etc.
5613 // Each of these byte sequences are then manipulated into 4
5614 // 6-bit values ready for encoding.
5615 //
5616 // If we focus on one set of 3-byte chunks, changing the
5617 // nomenclature such that A0 => a, A1 => b, and A2 => c, we
5618 // shuffle such that each 24-bit chunk contains:
5619 //
5620 // b7 b6 b5 b4 b3 b2 b1 b0 | a7 a6 a5 a4 a3 a2 a1 a0 | c7 c6 c5 c4 c3 c2 c1 c0 | b7 b6 b5 b4 b3 b2 b1 b0
5621 // Renaming the four 6-bit values to be encoded as a..d, the same shuffled bytes contain:
5622 // b3 b2 b1 b0 c5 c4 c3 c2 | c1 c0 d5 d4 d3 d2 d1 d0 | a5 a4 a3 a2 a1 a0 b5 b4 | b3 b2 b1 b0 c5 c4 c3 c2
5625 //
5626 // We first AND off all but bits 4-9 and 16-21 (c5..c0 and
5627 // a5..a0) and shift them using a vector multiplication
5628 // operation (vpmulhuw) which effectively shifts c right by 6
5629 // bits and a right by 10 bits. We similarly mask bits 10-15
5630 // (d5..d0) and 22-27 (b5..b0) and shift them left by 8 and 4
5631 // bits respectively. This is done using vpmullw. We end up
5632 // with 4 6-bit values, thus splitting the 3 input bytes,
5633 // ready for encoding:
5634 // 0 0 d5..d0 0 0 c5..c0 0 0 b5..b0 0 0 a5..a0
5635 //
5636 // For translation, we recognize that there are 5 distinct
5637 // ranges of legal Base64 characters as below:
5638 //
5639 // +-------------+-------------+------------+
5640 // | 6-bit value | ASCII range | offset |
5641 // +-------------+-------------+------------+
5642 // | 0..25 | A..Z | 65 |
5643 // | 26..51 | a..z | 71 |
5644 // | 52..61 | 0..9 | -4 |
5645 // | 62 | + or - | -19 or -17 |
5646 // | 63 | / or _ | -16 or 32 |
5647 // +-------------+-------------+------------+
5648 //
5649 // We note that vpshufb does a parallel lookup in a
5650 // destination register using the lower 4 bits of bytes from a
5651 // source register. If we use a saturated subtraction and
5652 // subtract 51 from each 6-bit value, bytes from [0,51]
5653 // saturate to 0, and [52,63] map to a range of [1,12]. We
5654 // distinguish the [0,25] and [26,51] ranges by assigning a
5655 // value of 13 for all 6-bit values less than 26. We end up
5656 // with:
5657 //
5658 // +-------------+-------------+------------+
5659 // | 6-bit value | Reduced | offset |
5660 // +-------------+-------------+------------+
5661 // | 0..25 | 13 | 65 |
5662 // | 26..51 | 0 | 71 |
5663 // | 52..61 | 0..9 | -4 |
5664 // | 62 | 11 | -19 or -17 |
5665 // | 63 | 12 | -16 or 32 |
5666 // +-------------+-------------+------------+
5667 //
5668 // We then use a final vpshufb to add the appropriate offset,
5669 // translating the bytes.
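 // As a plain-C++ model of this classification (editorial sketch; the
 // exact reduced indices follow from the constants loaded into xmm3/xmm4):
 //   int reduced = (v > 51 ? v - 51 : 0)   // vpsubusb: saturating subtract
 //               + (v > 25 ? 1 : 0);       // vpcmpgtb + vpsubb adjustment
 //   char out = (char)(v + lut[reduced]);  // vpshufb lookup, then vpaddb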
5670 //
5671 // Load input bytes - only 28 bytes. Mask the first load to
5672 // not load into the full register.
5673 __masm-> vpmaskmovd(xmm1, xmm1, Address(source, start_offset, Address::times_1, -4), Assembler::AVX_256bit);
5674
5675 // Move 3-byte chunks of input (12 bytes) into 16 bytes,
5676 // ordering by:
5677 // 1, 0, 2, 1; 4, 3, 5, 4; etc. This groups 6-bit chunks
5678 // for easy masking
5679 __masm-> vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit);
5680
5681 __masm-> addl(start_offset, 24);
5682
5683 // Load masking register for first and third (and multiples)
5684 // 6-bit values.
5685 __masm-> movl(rax, 0x003f03f0);
5686 __masm-> evpbroadcastd(xmm6, rax, Assembler::AVX_256bit);
5687 // Multiplication constant for "shifting" left by 4 and 8 bits
5688 __masm-> movl(rax, 0x01000010);
5689 __masm-> evpbroadcastd(xmm5, rax, Assembler::AVX_256bit);
5690
5691 // Isolate 6-bit chunks of interest
5692 __masm-> vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit);
5693
5694 // Load constants for encoding
5695 __masm-> movl(rax, 0x19191919);
5696 __masm-> evpbroadcastd(xmm3, rax, Assembler::AVX_256bit);
5697 __masm-> movl(rax, 0x33333333);
5698 __masm-> evpbroadcastd(xmm4, rax, Assembler::AVX_256bit);
5699
5700 // Shift output bytes 0 and 2 into proper lanes
5701 __masm-> vpmulhuw(xmm2, xmm0, xmm7, Assembler::AVX_256bit);
5702
5703 // Mask and shift output bytes 1 and 3 into proper lanes and
5704 // combine
5705 __masm-> vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit);
5706 __masm-> vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit);
5707 __masm-> vpor(xmm0, xmm0, xmm2, Assembler::AVX_256bit);
5708
5709 // Find out which are 0..25. This indicates which input
5710 // values fall in the range of 'A'-'Z', which require an
5711 // additional offset (see comments above)
5712 __masm-> vpcmpgtb(xmm2, xmm0, xmm3, Assembler::AVX_256bit);
5713 __masm-> vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit);
5714 __masm-> vpsubb(xmm1, xmm1, xmm2, Assembler::AVX_256bit);
5715
5716 // Load the proper lookup table
5717 __masm-> lea(r11, ExternalAddress(StubRoutines::x86::base64_avx2_lut_addr()));
5718 __masm-> movl(r15, isURL);
5719 __masm-> shll(r15, 5);
5720 __masm-> vmovdqu(xmm2, Address(r11, r15));
5721
5722 // Shuffle the offsets based on the range calculation done
5723 // above. This allows us to add the correct offset to the
5724 // 6-bit value corresponding to the range documented above.
5725 __masm-> vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit);
5726 __masm-> vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit);
5727
5728 // Store the encoded bytes
5729 __masm-> vmovdqu(Address(dest, dp), xmm0);
5730 __masm-> addl(dp, 32);
5731
5732 __masm-> cmpl(length, 31);
5733 __masm-> jcc(Assembler::belowEqual, L_process3);
5734
5735 __masm-> align32();
5736 __masm-> BIND(L_32byteLoop);
5737
5738 // Get next 32 bytes
5739 __masm-> vmovdqu(xmm1, Address(source, start_offset, Address::times_1, -4));
5740
5741 __masm-> subl(length, 24);
5742 __masm-> addl(start_offset, 24);
5743
5744 // This logic is identical to the above, with only constant
5745 // register loads removed. Shuffle the input, mask off 6-bit
5746 // chunks, shift them into place, then add the offset to
5747 // encode.
5748 __masm-> vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit);
5749
5750 __masm-> vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit);
5751 __masm-> vpmulhuw(xmm10, xmm0, xmm7, Assembler::AVX_256bit);
5752 __masm-> vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit);
5753 __masm-> vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit);
5754 __masm-> vpor(xmm0, xmm0, xmm10, Assembler::AVX_256bit);
5755 __masm-> vpcmpgtb(xmm10, xmm0, xmm3, Assembler::AVX_256bit);
5756 __masm-> vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit);
5757 __masm-> vpsubb(xmm1, xmm1, xmm10, Assembler::AVX_256bit);
5758 __masm-> vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit);
5759 __masm-> vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit);
5760
5761 // Store the encoded bytes
5762 __masm-> vmovdqu(Address(dest, dp), xmm0);
5763 __masm-> addl(dp, 32);
5764
5765 __masm-> cmpl(length, 31);
5766 __masm-> jcc(Assembler::above, L_32byteLoop);
5767
5768 __masm-> BIND(L_process3);
5769 __masm-> vzeroupper();
5770 } else {
5771 __masm-> BIND(L_process3);
5772 }
5773
5774 __masm-> cmpl(length, 3);
5775 __masm-> jcc(Assembler::below, L_exit);
5776
5777 // Load the encoding table based on isURL
5778 __masm-> lea(r11, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr()));
5779 __masm-> movl(r15, isURL);
5780 __masm-> shll(r15, 6);
5781 __masm-> addptr(r11, r15);
5782
5783 __masm-> BIND(L_processdata);
5784
5785 // Load 3 bytes
5786 __masm-> load_unsigned_byte(r15, Address(source, start_offset));
5787 __masm-> load_unsigned_byte(r10, Address(source, start_offset, Address::times_1, 1));
5788 __masm-> load_unsigned_byte(r13, Address(source, start_offset, Address::times_1, 2));
5789
5790 // Build a 32-bit word with bytes 1, 2, 0, 1
5791 __masm-> movl(rax, r10);
5792 __masm-> shll(r10, 24);
5793 __masm-> orl(rax, r10);
5794
5795 __masm-> subl(length, 3);
5796
5797 __masm-> shll(r15, 8);
5798 __masm-> shll(r13, 16);
5799 __masm-> orl(rax, r15);
5800
5801 __masm-> addl(start_offset, 3);
5802
5803 __masm-> orl(rax, r13);
5804 // At this point, rax contains | byte1 | byte2 | byte0 | byte1
5805 // r13 has byte2 << 16 - need low-order 6 bits to translate.
5806 // This translated byte is the fourth output byte.
5807 __masm-> shrl(r13, 16);
5808 __masm-> andl(r13, 0x3f);
5809
5810 // The high-order 6 bits of r15 (byte0) is translated.
5811 // The translated byte is the first output byte.
5812 __masm-> shrl(r15, 10);
5813
5814 __masm-> load_unsigned_byte(r13, Address(r11, r13));
5815 __masm-> load_unsigned_byte(r15, Address(r11, r15));
5816
5817 __masm-> movb(Address(dest, dp, Address::times_1, 3), r13);
5818
5819 // Extract high-order 4 bits of byte1 and low-order 2 bits of byte0.
5820 // This translated byte is the second output byte.
5821 __masm-> shrl(rax, 4);
5822 __masm-> movl(r10, rax);
5823 __masm-> andl(rax, 0x3f);
5824
5825 __masm-> movb(Address(dest, dp, Address::times_1, 0), r15);
5826
5827 __masm-> load_unsigned_byte(rax, Address(r11, rax));
5828
5829 // Extract low-order 2 bits of byte1 and high-order 4 bits of byte2.
5830 // This translated byte is the third output byte.
5831 __masm-> shrl(r10, 18);
5832 __masm-> andl(r10, 0x3f);
5833
5834 __masm-> load_unsigned_byte(r10, Address(r11, r10));
5835
5836 __masm-> movb(Address(dest, dp, Address::times_1, 1), rax);
5837 __masm-> movb(Address(dest, dp, Address::times_1, 2), r10);
5838
5839 __masm-> addl(dp, 4);
5840 __masm-> cmpl(length, 3);
5841 __masm-> jcc(Assembler::aboveEqual, L_processdata);
5842
5843 __masm-> BIND(L_exit);
5844 __masm-> pop(r15);
5845 __masm-> pop(r14);
5846 __masm-> pop(r13);
5847 __masm-> pop(r12);
5848 __masm-> leave();
5849 __masm-> ret(0);
5850 return start;
5851 }
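Editorial reference for the scalar tail above (L_processdata): it packs three input bytes into a 32-bit word so that each 6-bit index can be isolated with shifts and masks, then translates through the 64-entry table selected by isURL. The same step in straightforward C++ (names here are illustrative):

    #include <cstdint>

    // Encode one 3-byte group into four base64 characters.
    void encode3(const uint8_t src[3], uint8_t dst[4], const uint8_t table[64]) {
      uint32_t v = (uint32_t)src[0] << 16 | (uint32_t)src[1] << 8 | src[2];
      dst[0] = table[(v >> 18) & 0x3f];  // high 6 bits of byte0
      dst[1] = table[(v >> 12) & 0x3f];  // low 2 of byte0, high 4 of byte1
      dst[2] = table[(v >>  6) & 0x3f];  // low 4 of byte1, high 2 of byte2
      dst[3] = table[v & 0x3f];          // low 6 bits of byte2
    }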
5852
5853 // base64 AVX512vbmi tables
5854 address base64_vbmi_lookup_lo_addr() {
5855 __masm-> align64();
5856 StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64");
5857 address start = __masm-> pc();
5858 assert(((unsigned long long)start & 0x3f) == 0,
5859 "Alignment problem (0x%08llx)", (unsigned long long)start);
5860 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5861 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5862 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5863 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5864 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5865 __masm-> emit_data64(0x3f8080803e808080, relocInfo::none);
5866 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
5867 __masm-> emit_data64(0x8080808080803d3c, relocInfo::none);
5868 return start;
5869 }
5870
5871 address base64_vbmi_lookup_hi_addr() {
5872 __masm-> align64();
5873 StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64");
5874 address start = __masm-> pc();
5875 assert(((unsigned long long)start & 0x3f) == 0,
5876 "Alignment problem (0x%08llx)", (unsigned long long)start);
5877 __masm-> emit_data64(0x0605040302010080, relocInfo::none);
5878 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
5879 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
5880 __masm-> emit_data64(0x8080808080191817, relocInfo::none);
5881 __masm-> emit_data64(0x201f1e1d1c1b1a80, relocInfo::none);
5882 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
5883 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
5884 __masm-> emit_data64(0x8080808080333231, relocInfo::none);
5885 return start;
5886 }
5887 address base64_vbmi_lookup_lo_url_addr() {
5888 __masm-> align64();
5889 StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64url");
5890 address start = __masm-> pc();
5891 assert(((unsigned long long)start & 0x3f) == 0,
5892 "Alignment problem (0x%08llx)", (unsigned long long)start);
5893 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5894 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5895 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5896 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5897 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5898 __masm-> emit_data64(0x80803e8080808080, relocInfo::none);
5899 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
5900 __masm-> emit_data64(0x8080808080803d3c, relocInfo::none);
5901 return start;
5902 }
5903
5904 address base64_vbmi_lookup_hi_url_addr() {
5905 __masm-> align64();
5906 StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64url");
5907 address start = __masm-> pc();
5908 assert(((unsigned long long)start & 0x3f) == 0,
5909 "Alignment problem (0x%08llx)", (unsigned long long)start);
5910 __masm-> emit_data64(0x0605040302010080, relocInfo::none);
5911 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
5912 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
5913 __masm-> emit_data64(0x3f80808080191817, relocInfo::none);
5914 __masm-> emit_data64(0x201f1e1d1c1b1a80, relocInfo::none);
5915 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
5916 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
5917 __masm-> emit_data64(0x8080808080333231, relocInfo::none);
5918 return start;
5919 }
5920
5921 address base64_vbmi_pack_vec_addr() {
5922 __masm-> align64();
5923 StubCodeMark mark(this, "StubRoutines", "pack_vec_base64");
5924 address start = __masm-> pc();
5925 assert(((unsigned long long)start & 0x3f) == 0,
5926 "Alignment problem (0x%08llx)", (unsigned long long)start);
5927 __masm-> emit_data64(0x090a040506000102, relocInfo::none);
5928 __masm-> emit_data64(0x161011120c0d0e08, relocInfo::none);
5929 __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none);
5930 __masm-> emit_data64(0x292a242526202122, relocInfo::none);
5931 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5932 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5933 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
5934 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
5935 return start;
5936 }
5937
5938 address base64_vbmi_join_0_1_addr() {
5939 __masm-> align64();
5940 StubCodeMark mark(this, "StubRoutines", "join_0_1_base64");
5941 address start = __masm-> pc();
5942 assert(((unsigned long long)start & 0x3f) == 0,
5943 "Alignment problem (0x%08llx)", (unsigned long long)start);
5944 __masm-> emit_data64(0x090a040506000102, relocInfo::none);
5945 __masm-> emit_data64(0x161011120c0d0e08, relocInfo::none);
5946 __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none);
5947 __masm-> emit_data64(0x292a242526202122, relocInfo::none);
5948 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5949 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5950 __masm-> emit_data64(0x494a444546404142, relocInfo::none);
5951 __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none);
5952 return start;
5953 }
5954
5955 address base64_vbmi_join_1_2_addr() {
5956 __masm-> align64();
5957 StubCodeMark mark(this, "StubRoutines", "join_1_2_base64");
5958 address start = __masm-> pc();
5959 assert(((unsigned long long)start & 0x3f) == 0,
5960 "Alignment problem (0x%08llx)", (unsigned long long)start);
5961 __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none);
5962 __masm-> emit_data64(0x292a242526202122, relocInfo::none);
5963 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5964 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5965 __masm-> emit_data64(0x494a444546404142, relocInfo::none);
5966 __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none);
5967 __masm-> emit_data64(0x5c5d5e58595a5455, relocInfo::none);
5968 __masm-> emit_data64(0x696a646566606162, relocInfo::none);
5969 return start;
5970 }
5971
5972 address base64_vbmi_join_2_3_addr() {
5973 __masm-> align64();
5974 StubCodeMark mark(this, "StubRoutines", "join_2_3_base64");
5975 address start = __masm-> pc();
5976 assert(((unsigned long long)start & 0x3f) == 0,
5977 "Alignment problem (0x%08llx)", (unsigned long long)start);
5978 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5979 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5980 __masm-> emit_data64(0x494a444546404142, relocInfo::none);
5981 __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none);
5982 __masm-> emit_data64(0x5c5d5e58595a5455, relocInfo::none);
5983 __masm-> emit_data64(0x696a646566606162, relocInfo::none);
5984 __masm-> emit_data64(0x767071726c6d6e68, relocInfo::none);
5985 __masm-> emit_data64(0x7c7d7e78797a7475, relocInfo::none);
5986 return start;
5987 }
5988
5989 address base64_decoding_table_addr() {
5990 StubCodeMark mark(this, "StubRoutines", "decoding_table_base64");
5991 address start = __masm-> pc();
5992 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5993 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5994 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5995 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5996 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5997 __masm-> emit_data64(0x3fffffff3effffff, relocInfo::none);
5998 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
5999 __masm-> emit_data64(0xffffffffffff3d3c, relocInfo::none);
6000 __masm-> emit_data64(0x06050403020100ff, relocInfo::none);
6001 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
6002 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
6003 __masm-> emit_data64(0xffffffffff191817, relocInfo::none);
6004 __masm-> emit_data64(0x201f1e1d1c1b1aff, relocInfo::none);
6005 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
6006 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
6007 __masm-> emit_data64(0xffffffffff333231, relocInfo::none);
6008 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6009 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6010 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6011 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6012 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6013 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6014 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6015 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6016 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6017 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6018 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6019 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6020 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6021 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6022 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6023 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6024
6025 // URL table
6026 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6027 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6028 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6029 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6030 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6031 __masm-> emit_data64(0xffff3effffffffff, relocInfo::none);
6032 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
6033 __masm-> emit_data64(0xffffffffffff3d3c, relocInfo::none);
6034 __masm-> emit_data64(0x06050403020100ff, relocInfo::none);
6035 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
6036 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
6037 __masm-> emit_data64(0x3fffffffff191817, relocInfo::none);
6038 __masm-> emit_data64(0x201f1e1d1c1b1aff, relocInfo::none);
6039 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
6040 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
6041 __masm-> emit_data64(0xffffffffff333231, relocInfo::none);
6042 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6043 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6044 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6045 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6046 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6047 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6048 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6049 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6050 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6051 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6052 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6053 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6054 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6055 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6056 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6057 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6058 return start;
6059 }
6060
6061
6062// Code for generating Base64 decoding.
6063//
6064// Based on the article (and associated code) from https://arxiv.org/abs/1910.05109.
6065//
6066// Intrinsic function prototype in Base64.java:
6067// private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, boolean isMIME) {
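// Worked example (illustrative sketch, not emitted code): decoding "QUJD" -> "ABC".
// The decoding table maps 'Q'->16, 'U'->20, 'J'->9, 'D'->3; packing the four
// 6-bit values gives (16 << 18) | (20 << 12) | (9 << 6) | 3 = 0x414243, i.e.
// the bytes 'A', 'B', 'C'. The stub below performs this packing 64 or 256
// bytes at a time with AVX-512 VBMI, or 4 bytes at a time in the scalar
// fallback loop.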
6068 address generate_base64_decodeBlock() {
6069 __masm-> align(CodeEntryAlignment);
6070 StubCodeMark mark(this, "StubRoutines", "implDecode");
6071 address start = __masm-> pc();
6072 __masm-> enter();
6073
6074 // Save callee-saved registers before using them
6075 __masm-> push(r12);
6076 __masm-> push(r13);
6077 __masm-> push(r14);
6078 __masm-> push(r15);
6079 __masm-> push(rbx);
6080
6081 // arguments
6082 const Register source = c_rarg0; // Source Array
6083 const Register start_offset = c_rarg1; // start offset
6084 const Register end_offset = c_rarg2; // end offset
6085 const Register dest = c_rarg3; // destination array
6086 const Register isMIME = rbx;
6087
6088#ifndef _WIN64
6089 const Register dp = c_rarg4; // Position for writing to dest array
6090 const Register isURL = c_rarg5;// Base64 or URL character set
6091 __masm-> movl(isMIME, Address(rbp, 2 * wordSize));
6092#else
6093 const Address dp_mem(rbp, 6 * wordSize); // dp is on stack on Win64
6094 const Address isURL_mem(rbp, 7 * wordSize);
6095 const Register isURL = r10; // pick the volatile windows register
6096 const Register dp = r12;
6097 __masm-> movl(dp, dp_mem);
6098 __masm-> movl(isURL, isURL_mem);
6099 __masm-> movl(isMIME, Address(rbp, 8 * wordSize));
6100#endif
6101
6102 const XMMRegister lookup_lo = xmm5;
6103 const XMMRegister lookup_hi = xmm6;
6104 const XMMRegister errorvec = xmm7;
6105 const XMMRegister pack16_op = xmm9;
6106 const XMMRegister pack32_op = xmm8;
6107 const XMMRegister input0 = xmm3;
6108 const XMMRegister input1 = xmm20;
6109 const XMMRegister input2 = xmm21;
6110 const XMMRegister input3 = xmm19;
6111 const XMMRegister join01 = xmm12;
6112 const XMMRegister join12 = xmm11;
6113 const XMMRegister join23 = xmm10;
6114 const XMMRegister translated0 = xmm2;
6115 const XMMRegister translated1 = xmm1;
6116 const XMMRegister translated2 = xmm0;
6117 const XMMRegister translated3 = xmm4;
6118
6119 const XMMRegister merged0 = xmm2;
6120 const XMMRegister merged1 = xmm1;
6121 const XMMRegister merged2 = xmm0;
6122 const XMMRegister merged3 = xmm4;
6123 const XMMRegister merge_ab_bc0 = xmm2;
6124 const XMMRegister merge_ab_bc1 = xmm1;
6125 const XMMRegister merge_ab_bc2 = xmm0;
6126 const XMMRegister merge_ab_bc3 = xmm4;
6127
6128 const XMMRegister pack24bits = xmm4;
6129
6130 const Register length = r14;
6131 const Register output_size = r13;
6132 const Register output_mask = r15;
6133 const KRegister input_mask = k1;
6134
6135 const XMMRegister input_initial_valid_b64 = xmm0;
6136 const XMMRegister tmp = xmm10;
6137 const XMMRegister mask = xmm0;
6138 const XMMRegister invalid_b64 = xmm1;
6139
6140 Label L_process256, L_process64, L_process64Loop, L_exit, L_processdata, L_loadURL;
6141 Label L_continue, L_finalBit, L_padding, L_donePadding, L_bruteForce;
6142 Label L_forceLoop, L_bottomLoop, L_checkMIME, L_exit_no_vzero;
6143
6144 // calculate length from offsets
6145 __masm-> movl(length, end_offset);
6146 __masm-> subl(length, start_offset);
6147 __masm-> push(dest); // Save for return value calc
6148
6149 // If AVX512 VBMI not supported, just compile non-AVX code
6150 if(VM_Version::supports_avx512_vbmi() &&
6151 VM_Version::supports_avx512bw()) {
6152 __masm-> cmpl(length, 128); // 128-bytes is break-even for AVX-512
6153 __masm-> jcc(Assembler::lessEqual, L_bruteForce);
6154
6155 __masm-> cmpl(isMIME, 0);
6156 __masm-> jcc(Assembler::notEqual, L_bruteForce);
6157
6158 // Load lookup tables based on isURL
6159 __masm-> cmpl(isURL, 0);
6160 __masm-> jcc(Assembler::notZero, L_loadURL);
6161
6162 __masm-> evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_addr()), Assembler::AVX_512bit, r13);
6163 __masm-> evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_addr()), Assembler::AVX_512bit, r13);
6164
6165 __masm-> BIND(L_continue);
6166
6167 __masm-> movl(r15, 0x01400140);
6168 __masm-> evpbroadcastd(pack16_op, r15, Assembler::AVX_512bit);
6169
6170 __masm-> movl(r15, 0x00011000);
6171 __masm-> evpbroadcastd(pack32_op, r15, Assembler::AVX_512bit);
6172
6173 __masm-> cmpl(length, 0xff);
6174 __masm-> jcc(Assembler::lessEqual, L_process64);
6175
6176 // load masks required for decoding data
6177 __masm-> BIND(L_processdata);
6178 __masm-> evmovdquq(join01, ExternalAddress(StubRoutines::x86::base64_vbmi_join_0_1_addr()), Assembler::AVX_512bit, r13);
6179 __masm-> evmovdquq(join12, ExternalAddress(StubRoutines::x86::base64_vbmi_join_1_2_addr()), Assembler::AVX_512bit, r13);
6180 __masm-> evmovdquq(join23, ExternalAddress(StubRoutines::x86::base64_vbmi_join_2_3_addr()), Assembler::AVX_512bit, r13);
6181
6182 __masm-> align32();
6183 __masm-> BIND(L_process256);
6184 // Grab input data
6185 __masm-> evmovdquq(input0, Address(source, start_offset, Address::times_1, 0x00), Assembler::AVX_512bit);
6186 __masm-> evmovdquq(input1, Address(source, start_offset, Address::times_1, 0x40), Assembler::AVX_512bit);
6187 __masm-> evmovdquq(input2, Address(source, start_offset, Address::times_1, 0x80), Assembler::AVX_512bit);
6188 __masm-> evmovdquq(input3, Address(source, start_offset, Address::times_1, 0xc0), Assembler::AVX_512bit);
6189
6190 // Copy the low part of the lookup table into the destination of the permutation
6191 __masm-> evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit);
6192 __masm-> evmovdquq(translated1, lookup_lo, Assembler::AVX_512bit);
6193 __masm-> evmovdquq(translated2, lookup_lo, Assembler::AVX_512bit);
6194 __masm-> evmovdquq(translated3, lookup_lo, Assembler::AVX_512bit);
6195
6196 // Translate the base64 input into "decoded" bytes
6197 __masm-> evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit);
6198 __masm-> evpermt2b(translated1, input1, lookup_hi, Assembler::AVX_512bit);
6199 __masm-> evpermt2b(translated2, input2, lookup_hi, Assembler::AVX_512bit);
6200 __masm-> evpermt2b(translated3, input3, lookup_hi, Assembler::AVX_512bit);
6201
6202 // OR all of the translations together to check for errors (high-order bit of byte set)
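// Note: vpternlogd with immediate 0xfe computes the bitwise OR of all three
// operands -- each bit of 0xfe is the output for one A/B/C input combination,
// and only the 0,0,0 combination yields 0.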
6203 __masm-> vpternlogd(input0, 0xfe, input1, input2, Assembler::AVX_512bit);
6204
6205 __masm-> vpternlogd(input3, 0xfe, translated0, translated1, Assembler::AVX_512bit);
6206 __masm-> vpternlogd(input0, 0xfe, translated2, translated3, Assembler::AVX_512bit);
6207 __masm-> vpor(errorvec, input3, input0, Assembler::AVX_512bit);
6208
6209 // Check if there was an error - if so, try 64-byte chunks
6210 __masm-> evpmovb2m(k3, errorvec, Assembler::AVX_512bit);
6211 __masm-> kortestql(k3, k3);
6212 __masm-> jcc(Assembler::notZero, L_process64);
6213
6214 // The merging and shuffling happens here
6215 // We multiply each byte pair [00dddddd | 00cccccc | 00bbbbbb | 00aaaaaa]
6216 // Multiply [00cccccc] by 2^6 added to [00dddddd] to get [0000cccc | ccdddddd]
6217 // The pack16_op is a vector of 0x01400140, so multiply D by 1 and C by 0x40
6218 __masm-> vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit);
6219 __masm-> vpmaddubsw(merge_ab_bc1, translated1, pack16_op, Assembler::AVX_512bit);
6220 __masm-> vpmaddubsw(merge_ab_bc2, translated2, pack16_op, Assembler::AVX_512bit);
6221 __masm-> vpmaddubsw(merge_ab_bc3, translated3, pack16_op, Assembler::AVX_512bit);
6222
6223 // Now do the same with packed 16-bit values.
6224 // We start with [0000cccc | ccdddddd | 0000aaaa | aabbbbbb]
6225 // pack32_op is 0x00011000 (2^12, 1), so this multiplies [0000aaaa | aabbbbbb] by 2^12
6226 // and adds [0000cccc | ccdddddd] to yield [00000000 | aaaaaabb | bbbbcccc | ccdddddd]
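// Numeric sketch (assumed values): a = 0b000001, b = 0b000010 gives the
// 16-bit lane [0000aaaa | aabbbbbb] = 0x0042; 0x0042 * 0x1000 places a and b
// in bits 23..12, and the adjacent c/d lane is added into the low 12 bits.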
6227 __masm-> vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit);
6228 __masm-> vpmaddwd(merged1, merge_ab_bc1, pack32_op, Assembler::AVX_512bit);
6229 __masm-> vpmaddwd(merged2, merge_ab_bc2, pack32_op, Assembler::AVX_512bit);
6230 __masm-> vpmaddwd(merged3, merge_ab_bc3, pack32_op, Assembler::AVX_512bit);
6231
6232 // The join vectors specify which byte from which vector goes into the outputs
6233 // One of every 4 bytes in the extended vector is zero, so we pack them into their
6234 // final positions in the register for storing (256 bytes in, 192 bytes out)
6235 __masm-> evpermt2b(merged0, join01, merged1, Assembler::AVX_512bit);
6236 __masm-> evpermt2b(merged1, join12, merged2, Assembler::AVX_512bit);
6237 __masm-> evpermt2b(merged2, join23, merged3, Assembler::AVX_512bit);
6238
6239 // Store result
6240 __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x00), merged0, Assembler::AVX_512bit);
6241 __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x40), merged1, Assembler::AVX_512bit);
6242 __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x80), merged2, Assembler::AVX_512bit);
6243
6244 __masm-> addptr(source, 0x100);
6245 __masm-> addptr(dest, 0xc0);
6246 __masm-> subl(length, 0x100);
6247 __masm-> cmpl(length, 64 * 4);
6248 __masm-> jcc(Assembler::greaterEqual, L_process256);
6249
6250 // At this point, we've decoded 64 * 4 * n bytes.
6251 // The remaining length will be <= 64 * 4 - 1.
6252 // UNLESS there was an error decoding the first 256-byte chunk. In this
6253 // case, the length will be arbitrarily long.
6254 //
6255 // Note that this will be the path for MIME-encoded strings.
6256
6257 __masm-> BIND(L_process64);
6258
6259 __masm-> evmovdquq(pack24bits, ExternalAddress(StubRoutines::x86::base64_vbmi_pack_vec_addr()), Assembler::AVX_512bit, r13);
6260
6261 __masm-> cmpl(length, 63);
6262 __masm-> jcc(Assembler::lessEqual, L_finalBit);
6263
6264 __masm-> mov64(rax, 0x0000ffffffffffff);
6265 __masm-> kmovql(k2, rax);
6266
6267 __masm-> align32();
6268 __masm-> BIND(L_process64Loop);
6269
6270 // Handle first 64-byte block
6271
6272 __masm-> evmovdquq(input0, Address(source, start_offset), Assembler::AVX_512bit);
6273 __masm-> evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit);
6274 __masm-> evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit);
6275
6276 __masm-> vpor(errorvec, translated0, input0, Assembler::AVX_512bit);
6277
6278 // Check for error and bomb out before updating dest
6279 __masm-> evpmovb2m(k3, errorvec, Assembler::AVX_512bit);
6280 __masm-> kortestql(k3, k3);
6281 __masm-> jcc(Assembler::notZero, L_exit);
6282
6283 // Pack output register, selecting correct byte ordering
6284 __masm-> vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit);
6285 __masm-> vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit);
6286 __masm-> vpermb(merged0, pack24bits, merged0, Assembler::AVX_512bit);
6287
6288 __masm-> evmovdqub(Address(dest, dp), k2, merged0, true, Assembler::AVX_512bit);
6289
6290 __masm-> subl(length, 64);
6291 __masm-> addptr(source, 64);
6292 __masm-> addptr(dest, 48);
6293
6294 __masm-> cmpl(length, 64);
6295 __masm-> jcc(Assembler::greaterEqual, L_process64Loop);
6296
6297 __masm-> cmpl(length, 0);
6298 __masm-> jcc(Assembler::lessEqual, L_exit);
6299
6300 __masm-> BIND(L_finalBit);
6301 // Now have 1 to 63 bytes left to decode
6302
6303 // I was going to let Java take care of the final fragment,
6304 // however it will repeatedly call this routine for every 4 bytes
6305 // of input data, so handle the rest here.
6306 __masm-> movq(rax, -1);
6307 __masm-> bzhiq(rax, rax, length); // Input mask in rax
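// Sketch: bzhi leaves the low `length` bits of rax set, e.g. length = 5
// (assumed) gives rax = 0x1f -- one mask bit per remaining input byte for
// the masked load further down.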
6308
6309 __masm-> movl(output_size, length);
6310 __masm-> shrl(output_size, 2); // Find (len / 4) * 3 (output length)
6311 __masm-> lea(output_size, Address(output_size, output_size, Address::times_2, 0));
6312 // output_size in r13
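// Sketch with an assumed length of 40: shrl gives 10 four-byte groups, and
// the lea computes 10 + 2*10 = 30 output bytes, i.e. (len / 4) * 3.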
6313
6314 // Strip pad characters, if any, and adjust length and mask
6315 __masm-> cmpb(Address(source, length, Address::times_1, -1), '=');
6316 __masm-> jcc(Assembler::equal, L_padding);
6317
6318 __masm-> BIND(L_donePadding);
6319
6320 // Output mask selects the low output_size bytes (i.e. all 1s >> (64 - output_size)).
6321 __masm-> kmovql(input_mask, rax);
6322 __masm-> movq(output_mask, -1);
6323 __masm-> bzhiq(output_mask, output_mask, output_size);
6324
6325 // Load initial input with all valid base64 characters. Will be used
6326 // in merging source bytes to avoid masking when determining if an error occurred.
6327 __masm-> movl(rax, 0x61616161);
6328 __masm-> evpbroadcastd(input_initial_valid_b64, rax, Assembler::AVX_512bit);
6329
6330 // A register containing all invalid base64 decoded values
6331 __masm-> movl(rax, 0x80808080);
6332 __masm-> evpbroadcastd(invalid_b64, rax, Assembler::AVX_512bit);
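// Sketch: 0x61 is 'a', a character that decodes cleanly in every table, so
// lanes not covered by input_mask can never set the 0x80 error bit tested
// below via evptestmb.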
6333
6334 // input_mask is in k1
6335 // output_size is in r13
6336 // output_mask is in r15
6337 // zmm0 - free
6338 // zmm1 - 0x00011000
6339 // zmm2 - 0x01400140
6340 // zmm3 - errorvec
6341 // zmm4 - pack vector
6342 // zmm5 - lookup_lo
6343 // zmm6 - lookup_hi
6344 // zmm7 - errorvec
6345 // zmm8 - 0x61616161
6346 // zmm9 - 0x80808080
6347
6348 // Load only the bytes from source, merging into our "fully-valid" register
6349 __masm-> evmovdqub(input_initial_valid_b64, input_mask, Address(source, start_offset, Address::times_1, 0x0), true, Assembler::AVX_512bit);
6350
6351 // Decode all bytes within our merged input
6352 __masm-> evmovdquq(tmp, lookup_lo, Assembler::AVX_512bit);
6353 __masm-> evpermt2b(tmp, input_initial_valid_b64, lookup_hi, Assembler::AVX_512bit);
6354 __masm-> vporq(mask, tmp, input_initial_valid_b64, Assembler::AVX_512bit);
6355
6356 // Check for error. Compare (decoded | initial) to all invalid.
6357 // If any bytes have their high-order bit set, then we have an error.
6358 __masm-> evptestmb(k2, mask, invalid_b64, Assembler::AVX_512bit);
6359 __masm-> kortestql(k2, k2);
6360
6361 // If we have an error, use the brute force loop to decode what we can (4-byte chunks).
6362 __masm-> jcc(Assembler::notZero, L_bruteForce);
6363
6364 // Shuffle output bytes
6365 __masm-> vpmaddubsw(tmp, tmp, pack16_op, Assembler::AVX_512bit);
6366 __masm-> vpmaddwd(tmp, tmp, pack32_op, Assembler::AVX_512bit);
6367
6368 __masm-> vpermb(tmp, pack24bits, tmp, Assembler::AVX_512bit);
6369 __masm-> kmovql(k1, output_mask);
6370 __masm-> evmovdqub(Address(dest, dp), k1, tmp, true, Assembler::AVX_512bit);
6371
6372 __masm-> addptr(dest, output_size);
6373
6374 __masm-> BIND(L_exit);
6375 __masm-> vzeroupper();
6376 __masm-> pop(rax); // Get original dest value
6377 __masm-> subptr(dest, rax); // Number of bytes converted
6378 __masm-> movptr(rax, dest);
6379 __masm-> pop(rbx);
6380 __masm-> pop(r15);
6381 __masm-> pop(r14);
6382 __masm-> pop(r13);
6383 __masm-> pop(r12);
6384 __masm-> leave();
6385 __masm-> ret(0);
6386
6387 __masm-> BIND(L_loadURL);
6388 __masm-> evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_url_addr()), Assembler::AVX_512bit, r13);
6389 __masm-> evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_url_addr()), Assembler::AVX_512bit, r13);
6390 __masm-> jmp(L_continue);
6391
6392 __masm-> BIND(L_padding);
6393 __masm-> decrementq(output_size, 1);
6394 __masm-> shrq(rax, 1);
6395
6396 __masm-> cmpb(Address(source, length, Address::times_1, -2), '=');
6397 __masm-> jcc(Assembler::notEqual, L_donePadding);
6398
6399 __masm-> decrementq(output_size, 1);
6400 __masm-> shrq(rax, 1);
6401 __masm-> jmp(L_donePadding);
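// Each trailing '=' drops one byte from the expected output (decrementq
// above) and one bit from the input mask in rax (shrq), so the masked load
// and store exclude the pad characters.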
6402
6403 __masm-> align32();
6404 __masm-> BIND(L_bruteForce);
6405 } // End of if(avx512_vbmi)
6406
6407 // Use non-AVX code to decode 4-byte chunks into 3 bytes of output
6408
6409 // Register state (Linux):
6410 // r12-15 - saved on stack
6411 // rdi - src
6412 // rsi - sp
6413 // rdx - sl
6414 // rcx - dst
6415 // r8 - dp
6416 // r9 - isURL
6417
6418 // Register state (Windows):
6419 // r12-15 - saved on stack
6420 // rcx - src
6421 // rdx - sp
6422 // r8 - sl
6423 // r9 - dst
6424 // r12 - dp
6425 // r10 - isURL
6426
6427 // Registers (common):
6428 // length (r14) - bytes in src
6429
6430 const Register decode_table = r11;
6431 const Register out_byte_count = rbx;
6432 const Register byte1 = r13;
6433 const Register byte2 = r15;
6434 const Register byte3 = WINDOWS_ONLY(r8) NOT_WINDOWS(rdx);
6435 const Register byte4 = WINDOWS_ONLY(r10) NOT_WINDOWS(r9);
6436
6437 __masm-> shrl(length, 2); // Multiple of 4 bytes only - length is # 4-byte chunks
6438 __masm-> cmpl(length, 0);
6439 __masm-> jcc(Assembler::lessEqual, L_exit_no_vzero);
6440
6441 __masm-> shll(isURL, 8); // index into decode table based on isURL
6442 __masm-> lea(decode_table, ExternalAddress(StubRoutines::x86::base64_decoding_table_addr()));
6443 __masm-> addptr(decode_table, isURL);
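// The two 256-byte tables emitted in base64_decoding_table_addr() sit back
// to back, so (isURL << 8) selects the base table (offset 0) or the URL-safe
// table (offset 256).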
6444
6445 __masm-> jmp(L_bottomLoop);
6446
6447 __masm-> align32();
6448 __masm-> BIND(L_forceLoop);
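// Loop body sketch: the four decoded 6-bit values are packed into a 24-bit
// triple ((byte1 << 18) | (byte2 << 12) | (byte3 << 6) | byte4), which the
// movb sequence then stores high byte last in address order: dst+2 gets
// bits 7..0, dst+1 bits 15..8, dst+0 bits 23..16.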
6449 __masm-> shll(byte1, 18);
6450 __masm-> shll(byte2, 12);
6451 __masm-> shll(byte3, 6);
6452 __masm-> orl(byte1, byte2);
6453 __masm-> orl(byte1, byte3);
6454 __masm-> orl(byte1, byte4);
6455
6456 __masm-> addptr(source, 4);
6457
6458 __masm-> movb(Address(dest, dp, Address::times_1, 2), byte1);
6459 __masm-> shrl(byte1, 8);
6460 __masm-> movb(Address(dest, dp, Address::times_1, 1), byte1);
6461 __masm-> shrl(byte1, 8);
6462 __masm-> movb(Address(dest, dp, Address::times_1, 0), byte1);
6463
6464 __masm-> addptr(dest, 3);
6465 __masm-> decrementl(length, 1);
6466 __masm-> jcc(Assembler::zero, L_exit_no_vzero);
6467
6468 __masm-> BIND(L_bottomLoop);
6469 __masm-> load_unsigned_byte(byte1, Address(source, start_offset, Address::times_1, 0x00));
6470 __masm-> load_unsigned_byte(byte2, Address(source, start_offset, Address::times_1, 0x01));
6471 __masm-> load_signed_byte(byte1, Address(decode_table, byte1));
6472 __masm-> load_signed_byte(byte2, Address(decode_table, byte2));
6473 __masm-> load_unsigned_byte(byte3, Address(source, start_offset, Address::times_1, 0x02));
6474 __masm-> load_unsigned_byte(byte4, Address(source, start_offset, Address::times_1, 0x03));
6475 __masm-> load_signed_byte(byte3, Address(decode_table, byte3));
6476 __masm-> load_signed_byte(byte4, Address(decode_table, byte4));
6477
6478 __masm-> mov(rax, byte1);
6479 __masm-> orl(rax, byte2);
6480 __masm-> orl(rax, byte3);
6481 __masm-> orl(rax, byte4);
6482 __masm-> jcc(Assembler::positive, L_forceLoop);
6483
6484 __masm-> BIND(L_exit_no_vzero);
6485 __masm-> pop(rax); // Get original dest value
6486 __masm-> subptr(dest, rax); // Number of bytes converted
6487 __masm-> movptr(rax, dest);
6488 __masm-> pop(rbx);
6489 __masm-> pop(r15);
6490 __masm-> pop(r14);
6491 __masm-> pop(r13);
6492 __masm-> pop(r12);
6493 __masm-> leave();
6494 __masm-> ret(0);
6495
6496 return start;
6497 }
6498
6499
6500 /**
6501 * Arguments:
6502 *
6503 * Inputs:
6504 * c_rarg0 - int crc
6505 * c_rarg1 - byte* buf
6506 * c_rarg2 - int length
6507 *
6508 * Output:
6509 * rax - int crc result
6510 */
6511 address generate_updateBytesCRC32() {
6512 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
6513
6514 __masm-> align(CodeEntryAlignment);
6515 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
6516
6517 address start = __masm-> pc();
6518 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6519 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6520 // rscratch1: r10
6521 const Register crc = c_rarg0; // crc
6522 const Register buf = c_rarg1; // source java byte array address
6523 const Register len = c_rarg2; // length
6524 const Register table = c_rarg3; // crc_table address (reuse register)
6525 const Register tmp1 = r11;
6526 const Register tmp2 = r10;
6527 assert_different_registers(crc, buf, len, table, tmp1, tmp2, rax);
6528
6529 BLOCK_COMMENT("Entry:");
6530 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6531
6532 if (VM_Version::supports_sse4_1() && VM_Version::supports_avx512_vpclmulqdq() &&
6533 VM_Version::supports_avx512bw() &&
6534 VM_Version::supports_avx512vl()) {
6535 // The constants used in the CRC32 algorithm require the 1's complement of the initial crc value.
6536 // However, the constant table for CRC32-C assumes the original crc value. Account for this
6537 // difference before calling and after returning.
6538 __masm-> lea(table, ExternalAddress(StubRoutines::x86::crc_table_avx512_addr()));
6539 __masm-> notl(crc);
6540 __masm-> kernel_crc32_avx512(crc, buf, len, table, tmp1, tmp2);
6541 __masm-> notl(crc);
6542 } else {
6543 __masm-> kernel_crc32(crc, buf, len, table, tmp1);
6544 }
6545
6546 __masm-> movl(rax, crc);
6547 __masm-> vzeroupper();
6548 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6549 __masm-> ret(0);
6550
6551 return start;
6552 }
6553
6554 /**
6555 * Arguments:
6556 *
6557 * Inputs:
6558 * c_rarg0 - int crc
6559 * c_rarg1 - byte* buf
6560 * c_rarg2 - long length
6561 * c_rarg3 - table_start - optional (present only when doing a library_call,
6562 * not used by x86 algorithm)
6563 *
6564 * Output:
6565 * rax - int crc result
6566 */
6567 address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
6568 assert(UseCRC32CIntrinsics, "need SSE4_2");
6569 __masm-> align(CodeEntryAlignment);
6570 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
6571 address start = __masm-> pc();
6572 // reg.arg    int#0  int#1  int#2  int#3  int#4  int#5  float regs
6573 // Windows    RCX    RDX    R8     R9     none   none   XMM0..XMM3
6574 // Lin / Sol  RDI    RSI    RDX    RCX    R8     R9     XMM0..XMM7
6575 const Register crc = c_rarg0; // crc
6576 const Register buf = c_rarg1; // source java byte array address
6577 const Register len = c_rarg2; // length
6578 const Register a = rax;
6579 const Register j = r9;
6580 const Register k = r10;
6581 const Register l = r11;
6582#ifdef _WIN64
6583 const Register y = rdi;
6584 const Register z = rsi;
6585#else
6586 const Register y = rcx;
6587 const Register z = r8;
6588#endif
6589 assert_different_registers(crc, buf, len, a, j, k, l, y, z);
6590
6591 BLOCK_COMMENT("Entry:");
6592 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6593 if (VM_Version::supports_sse4_1() && VM_Version::supports_avx512_vpclmulqdq() &&
6594 VM_Version::supports_avx512bw() &&
6595 VM_Version::supports_avx512vl()) {
6596 __masm-> lea(j, ExternalAddress(StubRoutines::x86::crc32c_table_avx512_addr()));
6597 __masm-> kernel_crc32_avx512(crc, buf, len, j, l, k);
6598 } else {
6599#ifdef _WIN64
6600 __masm-> push(y);
6601 __masm-> push(z);
6602#endif
6603 __masm-> crc32c_ipl_alg2_alt2(crc, buf, len,
6604 a, j, k,
6605 l, y, z,
6606 c_farg0, c_farg1, c_farg2,
6607 is_pclmulqdq_supported);
6608#ifdef _WIN64
6609 __masm-> pop(z);
6610 __masm-> pop(y);
6611#endif
6612 }
6613 __masm-> movl(rax, crc);
6614 __masm-> vzeroupper();
6615 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6616 __masm-> ret(0);
6617
6618 return start;
6619 }
6620
6621
6622 /***
6623 * Arguments:
6624 *
6625 * Inputs:
6626 * c_rarg0 - int adler
6627 * c_rarg1 - byte* buff
6628 * c_rarg2 - int len
6629 *
6630 * Output:
6631 * rax - int adler result
6632 */
6633
6634 address generate_updateBytesAdler32() {
6635 assert(UseAdler32Intrinsics, "need AVX2");
6636
6637 __masm-> align(CodeEntryAlignment);
6638 StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
6639
6640 address start = __masm-> pc();
6641
6642 const Register data = r9;
6643 const Register size = r10;
6644
6645 const XMMRegister yshuf0 = xmm6;
6646 const XMMRegister yshuf1 = xmm7;
6647 assert_different_registers(c_rarg0, c_rarg1, c_rarg2, data, size);
6648
6649 BLOCK_COMMENT("Entry:");
6650 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6651
6652 __masm-> vmovdqu(yshuf0, ExternalAddress((address) StubRoutines::x86::_adler32_shuf0_table), r9);
6653 __masm-> vmovdqu(yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_shuf1_table), r9);
6654 __masm-> movptr(data, c_rarg1); //data
6655 __masm-> movl(size, c_rarg2); //length
6656 __masm-> updateBytesAdler32(c_rarg0, data, size, yshuf0, yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_ascale_table));
6657 __masm-> leave();
6658 __masm-> ret(0);
6659 return start;
6660 }
6661
6662 /**
6663 * Arguments:
6664 *
6665 * Input:
6666 * c_rarg0 - x address
6667 * c_rarg1 - x length
6668 * c_rarg2 - y address
6669 * c_rarg3 - y length
6670 * not Win64
6671 * c_rarg4 - z address
6672 * c_rarg5 - z length
6673 * Win64
6674 * rsp+40 - z address
6675 * rsp+48 - z length
6676 */
6677 address generate_multiplyToLen() {
6678 __masm-> align(CodeEntryAlignment);
6679 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
6680
6681 address start = __masm-> pc();
6682 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6683 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6684 const Register x = rdi;
6685 const Register xlen = rax;
6686 const Register y = rsi;
6687 const Register ylen = rcx;
6688 const Register z = r8;
6689 const Register zlen = r11;
6690
6691 // Next registers will be saved on stack in multiply_to_len().
6692 const Register tmp1 = r12;
6693 const Register tmp2 = r13;
6694 const Register tmp3 = r14;
6695 const Register tmp4 = r15;
6696 const Register tmp5 = rbx;
6697
6698 BLOCK_COMMENT("Entry:");
6699 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6700
6701#ifndef _WIN64
6702 __masm-> movptr(zlen, r9); // Save r9 in r11 - zlen
6703#endif
6704 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
6705 // ylen => rcx, z => r8, zlen => r11
6706 // r9 and r10 may be used to save non-volatile registers
6707#ifdef _WIN64
6708 // last 2 arguments (#4, #5) are on stack on Win64
6709 __masm-> movptr(z, Address(rsp, 6 * wordSize));
6710 __masm-> movptr(zlen, Address(rsp, 7 * wordSize));
6711#endif
6712
6713 __masm-> movptr(xlen, rsi);
6714 __masm-> movptr(y, rdx);
6715 __masm-> multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
6716
6717 restore_arg_regs();
6718
6719 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6720 __masm-> ret(0);
6721
6722 return start;
6723 }
6724
6725 /**
6726 * Arguments:
6727 *
6728 * Input:
6729 * c_rarg0 - obja address
6730 * c_rarg1 - objb address
6731 * c_rarg2 - length length
6732 * c_rarg3 - scale log2_array_indxscale
6733 *
6734 * Output:
6735 * rax - int >= mismatched index, < 0 bitwise complement of tail
6736 */
6737 address generate_vectorizedMismatch() {
6738 __masm-> align(CodeEntryAlignment);
6739 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
6740 address start = __masm-> pc();
6741
6742 BLOCK_COMMENT("Entry:");
6743 __masm-> enter();
6744
6745#ifdef _WIN64 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6746 const Register scale = c_rarg0; //rcx, will exchange with r9
6747 const Register objb = c_rarg1; //rdx
6748 const Register length = c_rarg2; //r8
6749 const Register obja = c_rarg3; //r9
6750 __masm-> xchgq(obja, scale); //now obja and scale contains the correct contents
6751
6752 const Register tmp1 = r10;
6753 const Register tmp2 = r11;
6754#endif
6755#ifndef _WIN64 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6756 const Register obja = c_rarg0; //U:rdi
6757 const Register objb = c_rarg1; //U:rsi
6758 const Register length = c_rarg2; //U:rdx
6759 const Register scale = c_rarg3; //U:rcx
6760 const Register tmp1 = r8;
6761 const Register tmp2 = r9;
6762#endif
6763 const Register result = rax; //return value
6764 const XMMRegister vec0 = xmm0;
6765 const XMMRegister vec1 = xmm1;
6766 const XMMRegister vec2 = xmm2;
6767
6768 __masm-> vectorized_mismatch(obja, objb, length, scale, result, tmp1, tmp2, vec0, vec1, vec2);
6769
6770 __masm-> vzeroupper();
6771 __masm-> leave();
6772 __masm-> ret(0);
6773
6774 return start;
6775 }
6776
6777/**
6778 * Arguments:
6779 *
6780 // Input:
6781 // c_rarg0 - x address
6782 // c_rarg1 - x length
6783 // c_rarg2 - z address
6784 // c_rarg3 - z length
6785 *
6786 */
6787 address generate_squareToLen() {
6788
6789 __masm-> align(CodeEntryAlignment);
6790 StubCodeMark mark(this, "StubRoutines", "squareToLen");
6791
6792 address start = __masm-> pc();
6793 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6794 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
6795 const Register x = rdi;
6796 const Register len = rsi;
6797 const Register z = r8;
6798 const Register zlen = rcx;
6799
6800 const Register tmp1 = r12;
6801 const Register tmp2 = r13;
6802 const Register tmp3 = r14;
6803 const Register tmp4 = r15;
6804 const Register tmp5 = rbx;
6805
6806 BLOCK_COMMENT("Entry:");
6807 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6808
6809 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
6810 // zlen => rcx
6811 // r9 and r10 may be used to save non-volatile registers
6812 __masm-> movptr(r8, rdx);
6813 __masm-> square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
6814
6815 restore_arg_regs();
6816
6817 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6818 __masm-> ret(0);
6819
6820 return start;
6821 }
6822
6823 address generate_method_entry_barrier() {
6824 __masm-> align(CodeEntryAlignment);
6825 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
6826
6827 Label deoptimize_label;
6828
6829 address start = __masm-> pc();
6830
6831 __masm-> push(-1); // cookie, this is used for writing the new rsp when deoptimizing
6832
6833 BLOCK_COMMENT("Entry:");
6834 __masm-> enter(); // save rbp
6835
6836 // save c_rarg0, because we want to use that value.
6837 // We could do without it but then we depend on the number of slots used by pusha
6838 __masm-> push(c_rarg0);
6839
6840 __masm-> lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
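// Stack picture at this point (sketch): rsp+0 = saved c_rarg0, rsp+8 = saved
// rbp, rsp+16 = cookie, rsp+24 = return address -- hence wordSize * 3.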
6841
6842 __masm-> pusha();
6843
6844 // The method may have floats as arguments, and we must spill them before calling
6845 // the VM runtime.
6846 assert(Argument::n_float_register_parameters_j == 8, "Assumption");
6847 const int xmm_size = wordSize * 2;
6848 const int xmm_spill_size = xmm_size * Argument::n_float_register_parameters_j;
6849 __masm-> subptr(rsp, xmm_spill_size);
6850 __masm-> movdqu(Address(rsp, xmm_size * 7), xmm7);
6851 __masm-> movdqu(Address(rsp, xmm_size * 6), xmm6);
6852 __masm-> movdqu(Address(rsp, xmm_size * 5), xmm5);
6853 __masm-> movdqu(Address(rsp, xmm_size * 4), xmm4);
6854 __masm-> movdqu(Address(rsp, xmm_size * 3), xmm3);
6855 __masm-> movdqu(Address(rsp, xmm_size * 2), xmm2);
6856 __masm-> movdqu(Address(rsp, xmm_size * 1), xmm1);
6857 __masm-> movdqu(Address(rsp, xmm_size * 0), xmm0);
6858
6859 __masm-> call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(address*)>(BarrierSetNMethod::nmethod_stub_entry_barrier)), 1);
6860
6861 __masm-> movdqu(xmm0, Address(rsp, xmm_size * 0));
6862 __masm-> movdqu(xmm1, Address(rsp, xmm_size * 1));
6863 __masm-> movdqu(xmm2, Address(rsp, xmm_size * 2));
6864 __masm-> movdqu(xmm3, Address(rsp, xmm_size * 3));
6865 __masm-> movdqu(xmm4, Address(rsp, xmm_size * 4));
6866 __masm-> movdqu(xmm5, Address(rsp, xmm_size * 5));
6867 __masm-> movdqu(xmm6, Address(rsp, xmm_size * 6));
6868 __masm-> movdqu(xmm7, Address(rsp, xmm_size * 7));
6869 __masm-> addptr(rsp, xmm_spill_size);
6870
6871 __masm-> cmpl(rax, 1); // 1 means deoptimize
6872 __masm-> jcc(Assembler::equal, deoptimize_label);
6873
6874 __masm-> popa();
6875 __masm-> pop(c_rarg0);
6876
6877 __masm-> leave();
6878
6879 __masm-> addptr(rsp, 1 * wordSize); // cookie
6880 __masm-> ret(0);
6881
6882
6883 __masm-> BIND(deoptimize_label);
6884
6885 __masm-> popa();
6886 __masm-> pop(c_rarg0);
6887
6888 __masm-> leave();
6889
6890 // this can be taken out, but is good for verification purposes. getting a SIGSEGV
6891 // here while still having a correct stack is valuable
6892 __masm-> testptr(rsp, Address(rsp, 0));
6893
6894 __masm-> movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier
6895 __masm-> jmp(Address(rsp, -1 * wordSize)); // jmp target should be callers verified_entry_point
6896
6897 return start;
6898 }
6899
6900 /**
6901 * Arguments:
6902 *
6903 * Input:
6904 * c_rarg0 - out address
6905 * c_rarg1 - in address
6906 * c_rarg2 - offset
6907 * c_rarg3 - len
6908 * not Win64
6909 * c_rarg4 - k
6910 * Win64
6911 * rsp+40 - k
6912 */
6913 address generate_mulAdd() {
6914 __masm-> align(CodeEntryAlignment);
6915 StubCodeMark mark(this, "StubRoutines", "mulAdd");
6916
6917 address start = __masm-> pc();
6918 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6919 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6920 const Register out = rdi;
6921 const Register in = rsi;
6922 const Register offset = r11;
6923 const Register len = rcx;
6924 const Register k = r8;
6925
6926 // Next registers will be saved on stack in mul_add().
6927 const Register tmp1 = r12;
6928 const Register tmp2 = r13;
6929 const Register tmp3 = r14;
6930 const Register tmp4 = r15;
6931 const Register tmp5 = rbx;
6932
6933 BLOCK_COMMENT("Entry:");
6934 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6935
6936 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
6937 // len => rcx, k => r8
6938 // r9 and r10 may be used to save non-volatile registers
6939#ifdef _WIN64
6940 // last argument is on stack on Win64
6941 __masm-> movl(k, Address(rsp, 6 * wordSize));
6942#endif
6943 __masm-> movptr(r11, rdx); // move offset in rdx to offset(r11)
6944 __masm-> mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
6945
6946 restore_arg_regs();
6947
6948 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6949 __masm-> ret(0);
6950
6951 return start;
6952 }
6953
6954 address generate_bigIntegerRightShift() {
6955 __masm-> align(CodeEntryAlignment);
6956 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
6957
6958 address start = __masm-> pc();
6959 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
6960 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
6961 const Register newArr = rdi;
6962 const Register oldArr = rsi;
6963 const Register newIdx = rdx;
6964 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
6965 const Register totalNumIter = r8;
6966
6967 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
6968 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
6969 const Register tmp1 = r11; // Caller save.
6970 const Register tmp2 = rax; // Caller save.
6971 const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save.
6972 const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save.
6973 const Register tmp5 = r14; // Callee save.
6974 const Register tmp6 = r15;
6975
6976 const XMMRegister x0 = xmm0;
6977 const XMMRegister x1 = xmm1;
6978 const XMMRegister x2 = xmm2;
6979
6980 BLOCK_COMMENT("Entry:");
6981 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6982
6983#ifdef _WINDOWS
6984 setup_arg_regs(4);
6985 // For windows, since last argument is on stack, we need to move it to the appropriate register.
6986 __masm-> movl(totalNumIter, Address(rsp, 6 * wordSize));
6987 // Save callee save registers.
6988 __masm-> push(tmp3);
6989 __masm-> push(tmp4);
6990#endif
6991 __masm-> push(tmp5);
6992
6993 // Rename temps used throughout the code.
6994 const Register idx = tmp1;
6995 const Register nIdx = tmp2;
6996
6997 __masm-> xorl(idx, idx);
6998
6999 // Start right shift from end of the array.
7000 // For example, if #iteration = 4 and newIdx = 1
7001 // then dest[4] = src[4] >> shiftCount | src[3] << (32 - shiftCount)
7002 // if #iteration = 4 and newIdx = 0
7003 // then dest[3] = src[4] >> shiftCount | src[3] << (32 - shiftCount)
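// Numeric sketch (assumed values): shiftCount = 8, src[i+1] = 0x11223344,
// src[i] = 0xAABBCCDD -> shrdl in the loop below yields
// (0x11223344 >> 8) | (0xAABBCCDD << 24) = 0xDD112233.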
7004 __masm-> movl(idx, totalNumIter);
7005 __masm-> movl(nIdx, idx);
7006 __masm-> addl(nIdx, newIdx);
7007
7008 // If vectorization is enabled, check if the number of iterations is at least 64
7009 // If not, then go to ShiftTwo, processing 2 iterations
7010 if (VM_Version::supports_avx512_vbmi2()) {
7011 __masm-> cmpptr(totalNumIter, (AVX3Threshold/64));
7012 __masm-> jcc(Assembler::less, ShiftTwo);
7013
7014 if (AVX3Threshold < 16 * 64) {
7015 __masm-> cmpl(totalNumIter, 16);
7016 __masm-> jcc(Assembler::less, ShiftTwo);
7017 }
7018 __masm-> evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
7019 __masm-> subl(idx, 16);
7020 __masm-> subl(nIdx, 16);
7021 __masm-> BIND(Shift512Loop);
7022 __masm-> evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit);
7023 __masm-> evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
7024 __masm-> vpshrdvd(x2, x1, x0, Assembler::AVX_512bit);
7025 __masm-> evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit);
7026 __masm-> subl(nIdx, 16);
7027 __masm-> subl(idx, 16);
7028 __masm-> jcc(Assembler::greaterEqual, Shift512Loop);
7029 __masm-> addl(idx, 16);
7030 __masm-> addl(nIdx, 16);
7031 }
7032 __masm-> BIND(ShiftTwo);
7033 __masm-> cmpl(idx, 2);
7034 __masm-> jcc(Assembler::less, ShiftOne);
7035 __masm-> subl(idx, 2);
7036 __masm-> subl(nIdx, 2);
7037 __masm-> BIND(ShiftTwoLoop);
7038 __masm-> movl(tmp5, Address(oldArr, idx, Address::times_4, 8));
7039 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
7040 __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4));
7041 __masm-> shrdl(tmp5, tmp4);
7042 __masm-> shrdl(tmp4, tmp3);
7043 __masm-> movl(Address(newArr, nIdx, Address::times_4, 4), tmp5);
7044 __masm-> movl(Address(newArr, nIdx, Address::times_4), tmp4);
7045 __masm-> subl(nIdx, 2);
7046 __masm-> subl(idx, 2);
7047 __masm-> jcc(Assembler::greaterEqual, ShiftTwoLoop);
7048 __masm-> addl(idx, 2);
7049 __masm-> addl(nIdx, 2);
7050
7051 // Do the last iteration
7052 __masm-> BIND(ShiftOne);
7053 __masm-> cmpl(idx, 1);
7054 __masm-> jcc(Assembler::less, Exit);
7055 __masm-> subl(idx, 1);
7056 __masm-> subl(nIdx, 1);
7057 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
7058 __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4));
7059 __masm-> shrdl(tmp4, tmp3);
7060 __masm-> movl(Address(newArr, nIdx, Address::times_4), tmp4);
7061 __masm-> BIND(Exit);
7062 // Restore callee save registers.
7063 __masm-> pop(tmp5);
7064#ifdef _WINDOWS
7065 __masm-> pop(tmp4);
7066 __masm-> pop(tmp3);
7067 restore_arg_regs();
7068#endif
7069 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7070 __masm-> ret(0);
7071 return start;
7072 }
7073
7074 /**
7075 * Arguments:
7076 *
7077 * Input:
7078 * c_rarg0 - newArr address
7079 * c_rarg1 - oldArr address
7080 * c_rarg2 - newIdx
7081 * c_rarg3 - shiftCount
7082 * not Win64
7083 * c_rarg4 - numIter
7084 * Win64
7085 * rsp+40 - numIter
7086 */
7087 address generate_bigIntegerLeftShift() {
7088 __masm-> align(CodeEntryAlignment);
7089 StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
7090 address start = __masm-> pc();
7091 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
7092 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
7093 const Register newArr = rdi;
7094 const Register oldArr = rsi;
7095 const Register newIdx = rdx;
7096 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
7097 const Register totalNumIter = r8;
7098 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
7099 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
7100 const Register tmp1 = r11; // Caller save.
7101 const Register tmp2 = rax; // Caller save.
7102 const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save.
7103 const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save.
7104 const Register tmp5 = r14; // Callee save.
7105
7106 const XMMRegister x0 = xmm0;
7107 const XMMRegister x1 = xmm1;
7108 const XMMRegister x2 = xmm2;
7109 BLOCK_COMMENT("Entry:");
7110 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7111
7112#ifdef _WINDOWS
7113 setup_arg_regs(4);
7114 // For windows, since last argument is on stack, we need to move it to the appropriate register.
7115 __masm-> movl(totalNumIter, Address(rsp, 6 * wordSize));
7116 // Save callee save registers.
7117 __masm-> push(tmp3);
7118 __masm-> push(tmp4);
7119#endif
7120 __masm-> push(tmp5);
7121
7122 // Rename temps used throughout the code
7123 const Register idx = tmp1;
7124 const Register numIterTmp = tmp2;
7125
7126 // Start idx from zero.
7127 __masm-> xorl(idx, idx);
7128 // Compute interior pointer for new array. We do this so that we can use same index for both old and new arrays.
7129 __masm-> lea(newArr, Address(newArr, newIdx, Address::times_4));
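// Numeric sketch for the shldl loop below (assumed values): shiftCount = 4,
// oldArr[i] = 0x12345678, oldArr[i+1] = 0x9ABCDEF0 ->
// (0x12345678 << 4) | (0x9ABCDEF0 >> 28) = 0x23456789.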
7130 __masm-> movl(numIterTmp, totalNumIter);
7131
7132 // If vectorization is enabled, check if the number of iterations is at least 64
7133 // If not, then go to ShiftTwo shifting two numbers at a time
7134 if (VM_Version::supports_avx512_vbmi2()) {
7135 __masm-> cmpl(totalNumIter, (AVX3Threshold/64));
7136 __masm-> jcc(Assembler::less, ShiftTwo);
7137
7138 if (AVX3Threshold < 16 * 64) {
7139 __masm-> cmpl(totalNumIter, 16);
7140 __masm-> jcc(Assembler::less, ShiftTwo);
7141 }
7142 __masm-> evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
7143 __masm-> subl(numIterTmp, 16);
7144 __masm-> BIND(Shift512Loop);
7145 __masm-> evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
7146 __masm-> evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);
7147 __masm-> vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
7148 __masm-> evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
7149 __masm-> addl(idx, 16);
7150 __masm-> subl(numIterTmp, 16);
7151 __masm-> jcc(Assembler::greaterEqual, Shift512Loop);
7152 __masm-> addl(numIterTmp, 16);
7153 }
7154 __masm-> BIND(ShiftTwo);
7155 __masm-> cmpl(totalNumIter, 1);
7156 __masm-> jcc(Assembler::less, Exit);
7157 __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4));
7158 __masm-> subl(numIterTmp, 2);
7159 __masm-> jcc(Assembler::less, ShiftOne);
7160
7161 __masm-> BIND(ShiftTwoLoop);
7162 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
7163 __masm-> movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
7164 __masm-> shldl(tmp3, tmp4);
7165 __masm-> shldl(tmp4, tmp5);
7166 __masm-> movl(Address(newArr, idx, Address::times_4), tmp3);
7167 __masm-> movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
7168 __masm-> movl(tmp3, tmp5);
7169 __masm-> addl(idx, 2);
7170 __masm-> subl(numIterTmp, 2);
7171 __masm-> jcc(Assembler::greaterEqual, ShiftTwoLoop);
7172
7173 // Do the last iteration
7174 __masm-> BIND(ShiftOne);
7175 __masm-> addl(numIterTmp, 2);
7176 __masm-> cmpl(numIterTmp, 1);
7177 __masm-> jcc(Assembler::less, Exit);
7178 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
7179 __masm-> shldl(tmp3, tmp4);
7180 __masm-> movl(Address(newArr, idx, Address::times_4), tmp3);
7181
7182 __masm-> BIND(Exit);
7183 // Restore callee save registers.
7184 __masm-> pop(tmp5);
7185#ifdef _WINDOWS
7186 __masm-> pop(tmp4);
7187 __masm-> pop(tmp3);
7188 restore_arg_regs();
7189#endif
7190 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7191 __masm-> ret(0);
7192 return start;
7193 }
7194
7195 address generate_libmExp() {
7196 StubCodeMark mark(this, "StubRoutines", "libmExp");
7197
7198 address start = __masm-> pc();
7199
7200 const XMMRegister x0 = xmm0;
7201 const XMMRegister x1 = xmm1;
7202 const XMMRegister x2 = xmm2;
7203 const XMMRegister x3 = xmm3;
7204
7205 const XMMRegister x4 = xmm4;
7206 const XMMRegister x5 = xmm5;
7207 const XMMRegister x6 = xmm6;
7208 const XMMRegister x7 = xmm7;
7209
7210 const Register tmp = r11;
7211
7212 BLOCK_COMMENT("Entry:");
7213 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7214
7215 __masm-> fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
7216
7217 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7218 __masm-> ret(0);
7219
7220 return start;
7221
7222 }
7223
7224 address generate_libmLog() {
7225 StubCodeMark mark(this, "StubRoutines", "libmLog");
7226
7227 address start = __masm-> pc();
7228
7229 const XMMRegister x0 = xmm0;
7230 const XMMRegister x1 = xmm1;
7231 const XMMRegister x2 = xmm2;
7232 const XMMRegister x3 = xmm3;
7233
7234 const XMMRegister x4 = xmm4;
7235 const XMMRegister x5 = xmm5;
7236 const XMMRegister x6 = xmm6;
7237 const XMMRegister x7 = xmm7;
7238
7239 const Register tmp1 = r11;
7240 const Register tmp2 = r8;
7241
7242 BLOCK_COMMENT("Entry:");
7243 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7244
7245 __masm-> fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
7246
7247 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7248 __masm-> ret(0);
7249
7250 return start;
7251
7252 }
7253
7254 address generate_libmLog10() {
7255 StubCodeMark mark(this, "StubRoutines", "libmLog10");
7256
7257 address start = __masm-> pc();
7258
7259 const XMMRegister x0 = xmm0;
7260 const XMMRegister x1 = xmm1;
7261 const XMMRegister x2 = xmm2;
7262 const XMMRegister x3 = xmm3;
7263
7264 const XMMRegister x4 = xmm4;
7265 const XMMRegister x5 = xmm5;
7266 const XMMRegister x6 = xmm6;
7267 const XMMRegister x7 = xmm7;
7268
7269 const Register tmp = r11;
7270
7271 BLOCK_COMMENT("Entry:");
7272 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7273
7274 __masm-> fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
7275
7276 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7277 __masm-> ret(0);
7278
7279 return start;
7280
7281 }
7282
7283 address generate_libmPow() {
7284 StubCodeMark mark(this, "StubRoutines", "libmPow");
7285
7286 address start = __masm-> pc();
7287
7288 const XMMRegister x0 = xmm0;
7289 const XMMRegister x1 = xmm1;
7290 const XMMRegister x2 = xmm2;
7291 const XMMRegister x3 = xmm3;
7292
7293 const XMMRegister x4 = xmm4;
7294 const XMMRegister x5 = xmm5;
7295 const XMMRegister x6 = xmm6;
7296 const XMMRegister x7 = xmm7;
7297
7298 const Register tmp1 = r8;
7299 const Register tmp2 = r9;
7300 const Register tmp3 = r10;
7301 const Register tmp4 = r11;
7302
7303 BLOCK_COMMENT("Entry:");
7304 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7305
7306 __masm-> fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7307
7308 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7309 __masm-> ret(0);
7310
7311 return start;
7312
7313 }
7314
7315 address generate_libmSin() {
7316 StubCodeMark mark(this, "StubRoutines", "libmSin");
7317
7318 address start = __masm-> pc();
7319
7320 const XMMRegister x0 = xmm0;
7321 const XMMRegister x1 = xmm1;
7322 const XMMRegister x2 = xmm2;
7323 const XMMRegister x3 = xmm3;
7324
7325 const XMMRegister x4 = xmm4;
7326 const XMMRegister x5 = xmm5;
7327 const XMMRegister x6 = xmm6;
7328 const XMMRegister x7 = xmm7;
7329
7330 const Register tmp1 = r8;
7331 const Register tmp2 = r9;
7332 const Register tmp3 = r10;
7333 const Register tmp4 = r11;
7334
7335 BLOCK_COMMENT("Entry:");
7336 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7337
7338#ifdef _WIN64
7339 __masm-> push(rsi);
7340 __masm-> push(rdi);
7341#endif
7342 __masm-> fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7343
7344#ifdef _WIN64
7345 __masm-> pop(rdi);
7346 __masm-> pop(rsi);
7347#endif
7348
7349 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7350 __masm-> ret(0);
7351
7352 return start;
7353
7354 }
7355
7356 address generate_libmCos() {
7357 StubCodeMark mark(this, "StubRoutines", "libmCos");
7358
7359 address start = __masm-> pc();
7360
7361 const XMMRegister x0 = xmm0;
7362 const XMMRegister x1 = xmm1;
7363 const XMMRegister x2 = xmm2;
7364 const XMMRegister x3 = xmm3;
7365
7366 const XMMRegister x4 = xmm4;
7367 const XMMRegister x5 = xmm5;
7368 const XMMRegister x6 = xmm6;
7369 const XMMRegister x7 = xmm7;
7370
7371 const Register tmp1 = r8;
7372 const Register tmp2 = r9;
7373 const Register tmp3 = r10;
7374 const Register tmp4 = r11;
7375
7376 BLOCK_COMMENT("Entry:");
7377 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7378
7379#ifdef _WIN64
7380 __masm-> push(rsi);
7381 __masm-> push(rdi);
7382#endif
7383 __masm-> fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7384
7385#ifdef _WIN64
7386 __masm-> pop(rdi);
7387 __masm-> pop(rsi);
7388#endif
7389
7390 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7391 __masm-> ret(0);
7392
7393 return start;
7394
7395 }
7396
7397 address generate_libmTan() {
7398 StubCodeMark mark(this, "StubRoutines", "libmTan");
7399
7400 address start = __masm-> pc();
7401
7402 const XMMRegister x0 = xmm0;
7403 const XMMRegister x1 = xmm1;
7404 const XMMRegister x2 = xmm2;
7405 const XMMRegister x3 = xmm3;
7406
7407 const XMMRegister x4 = xmm4;
7408 const XMMRegister x5 = xmm5;
7409 const XMMRegister x6 = xmm6;
7410 const XMMRegister x7 = xmm7;
7411
7412 const Register tmp1 = r8;
7413 const Register tmp2 = r9;
7414 const Register tmp3 = r10;
7415 const Register tmp4 = r11;
7416
7417 BLOCK_COMMENT("Entry:");
7418 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7419
7420#ifdef _WIN64
7421 __masm-> push(rsi);
7422 __masm-> push(rdi);
7423#endif
7424 __masm-> fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7425
7426#ifdef _WIN64
7427 __masm-> pop(rdi);
7428 __masm-> pop(rsi);
7429#endif
7430
7431 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7432 __masm-> ret(0);
7433
7434 return start;
7435
7436 }
7437
7438#undef __
7439#define __ masm->
7440
7441 // Continuation point for throwing of implicit exceptions that are
7442 // not handled in the current activation. Fabricates an exception
7443 // oop and initiates normal exception dispatching in this
7444 // frame. Since we need to preserve callee-saved values (currently
7445 // only for C2, but done for C1 as well) we need a callee-saved oop
7446 // map and therefore have to make these stubs into RuntimeStubs
7447 // rather than BufferBlobs. If the compiler needs all registers to
7448 // be preserved between the fault point and the exception handler
7449 // then it must assume responsibility for that in
7450 // AbstractCompiler::continuation_for_implicit_null_exception or
7451 // continuation_for_implicit_division_by_zero_exception. All other
7452 // implicit exceptions (e.g., NullPointerException or
7453 // AbstractMethodError on entry) are either at call sites or
7454 // otherwise assume that stack unwinding will be initiated, so
7455 // caller saved registers were assumed volatile in the compiler.
7456 address generate_throw_exception(const char* name,
7457 address runtime_entry,
7458 Register arg1 = noreg,
7459 Register arg2 = noreg) {
7460 // Information about frame layout at time of blocking runtime call.
7461 // Note that we only have to preserve callee-saved registers since
7462 // the compilers are responsible for supplying a continuation point
7463 // if they expect all registers to be preserved.
7464 enum layout {
7465 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
7466 rbp_off2,
7467 return_off,
7468 return_off2,
7469 framesize // inclusive of return address
7470 };
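// [Editor's note, illustrative only -- not in the numbered source] Each enum
// value above is an offset in 32-bit slots. On Linux x86_64, where
// frame::arg_reg_save_area_bytes is 0, the layout works out to:
//   rbp_off = 0, rbp_off2 = 1, return_off = 2, return_off2 = 3, framesize = 4
// i.e. just the saved rbp and the return address, two slots each. On Win64
// the 32-byte argument shadow area shifts everything up by 8 slots.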
7471
7472 int insts_size = 512;
7473 int locs_size = 64;
7474
7475 CodeBuffer code(name, insts_size, locs_size);
7476 OopMapSet* oop_maps = new OopMapSet();
7477 MacroAssembler* masm = new MacroAssembler(&code);
7478
7479 address start = __ pc();
7480
7481 // This is an inlined and slightly modified version of call_VM
7482 // which has the ability to fetch the return PC out of
7483 // thread-local storage and also sets up last_Java_sp slightly
7484 // differently than the real call_VM
7485
7486 __ enter(); // required for proper stackwalking of RuntimeStub frame
7487
7488 assert(is_even(framesize/2), "sp not 16-byte aligned");
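// [Editor's note] framesize is counted in 32-bit slots, so
// is_even(framesize/2) asserts framesize % 4 == 0: a multiple of four slots
// is a multiple of 16 bytes, the stack alignment the x86_64 ABI requires.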
7489
7490 // return address and rbp are already in place
7491 __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog
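// [Editor's note] The "-4" skips the four 32-bit slots already on the stack
// (two for the return address, two for the rbp saved by enter()); shifting
// by LogBytesPerInt (2) converts the remaining slot count to bytes.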
7492
7493 int frame_complete = __ pc() - start;
7494
7495 // Set up last_Java_sp and last_Java_fp
7496 address the_pc = __ pc();
7497 __ set_last_Java_frame(rsp, rbp, the_pc);
7498 __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
7499
7500 // Call runtime
7501 if (arg1 != noreg) {
7502 assert(arg2 != c_rarg1, "clobbered");
7503 __ movptr(c_rarg1, arg1);
7504 }
7505 if (arg2 != noreg) {
7506 __ movptr(c_rarg2, arg2);
7507 }
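// [Editor's note] The runtime entries reached from here expect the current
// JavaThread* as their first C argument, hence r15_thread goes in c_rarg0.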
7508 __ movptr(c_rarg0, r15_thread);
7509 BLOCK_COMMENT("call runtime_entry");
7510 __ call(RuntimeAddress(runtime_entry));
7511
7512 // Generate oop map
7513 OopMap* map = new OopMap(framesize, 0);
7514
7515 oop_maps->add_gc_map(the_pc - start, map);
7516
7517 __ reset_last_Java_frame(true);
7518
7519 __ leave(); // required for proper stackwalking of RuntimeStub frame
7520
7521 // check for pending exceptions
7522#ifdef ASSERT
7523 Label L;
7524 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()),
7525 (int32_t) NULL_WORD);
7526 __ jcc(Assembler::notEqual, L);
7527 __ should_not_reach_here();
7528 __ bind(L);
7529#endif // ASSERT
7530 __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
7531
7532
7533 // codeBlob framesize is in words (not VMRegImpl::slot_size)
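// [Editor's note] The shift below, LogBytesPerWord - LogBytesPerInt
// (3 - 2 = 1 on x86_64), halves the 32-bit slot count to yield the frame
// size in 64-bit words, which is what RuntimeStub expects.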
7534 RuntimeStub* stub =
7535 RuntimeStub::new_runtime_stub(name,
7536 &code,
7537 frame_complete,
7538 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
7539 oop_maps, false);
7540 return stub->entry_point();
7541 }
7542
7543 void create_control_words() {
7544 // Round to nearest, 64-bit mode, exceptions masked
7545 StubRoutines::x86::_mxcsr_std = 0x1F80;
7546 }
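// [Editor's note, decoding the constant] 0x1F80 sets MXCSR bits 7-12
// (IM, DM, ZM, OM, UM, PM), masking all SSE floating-point exceptions,
// leaves the rounding-control field (bits 13-14) at 00 = round to nearest
// even, and keeps DAZ (bit 6) and FTZ (bit 15) clear.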
7547
7548 // Initialization
7549 void generate_initial() {
7550 // Generates all stubs and initializes the entry points
7551
7552 // These platform-specific settings are needed by generate_call_stub()
7553 create_control_words();
7554
7555 // entry points that exist in all platforms. Note: This is code
7556 // that could be shared among different platforms - however the
7557 // benefit seems to be smaller than the disadvantage of having a
7558 // much more complicated generator structure. See also comment in
7559 // stubRoutines.hpp.
7560
7561 StubRoutines::_forward_exception_entry = generate_forward_exception();
7562
7563 StubRoutines::_call_stub_entry =
7564 generate_call_stub(StubRoutines::_call_stub_return_address);
7565
7566 // is referenced by megamorphic call
7567 StubRoutines::_catch_exception_entry = generate_catch_exception();
7568
7569 // atomic calls
7570 StubRoutines::_fence_entry = generate_orderaccess_fence();
7571
7572 // platform dependent
7573 StubRoutines::x86::_get_previous_sp_entry = generate_get_previous_sp();
7574
7575 StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();
7576
7577 StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
7578 StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
7579 StubRoutines::x86::_d2i_fixup = generate_d2i_fixup();
7580 StubRoutines::x86::_d2l_fixup = generate_d2l_fixup();
7581
7582 StubRoutines::x86::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF);
7583 StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
7584 StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
7585 StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
7586
7587 // Build this early so it's available for the interpreter.
7588 StubRoutines::_throw_StackOverflowError_entry =
7589 generate_throw_exception("StackOverflowError throw_exception",
7590 CAST_FROM_FN_PTR(address,
7591 SharedRuntime::
7592 throw_StackOverflowError));
7593 StubRoutines::_throw_delayed_StackOverflowError_entry =
7594 generate_throw_exception("delayed StackOverflowError throw_exception",
7595 CAST_FROM_FN_PTR(address,
7596 SharedRuntime::
7597 throw_delayed_StackOverflowError));
7598 if (UseCRC32Intrinsics) {
7599 // set table address before generating the stubs that use it
7600 StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
7601 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
7602 }
7603
7604 if (UseCRC32CIntrinsics) {
7605 bool supports_clmul = VM_Version::supports_clmul();
7606 StubRoutines::x86::generate_CRC32C_table(supports_clmul);
7607 StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
7608 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
7609 }
7610
7611 if (UseAdler32Intrinsics) {
7612 StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
7613 }
7614
7615 if (UseLibmIntrinsic && InlineIntrinsics) {
7616 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
7617 vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
7618 vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
7619 StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
7620 StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
7621 StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
7622 StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
7623 StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
7624 StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
7625 StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
7626 StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
7627 StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
7628 StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
7629 StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
7630 StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
7631 StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
7632 StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
7633 }
7634 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
7635 StubRoutines::_dexp = generate_libmExp();
7636 }
7637 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
7638 StubRoutines::_dlog = generate_libmLog();
7639 }
7640 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) {
7641 StubRoutines::_dlog10 = generate_libmLog10();
7642 }
7643 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) {
7644 StubRoutines::_dpow = generate_libmPow();
7645 }
7646 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
7647 StubRoutines::_dsin = generate_libmSin();
7648 }
7649 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
7650 StubRoutines::_dcos = generate_libmCos();
7651 }
7652 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
7653 StubRoutines::_dtan = generate_libmTan();
7654 }
7655 }
7656
7657 // Safefetch stubs.
7658 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
7659 &StubRoutines::_safefetch32_fault_pc,
7660 &StubRoutines::_safefetch32_continuation_pc);
7661 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
7662 &StubRoutines::_safefetchN_fault_pc,
7663 &StubRoutines::_safefetchN_continuation_pc);
7664 }
7665
7666 void generate_all() {
7667 // Generates all stubs and initializes the entry points
7668
7669 // These entry points require SharedInfo::stack0 to be set up in
7670 // non-core builds and need to be relocatable, so they each
7671 // fabricate a RuntimeStub internally.
7672 StubRoutines::_throw_AbstractMethodError_entry =
7673 generate_throw_exception("AbstractMethodError throw_exception",
7674 CAST_FROM_FN_PTR(address,
7675 SharedRuntime::
7676 throw_AbstractMethodError));
7677
7678 StubRoutines::_throw_IncompatibleClassChangeError_entry =
7679 generate_throw_exception("IncompatibleClassChangeError throw_exception",
7680 CAST_FROM_FN_PTR(address,
7681 SharedRuntime::
7682 throw_IncompatibleClassChangeError));
7683
7684 StubRoutines::_throw_NullPointerException_at_call_entry =
7685 generate_throw_exception("NullPointerException at call throw_exception",
7686 CAST_FROM_FN_PTR(address,
7687 SharedRuntime::
7688 throw_NullPointerException_at_call));
7689
7690 // entry points that are platform specific
7691 StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF7FFFFFFF);
7692 StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
7693 StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
7694 StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
7695 StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF);
7696 StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x0000000100000001);
7697 StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
7698 StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
7699 StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff);
7700 StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff0000ffff);
7701 StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
7702 0xFFFFFFFF, 0, 0, 0);
7703 StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
7704 0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
7705 StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x0302010003020100);
7706 StubRoutines::x86::_vector_byte_shuffle_mask = generate_vector_byte_shuffle_mask("vector_byte_shuffle_mask");
7707 StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x0100010001000100);
7708 StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
7709 StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
7710 StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
7711
7712 // support for verify_oop (must happen after universe_init)
7713 if (VerifyOops) {
7714 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
7715 }
7716
7717 // data cache line writeback
7718 StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
7719 StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();
7720
7721 // arraycopy stubs used by compilers
7722 generate_arraycopy_stubs();
7723
7724 // don't bother generating these AES intrinsic stubs unless global flag is set
7725 if (UseAESIntrinsics) {
7726 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
7727 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
7728 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
7729 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
7730 if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512vl() && VM_Version::supports_avx512dq() ) {
7731 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
7732 StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
7733 StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
7734 StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
7735 StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr();
7736 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
7737 StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
7738 } else {
7739 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
7740 }
7741 }
7742
7743 if (UseAESCTRIntrinsics) {
7744 if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
7745 if (StubRoutines::x86::_counter_mask_addr == NULL) {
7746 StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
7747 }
7748 StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
7749 } else {
7750 StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
7751 StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
7752 }
7753 }
7754
7755 if (UseMD5Intrinsics) {
7756 StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
7757 StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
7758 }
7759 if (UseSHA1Intrinsics) {
7760 StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
7761 StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
7762 StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
7763 StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
7764 }
7765 if (UseSHA256Intrinsics) {
7766 StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
7767 char* dst = (char*)StubRoutines::x86::_k256_W;
7768 char* src = (char*)StubRoutines::x86::_k256;
7769 for (int ii = 0; ii < 16; ++ii) {
7770 memcpy(dst + 32 * ii, src + 16 * ii, 16);
7771 memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
7772 }
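// [Editor's note] The loop above appears to widen the 16-byte rows of
// _k256 (the 64 32-bit SHA-256 round constants) into 32-byte rows of
// _k256_W by duplicating each row into both 128-bit lanes, so the AVX2
// compress code can apply one set of constants to two lanes at once.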
7773 StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
7774 StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
7775 StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
7776 StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
7777 }
7778 if (UseSHA512Intrinsics) {
7779 StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W;
7780 StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512();
7781 StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
7782 StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
7783 }
7784
7785 // Generate GHASH intrinsics code
7786 if (UseGHASHIntrinsics) {
7787 if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
7788 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
7789 }
7790 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
7791 if (VM_Version::supports_avx()) {
7792 StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr();
7793 StubRoutines::x86::_ghash_poly_addr = ghash_polynomial_addr();
7794 StubRoutines::_ghash_processBlocks = generate_avx_ghash_processBlocks();
7795 } else {
7796 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
7797 }
7798 }
7799
7800
7801 if (UseBASE64Intrinsics) {
7802 if (VM_Version::supports_avx2() &&
7803 VM_Version::supports_avx512bw() &&
7804 VM_Version::supports_avx512vl()) {
7805 StubRoutines::x86::_avx2_shuffle_base64 = base64_avx2_shuffle_addr();
7806 StubRoutines::x86::_avx2_input_mask_base64 = base64_avx2_input_mask_addr();
7807 StubRoutines::x86::_avx2_lut_base64 = base64_avx2_lut_addr();
7808 }
7809 StubRoutines::x86::_encoding_table_base64 = base64_encoding_table_addr();
7810 if (VM_Version::supports_avx512_vbmi()) {
7811 StubRoutines::x86::_shuffle_base64 = base64_shuffle_addr();
7812 StubRoutines::x86::_lookup_lo_base64 = base64_vbmi_lookup_lo_addr();
7813 StubRoutines::x86::_lookup_hi_base64 = base64_vbmi_lookup_hi_addr();
7814 StubRoutines::x86::_lookup_lo_base64url = base64_vbmi_lookup_lo_url_addr();
7815 StubRoutines::x86::_lookup_hi_base64url = base64_vbmi_lookup_hi_url_addr();
7816 StubRoutines::x86::_pack_vec_base64 = base64_vbmi_pack_vec_addr();
7817 StubRoutines::x86::_join_0_1_base64 = base64_vbmi_join_0_1_addr();
7818 StubRoutines::x86::_join_1_2_base64 = base64_vbmi_join_1_2_addr();
7819 StubRoutines::x86::_join_2_3_base64 = base64_vbmi_join_2_3_addr();
7820 }
7821 StubRoutines::x86::_decoding_table_base64 = base64_decoding_table_addr();
7822 StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
7823 StubRoutines::_base64_decodeBlock = generate_base64_decodeBlock();
7824 }
7825
7826 BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
7827 if (bs_nm != NULL) {
7828 StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier();
7829 }
7830#ifdef COMPILER2
7831 if (UseMultiplyToLenIntrinsic) {
7832 StubRoutines::_multiplyToLen = generate_multiplyToLen();
7833 }
7834 if (UseSquareToLenIntrinsic) {
7835 StubRoutines::_squareToLen = generate_squareToLen();
7836 }
7837 if (UseMulAddIntrinsic) {
7838 StubRoutines::_mulAdd = generate_mulAdd();
7839 }
7840 if (VM_Version::supports_avx512_vbmi2()) {
7841 StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
7842 StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
7843 }
7844 if (UseMontgomeryMultiplyIntrinsic) {
7845 StubRoutines::_montgomeryMultiply
7846 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
7848 if (UseMontgomerySquareIntrinsic) {
7849 StubRoutines::_montgomerySquare
7850 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
7851 }
7852
7853 // Get svml stub routine addresses
7854 void *libjsvml = NULL;
7855 char ebuf[1024];
7856 char dll_name[JVM_MAXPATHLEN + 1];
7857 if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "jsvml")) {
7858 libjsvml = os::dll_load(dll_name, ebuf, sizeof ebuf);
7859 }
7860 if (libjsvml != NULL) {
7861 // SVML method naming convention
7862 // All the methods are named as __jsvml_op<T><N>_ha_<VV>
7863 // Where:
7864 // ha stands for high accuracy
7865 // <T> is optional and indicates float vs. double:
7866 // Set to f for vector float operation
7867 // Omitted for vector double operation
7868 // <N> is the number of elements in the vector
7869 // 1, 2, 4, 8, 16
7870 // e.g. 128 bit float vector has 4 float elements
7871 // <VV> indicates the avx/sse level:
7872 // z0 is AVX512, l9 is AVX2, e9 is AVX1 and ex is for SSE2
7873 // e.g. __jsvml_expf16_ha_z0 is the method for computing 16 element vector float exp using AVX 512 insns
7874 // __jsvml_exp8_ha_z0 is the method for computing 8 element vector double exp using AVX 512 insns
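// [Editor's note, a worked example of the convention above] On an AVX2
// machine (avx_sse_str == "l9") the lookup loops below would probe, e.g.,
// "__jsvml_sinf8_ha_l9" for the 8-element vector float sin stub and
// "__jsvml_sin4_ha_l9" for the 4-element vector double sin stub.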
7875
7876 log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
7877 if (UseAVX > 2) {
7878 for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
7879 int vop = VectorSupport::VECTOR_OP_SVML_START + op;
7880 if ((!VM_Version::supports_avx512dq()) &&
7881 (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
7882 continue;
7883 }
7884 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
7885 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
7886
7887 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
7888 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
7889 }
7890 }
7891 const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
7892 for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
7893 int vop = VectorSupport::VECTOR_OP_SVML_START + op;
7894 if (vop == VectorSupport::VECTOR_OP_POW) {
7895 continue;
7896 }
7897 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7898 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
7899
7900 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7901 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
7902
7903 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7904 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
7905
7906 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7907 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
7908
7909 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7910 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
7911
7912 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7913 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
7914 }
7915 }
7916#endif // COMPILER2
7917
7918 if (UseVectorizedMismatchIntrinsic) {
7919 StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
7920 }
7921 }
7922
7923 public:
7924 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
7925 if (all) {
7926 generate_all();
7927 } else {
7928 generate_initial();
7929 }
7930 }
7931}; // end class declaration
7932
7933#define UCM_TABLE_MAX_ENTRIES 16
7934void StubGenerator_generate(CodeBuffer* code, bool all) {
7935 if (UnsafeCopyMemory::_table == NULL) {
7936 UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
7937 }
7938 StubGenerator g(code, all);
7939}