Bug Summary

File: jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp
Warning: line 1158, column 9
Called C++ object pointer is null
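
Every "__" in the stub generators below expands to "_masm->" (see the macro at line 63 of stubGenerator_x86_64.cpp), so each emitted instruction is a member call through StubCodeGenerator's _masm field; the checker believes that pointer can be null on the path it follows into macroAssembler_x86.hpp. A minimal reduction of the flagged pattern (hypothetical, not the actual VM flow):

    MacroAssembler* _masm = NULL;  // assumed null on the path the analyzer follows
    _masm->block_comment("...");   // warning: called C++ object pointer is null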

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name stubGenerator_x86_64.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem 
/usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers -Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

1/*
2 * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/macroAssembler.hpp"
27#include "asm/macroAssembler.inline.hpp"
28#include "ci/ciUtilities.hpp"
29#include "compiler/oopMap.hpp"
30#include "gc/shared/barrierSet.hpp"
31#include "gc/shared/barrierSetAssembler.hpp"
32#include "gc/shared/barrierSetNMethod.hpp"
33#include "gc/shared/gc_globals.hpp"
34#include "interpreter/interpreter.hpp"
35#include "memory/universe.hpp"
36#include "nativeInst_x86.hpp"
37#include "oops/instanceOop.hpp"
38#include "oops/method.hpp"
39#include "oops/objArrayKlass.hpp"
40#include "oops/oop.inline.hpp"
41#include "prims/methodHandles.hpp"
42#include "runtime/arguments.hpp"
43#include "runtime/frame.inline.hpp"
44#include "runtime/handles.inline.hpp"
45#include "runtime/sharedRuntime.hpp"
46#include "runtime/stubCodeGenerator.hpp"
47#include "runtime/stubRoutines.hpp"
48#include "runtime/thread.inline.hpp"
49#ifdef COMPILER2
50#include "opto/runtime.hpp"
51#endif
52#if INCLUDE_JVMCI
53#include "jvmci/jvmci_globals.hpp"
54#endif
55#if INCLUDE_ZGC
56#include "gc/z/zThreadLocalData.hpp"
57#endif
58
59// Declaration and definition of StubGenerator (no .hpp file).
60// For a more detailed description of the stub routine structure
61// see the comment in stubRoutines.hpp
62
63#define __ _masm->
64#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
65#define a__ ((Assembler*)_masm)->
66
67#ifdef PRODUCT
68#define BLOCK_COMMENT(str) /* nothing */
69#else
70#define BLOCK_COMMENT(str) __ block_comment(str)
71#endif
72
73#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
74const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
75
76// Stub Code definitions
77
78class StubGenerator: public StubCodeGenerator {
79 private:
80
81#ifdef PRODUCT
82#define inc_counter_np(counter) ((void)0)
83#else
84 void inc_counter_np_(int& counter) {
85 // This can destroy rscratch1 if counter is far from the code cache
86 __ incrementl(ExternalAddress((address)&counter));
87 }
88#define inc_counter_np(counter) \
89 BLOCK_COMMENT("inc_counter " #counter); \
90 inc_counter_np_(counter);
91#endif
92
93 // Call stubs are used to call Java from C
94 //
95 // Linux Arguments:
96 // c_rarg0: call wrapper address address
97 // c_rarg1: result address
98 // c_rarg2: result type BasicType
99 // c_rarg3: method Method*
100 // c_rarg4: (interpreter) entry point address
101 // c_rarg5: parameters intptr_t*
102 // 16(rbp): parameter size (in words) int
103 // 24(rbp): thread Thread*
104 //
105 // [ return_from_Java ] <--- rsp
106 // [ argument word n ]
107 // ...
108 // -12 [ argument word 1 ]
109 // -11 [ saved r15 ] <--- rsp_after_call
110 // -10 [ saved r14 ]
111 // -9 [ saved r13 ]
112 // -8 [ saved r12 ]
113 // -7 [ saved rbx ]
114 // -6 [ call wrapper ]
115 // -5 [ result ]
116 // -4 [ result type ]
117 // -3 [ method ]
118 // -2 [ entry point ]
119 // -1 [ parameters ]
120 // 0 [ saved rbp ] <--- rbp
121 // 1 [ return address ]
122 // 2 [ parameter size ]
123 // 3 [ thread ]
124 //
125 // Windows Arguments:
126 // c_rarg0: call wrapper address address
127 // c_rarg1: result address
128 // c_rarg2: result type BasicType
129 // c_rarg3: method Method*
130 // 48(rbp): (interpreter) entry point address
131 // 56(rbp): parameters intptr_t*
132 // 64(rbp): parameter size (in words) int
133 // 72(rbp): thread Thread*
134 //
135 // [ return_from_Java ] <--- rsp
136 // [ argument word n ]
137 // ...
138 // -60 [ argument word 1 ]
139 // -59 [ saved xmm31 ] <--- rsp after_call
140 // [ saved xmm16-xmm30 ] (EVEX enabled, else the space is blank)
141 // -27 [ saved xmm15 ]
142 // [ saved xmm7-xmm14 ]
143 // -9 [ saved xmm6 ] (each xmm register takes 2 slots)
144 // -7 [ saved r15 ]
145 // -6 [ saved r14 ]
146 // -5 [ saved r13 ]
147 // -4 [ saved r12 ]
148 // -3 [ saved rdi ]
149 // -2 [ saved rsi ]
150 // -1 [ saved rbx ]
151 // 0 [ saved rbp ] <--- rbp
152 // 1 [ return address ]
153 // 2 [ call wrapper ]
154 // 3 [ result ]
155 // 4 [ result type ]
156 // 5 [ method ]
157 // 6 [ entry point ]
158 // 7 [ parameters ]
159 // 8 [ parameter size ]
160 // 9 [ thread ]
161 //
162 // Windows reserves the callers stack space for arguments 1-4.
163 // We spill c_rarg0-c_rarg3 to this space.
164
165 // Call stub stack layout word offsets from rbp
166 enum call_stub_layout {
167#ifdef _WIN64
168 xmm_save_first = 6, // save from xmm6
169 xmm_save_last = 31, // to xmm31
170 xmm_save_base = -9,
171 rsp_after_call_off = xmm_save_base - 2 * (xmm_save_last - xmm_save_first), // -27
172 r15_off = -7,
173 r14_off = -6,
174 r13_off = -5,
175 r12_off = -4,
176 rdi_off = -3,
177 rsi_off = -2,
178 rbx_off = -1,
179 rbp_off = 0,
180 retaddr_off = 1,
181 call_wrapper_off = 2,
182 result_off = 3,
183 result_type_off = 4,
184 method_off = 5,
185 entry_point_off = 6,
186 parameters_off = 7,
187 parameter_size_off = 8,
188 thread_off = 9
189#else
190 rsp_after_call_off = -12,
191 mxcsr_off = rsp_after_call_off,
192 r15_off = -11,
193 r14_off = -10,
194 r13_off = -9,
195 r12_off = -8,
196 rbx_off = -7,
197 call_wrapper_off = -6,
198 result_off = -5,
199 result_type_off = -4,
200 method_off = -3,
201 entry_point_off = -2,
202 parameters_off = -1,
203 rbp_off = 0,
204 retaddr_off = 1,
205 parameter_size_off = 2,
206 thread_off = 3
207#endif
208 };
209
210#ifdef _WIN64
211 Address xmm_save(int reg) {
212 assert(reg >= xmm_save_first && reg <= xmm_save_last, "XMM register number out of range");
213 return Address(rbp, (xmm_save_base - (reg - xmm_save_first) * 2) * wordSize);
214 }
215#endif
216
217 address generate_call_stub(address& return_address) {
218 assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 &&
219 (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
220 "adjust this code");
221 StubCodeMark mark(this, "StubRoutines", "call_stub");
222 address start = __ pc();
223
224 // same as in generate_catch_exception()!
225 const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
226
227 const Address call_wrapper (rbp, call_wrapper_off * wordSize);
228 const Address result (rbp, result_off * wordSize);
229 const Address result_type (rbp, result_type_off * wordSize);
230 const Address method (rbp, method_off * wordSize);
231 const Address entry_point (rbp, entry_point_off * wordSize);
232 const Address parameters (rbp, parameters_off * wordSize);
233 const Address parameter_size(rbp, parameter_size_off * wordSize);
234
235 // same as in generate_catch_exception()!
236 const Address thread (rbp, thread_off * wordSize);
237
238 const Address r15_save(rbp, r15_off * wordSize);
239 const Address r14_save(rbp, r14_off * wordSize);
240 const Address r13_save(rbp, r13_off * wordSize);
241 const Address r12_save(rbp, r12_off * wordSize);
242 const Address rbx_save(rbp, rbx_off * wordSize);
243
244 // stub code
245 __ enter();
246 __ subptr(rsp, -rsp_after_call_off * wordSize);
247
248 // save register parameters
249#ifndef _WIN64
250 __ movptr(parameters, c_rarg5); // parameters
251 __ movptr(entry_point, c_rarg4); // entry_point
252#endif
253
254 __ movptr(method, c_rarg3); // method
255 __ movl(result_type, c_rarg2); // result type
256 __ movptr(result, c_rarg1); // result
257 __ movptr(call_wrapper, c_rarg0); // call wrapper
258
259 // save regs belonging to calling function
260 __ movptr(rbx_save, rbx);
261 __ movptr(r12_save, r12);
262 __ movptr(r13_save, r13);
263 __ movptr(r14_save, r14);
264 __ movptr(r15_save, r15);
265
266#ifdef _WIN64
267 int last_reg = 15;
268 if (UseAVX > 2) {
269 last_reg = 31;
270 }
271 if (VM_Version::supports_evex()) {
272 for (int i = xmm_save_first; i <= last_reg; i++) {
273 __ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
274 }
275 } else {
276 for (int i = xmm_save_first; i <= last_reg; i++) {
277 __ movdqu(xmm_save(i), as_XMMRegister(i));
278 }
279 }
280
281 const Address rdi_save(rbp, rdi_off * wordSize);
282 const Address rsi_save(rbp, rsi_off * wordSize);
283
284 __ movptr(rsi_save, rsi);
285 __ movptr(rdi_save, rdi);
286#else
287 const Address mxcsr_save(rbp, mxcsr_off * wordSize);
288 {
289 Label skip_ldmx;
290 __ stmxcsr(mxcsr_save);
291 __ movl(rax, mxcsr_save);
292 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
293 ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
294 __ cmp32(rax, mxcsr_std);
295 __ jcc(Assembler::equal, skip_ldmx);
296 __ ldmxcsr(mxcsr_std);
297 __ bind(skip_ldmx);
298 }
299#endif
300
301 // Load up thread register
302 __ movptr(r15_thread, thread);
303 __ reinit_heapbase();
304
305#ifdef ASSERT
306 // make sure we have no pending exceptions
307 {
308 Label L;
309 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
310 __ jcc(Assembler::equal, L);
311 __ stop("StubRoutines::call_stub: entered with pending exception");
312 __ bind(L);
313 }
314#endif
315
316 // pass parameters if any
317 BLOCK_COMMENT("pass parameters if any");
318 Label parameters_done;
319 __ movl(c_rarg3, parameter_size);
320 __ testl(c_rarg3, c_rarg3);
321 __ jcc(Assembler::zero, parameters_done);
322
323 Label loop;
324 __ movptr(c_rarg2, parameters); // parameter pointer
325 __ movl(c_rarg1, c_rarg3); // parameter counter is in c_rarg1
326 __ BIND(loop);
327 __ movptr(rax, Address(c_rarg2, 0));// get parameter
328 __ addptr(c_rarg2, wordSize); // advance to next parameter
329 __ decrementl(c_rarg1); // decrement counter
330 __ push(rax); // pass parameter
331 __ jcc(Assembler::notZero, loop);
332
333 // call Java function
334 __ BIND(parameters_done);
335 __ movptr(rbx, method); // get Method*
336 __ movptr(c_rarg1, entry_point); // get entry_point
337 __ mov(r13, rsp); // set sender sp
338 BLOCK_COMMENT("call Java function");
339 __ call(c_rarg1);
340
341 BLOCK_COMMENT("call_stub_return_address:");
342 return_address = __ pc();
343
344 // store result depending on type (everything that is not
345 // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
346 __ movptr(c_rarg0, result);
347 Label is_long, is_float, is_double, exit;
348 __ movl(c_rarg1, result_type);
349 __ cmpl(c_rarg1, T_OBJECT);
350 __ jcc(Assembler::equal, is_long);
351 __ cmpl(c_rarg1, T_LONG);
352 __ jcc(Assembler::equal, is_long);
353 __ cmpl(c_rarg1, T_FLOAT);
354 __ jcc(Assembler::equal, is_float);
355 __ cmpl(c_rarg1, T_DOUBLE);
356 __ jcc(Assembler::equal, is_double);
357
358 // handle T_INT case
359 __ movl(Address(c_rarg0, 0), rax);
360
361 __ BIND(exit);
362
363 // pop parameters
364 __ lea(rsp, rsp_after_call);
365
366#ifdef ASSERT
367 // verify that threads correspond
368 {
369 Label L1, L2, L3;
370 __ cmpptr(r15_thread, thread);
371 __ jcc(Assembler::equal, L1);
372 __ stop("StubRoutines::call_stub: r15_thread is corrupted");
373 __ bind(L1);
374 __ get_thread(rbx);
375 __ cmpptr(r15_thread, thread);
376 __ jcc(Assembler::equal, L2);
377 __ stop("StubRoutines::call_stub: r15_thread is modified by call");
378 __ bind(L2);
379 __ cmpptr(r15_thread, rbx);
380 __ jcc(Assembler::equal, L3);
381 __ stop("StubRoutines::call_stub: threads must correspond");
382 __ bind(L3);
383 }
384#endif
385
386 // restore regs belonging to calling function
387#ifdef _WIN64
388 // emit the restores for xmm regs
389 if (VM_Version::supports_evex()) {
390 for (int i = xmm_save_first; i <= last_reg; i++) {
391 __ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
392 }
393 } else {
394 for (int i = xmm_save_first; i <= last_reg; i++) {
395 __ movdqu(as_XMMRegister(i), xmm_save(i));
396 }
397 }
398#endif
399 __ movptr(r15, r15_save);
400 __ movptr(r14, r14_save);
401 __ movptr(r13, r13_save);
402 __ movptr(r12, r12_save);
403 __ movptr(rbx, rbx_save);
404
405#ifdef _WIN64
406 __ movptr(rdi, rdi_save);
407 __ movptr(rsi, rsi_save);
408#else
409 __ ldmxcsr(mxcsr_save);
410#endif
411
412 // restore rsp
413 __ addptr(rsp, -rsp_after_call_off * wordSize);
414
415 // return
416 __ vzeroupper();
417 __ pop(rbp);
418 __ ret(0);
419
420 // handle return types different from T_INT
421 __ BIND(is_long);
422 __ movq(Address(c_rarg0, 0), rax);
423 __ jmp(exit);
424
425 __ BIND(is_float);
426 __ movflt(Address(c_rarg0, 0), xmm0);
427 __ jmp(exit);
428
429 __ BIND(is_double);
430 __ movdbl(Address(c_rarg0, 0), xmm0);
431 __ jmp(exit);
432
433 return start;
434 }
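
For orientation, the VM reaches this generated code through a plain function pointer whose parameter list mirrors the Linux argument table above; a sketch following the CallStub typedef in stubRoutines.hpp (treat details as approximate):

    // How the call stub is entered from C++ (sketch):
    typedef void (*CallStub)(address   link,            // call wrapper
                             intptr_t* result,
                             BasicType result_type,
                             Method*   method,
                             address   entry_point,     // interpreter entry
                             intptr_t* parameters,
                             int       size_of_parameters,
                             TRAPS);                    // current thread
    // JavaCalls::call_helper() effectively does:
    //   StubRoutines::call_stub()(link, result, type, method, entry, parms, n, thread);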
435
436 // Return point for a Java call if there's an exception thrown in
437 // Java code. The exception is caught and transformed into a
438 // pending exception stored in JavaThread that can be tested from
439 // within the VM.
440 //
441 // Note: Usually the parameters are removed by the callee. In case
442 // of an exception crossing an activation frame boundary, that is
443 // not the case if the callee is compiled code => need to setup the
444 // rsp.
445 //
446 // rax: exception oop
447
448 address generate_catch_exception() {
449 StubCodeMark mark(this, "StubRoutines", "catch_exception");
450 address start = __ pc();
451
452 // same as in generate_call_stub():
453 const Address rsp_after_call(rbp, rsp_after_call_off * wordSize);
454 const Address thread (rbp, thread_off * wordSize);
455
456#ifdef ASSERT
457 // verify that threads correspond
458 {
459 Label L1, L2, L3;
460 __ cmpptr(r15_thread, thread);
461 __ jcc(Assembler::equal, L1);
462 __ stop("StubRoutines::catch_exception: r15_thread is corrupted");
463 __ bind(L1);
464 __ get_thread(rbx);
465 __ cmpptr(r15_thread, thread);
466 __ jcc(Assembler::equal, L2);
467 __ stop("StubRoutines::catch_exception: r15_thread is modified by call");
468 __ bind(L2);
469 __ cmpptr(r15_thread, rbx);
470 __ jcc(Assembler::equal, L3);
471 __ stop("StubRoutines::catch_exception: threads must correspond");
472 __ bind(L3);
473 }
474#endif
475
476 // set pending exception
477 __ verify_oop(rax);
478
479 __ movptr(Address(r15_thread, Thread::pending_exception_offset()), rax);
480 __ lea(rscratch1, ExternalAddress((address)__FILE__));
481 __ movptr(Address(r15_thread, Thread::exception_file_offset()), rscratch1);
482 __ movl(Address(r15_thread, Thread::exception_line_offset()), (int) __LINE__);
483
484 // complete return to VM
485 assert(StubRoutines::_call_stub_return_address != NULL,
486 "_call_stub_return_address must have been generated before");
487 __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address));
488
489 return start;
490 }
491
492 // Continuation point for runtime calls returning with a pending
493 // exception. The pending exception check happened in the runtime
494 // or native call stub. The pending exception in Thread is
495 // converted into a Java-level exception.
496 //
497 // Contract with Java-level exception handlers:
498 // rax: exception
499 // rdx: throwing pc
500 //
501 // NOTE: At entry of this stub, exception-pc must be on stack !!
502
503 address generate_forward_exception() {
504 StubCodeMark mark(this, "StubRoutines", "forward exception");
505 address start = __ pc();
506
507 // Upon entry, the sp points to the return address returning into
508 // Java (interpreted or compiled) code; i.e., the return address
509 // becomes the throwing pc.
510 //
511 // Arguments pushed before the runtime call are still on the stack
512 // but the exception handler will reset the stack pointer ->
513 // ignore them. A potential result in registers can be ignored as
514 // well.
515
516#ifdef ASSERT
517 // make sure this code is only executed if there is a pending exception
518 {
519 Label L;
520 __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL);
521 __ jcc(Assembler::notEqual, L);
522 __ stop("StubRoutines::forward exception: no pending exception (1)");
523 __ bind(L);
524 }
525#endif
526
527 // compute exception handler into rbx
528 __ movptr(c_rarg0, Address(rsp, 0));
529 BLOCK_COMMENT("call exception_handler_for_return_address");
530 __ call_VM_leaf(CAST_FROM_FN_PTR(address,
531 SharedRuntime::exception_handler_for_return_address),
532 r15_thread, c_rarg0);
533 __ mov(rbx, rax);
534
535 // setup rax & rdx, remove return address & clear pending exception
536 __ pop(rdx);
537 __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
538 __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
539
540#ifdef ASSERT
541 // make sure exception is set
542 {
543 Label L;
544 __ testptr(rax, rax);
545 __ jcc(Assembler::notEqual, L);
546 __ stop("StubRoutines::forward exception: no pending exception (2)");
547 __ bind(L);
548 }
549#endif
550
551 // continue at exception handler (return address removed)
552 // rax: exception
553 // rbx: exception handler
554 // rdx: throwing pc
555 __ verify_oop(rax);
556 __ jmp(rbx);
557
558 return start;
559 }
560
561 // Support for intptr_t OrderAccess::fence()
562 //
563 // Arguments :
564 //
565 // Result:
566 address generate_orderaccess_fence() {
567 StubCodeMark mark(this, "StubRoutines", "orderaccess_fence");
568 address start = __ pc();
569 __ membar(Assembler::StoreLoad);
570 __ ret(0);
571
572 return start;
573 }
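
The entire fence stub is one StoreLoad barrier plus ret; StoreLoad is the only ordering x86's TSO memory model does not provide by default. A self-contained illustration of why it is needed (Dekker-style handshake, not VM code; the seq_cst fence plays the role of membar(Assembler::StoreLoad)):

    #include <atomic>
    std::atomic<int> flag1(0), flag2(0);
    void t1(int& r1) {
      flag1.store(1, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst); // ~ StoreLoad
      r1 = flag2.load(std::memory_order_relaxed);
    }
    void t2(int& r2) {
      flag2.store(1, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst); // ~ StoreLoad
      r2 = flag1.load(std::memory_order_relaxed);
    }
    // Without the fences, both r1 and r2 may observe 0.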
574
575
576 // Support for intptr_t get_previous_sp()
577 //
578 // This routine is used to find the previous stack pointer for the
579 // caller.
580 address generate_get_previous_sp() {
581 StubCodeMark mark(this, "StubRoutines", "get_previous_sp");
582 address start = __ pc();
583
584 __ movptr(rax, rsp);
585 __ addptr(rax, 8); // return address is at the top of the stack.
586 __ ret(0);
587
588 return start;
589 }
590
591 //----------------------------------------------------------------------------------------------------
592 // Support for void verify_mxcsr()
593 //
594 // This routine is used with -Xcheck:jni to verify that native
595 // JNI code does not return to Java code without restoring the
596 // MXCSR register to our expected state.
597
598 address generate_verify_mxcsr() {
599 StubCodeMark mark(this, "StubRoutines", "verify_mxcsr");
600 address start = __ pc();
601
602 const Address mxcsr_save(rsp, 0);
603
604 if (CheckJNICalls) {
605 Label ok_ret;
606 ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std());
607 __ push(rax);
608 __ subptr(rsp, wordSize); // allocate a temp location
609 __ stmxcsr(mxcsr_save);
610 __ movl(rax, mxcsr_save);
611 __ andl(rax, MXCSR_MASK); // Only check control and mask bits
612 __ cmp32(rax, mxcsr_std);
613 __ jcc(Assembler::equal, ok_ret);
614
615 __ warn("MXCSR changed by native JNI code, use -XX:+RestoreMXCSROnJNICall");
616
617 __ ldmxcsr(mxcsr_std);
618
619 __ bind(ok_ret);
620 __ addptr(rsp, wordSize);
621 __ pop(rax);
622 }
623
624 __ ret(0);
625
626 return start;
627 }
628
629 address generate_f2i_fixup() {
630 StubCodeMark mark(this, "StubRoutines", "f2i_fixup");
631 Address inout(rsp, 5 * wordSize); // return address + 4 saves
632
633 address start = __ pc();
634
635 Label L;
636
637 __ push(rax);
638 __ push(c_rarg3);
639 __ push(c_rarg2);
640 __ push(c_rarg1);
641
642 __ movl(rax, 0x7f800000);
643 __ xorl(c_rarg3, c_rarg3);
644 __ movl(c_rarg2, inout);
645 __ movl(c_rarg1, c_rarg2);
646 __ andl(c_rarg1, 0x7fffffff);
647 __ cmpl(rax, c_rarg1); // NaN? -> 0
648 __ jcc(Assembler::negative, L);
649 __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint
650 __ movl(c_rarg3, 0x80000000);
651 __ movl(rax, 0x7fffffff);
652 __ cmovl(Assembler::positive, c_rarg3, rax);
653
654 __ bind(L);
655 __ movptr(inout, c_rarg3);
656
657 __ pop(c_rarg1);
658 __ pop(c_rarg2);
659 __ pop(c_rarg3);
660 __ pop(rax);
661
662 __ ret(0);
663
664 return start;
665 }
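
This stub only runs after cvttss2si has already produced the "integer indefinite" value 0x80000000, and it patches the result to Java's float-to-int rules: NaN becomes 0, everything else saturates by sign. A scalar sketch of the computation on the raw float bits (assumed semantics, mirroring the mask/compare sequence above):

    // What f2i_fixup leaves in 'inout' (sketch); 'bits' is the float's bit pattern.
    static int32_t f2i_result(int32_t bits) {
      if ((bits & 0x7fffffff) > 0x7f800000) return 0;        // NaN -> 0
      return (bits < 0) ? (int32_t)0x80000000 : 0x7fffffff;  // min_jint : max_jint
    }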
666
667 address generate_f2l_fixup() {
668 StubCodeMark mark(this, "StubRoutines", "f2l_fixup");
669 Address inout(rsp, 5 * wordSize); // return address + 4 saves
670 address start = __ pc();
671
672 Label L;
673
674 __ push(rax);
675 __ push(c_rarg3);
676 __ push(c_rarg2);
677 __ push(c_rarg1);
678
679 __ movl(rax, 0x7f800000);
680 __ xorl(c_rarg3, c_rarg3);
681 __ movl(c_rarg2, inout);
682 __ movl(c_rarg1, c_rarg2);
683 __ andl(c_rarg1, 0x7fffffff);
684 __ cmpl(rax, c_rarg1); // NaN? -> 0
685 __ jcc(Assembler::negative, L);
686 __ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong
687 __ mov64(c_rarg3, 0x8000000000000000);
688 __ mov64(rax, 0x7fffffffffffffff);
689 __ cmov(Assembler::positive, c_rarg3, rax);
690
691 __ bind(L);
692 __ movptr(inout, c_rarg3);
693
694 __ pop(c_rarg1);
695 __ pop(c_rarg2);
696 __ pop(c_rarg3);
697 __ pop(rax);
698
699 __ ret(0);
700
701 return start;
702 }
703
704 address generate_d2i_fixup() {
705 StubCodeMark mark(this, "StubRoutines", "d2i_fixup");
706 Address inout(rsp, 6 * wordSize); // return address + 5 saves
707
708 address start = __ pc();
709
710 Label L;
711
712 __ push(rax);
713 __ push(c_rarg3);
714 __ push(c_rarg2);
715 __ push(c_rarg1);
716 __ push(c_rarg0);
717
718 __ movl(rax, 0x7ff00000);
719 __ movq(c_rarg2, inout);
720 __ movl(c_rarg3, c_rarg2);
721 __ mov(c_rarg1, c_rarg2);
722 __ mov(c_rarg0, c_rarg2);
723 __ negl(c_rarg3);
724 __ shrptr(c_rarg1, 0x20);
725 __ orl(c_rarg3, c_rarg2);
726 __ andl(c_rarg1, 0x7fffffff);
727 __ xorl(c_rarg2, c_rarg2);
728 __ shrl(c_rarg3, 0x1f);
729 __ orl(c_rarg1, c_rarg3);
730 __ cmpl(rax, c_rarg1);
731 __ jcc(Assembler::negative, L); // NaN -> 0
732 __ testptr(c_rarg0, c_rarg0); // signed ? min_jint : max_jint
733 __ movl(c_rarg2, 0x80000000);
734 __ movl(rax, 0x7fffffff);
735 __ cmov(Assembler::positive, c_rarg2, rax);
736
737 __ bind(L);
738 __ movptr(inout, c_rarg2);
739
740 __ pop(c_rarg0);
741 __ pop(c_rarg1);
742 __ pop(c_rarg2);
743 __ pop(c_rarg3);
744 __ pop(rax);
745
746 __ ret(0);
747
748 return start;
749 }
750
751 address generate_d2l_fixup() {
752 StubCodeMark mark(this, "StubRoutines", "d2l_fixup");
753 Address inout(rsp, 6 * wordSize); // return address + 5 saves
754
755 address start = __ pc();
756
757 Label L;
758
759 __ push(rax);
760 __ push(c_rarg3);
761 __ push(c_rarg2);
762 __ push(c_rarg1);
763 __ push(c_rarg0);
764
765 __ movl(rax, 0x7ff00000);
766 __ movq(c_rarg2, inout);
767 __ movl(c_rarg3, c_rarg2);
768 __ mov(c_rarg1, c_rarg2);
769 __ mov(c_rarg0, c_rarg2);
770 __ negl(c_rarg3);
771 __ shrptr(c_rarg1, 0x20);
772 __ orl(c_rarg3, c_rarg2);
773 __ andl(c_rarg1, 0x7fffffff);
774 __ xorl(c_rarg2, c_rarg2);
775 __ shrl(c_rarg3, 0x1f);
776 __ orl(c_rarg1, c_rarg3);
777 __ cmpl(rax, c_rarg1);
778 __ jcc(Assembler::negative, L); // NaN -> 0
779 __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong
780 __ mov64(c_rarg2, 0x8000000000000000);
781 __ mov64(rax, 0x7fffffffffffffff);
782 __ cmovq(Assembler::positive, c_rarg2, rax);
783
784 __ bind(L);
785 __ movq(inout, c_rarg2);
786
787 __ pop(c_rarg0);
788 __ pop(c_rarg1);
789 __ pop(c_rarg2);
790 __ pop(c_rarg3);
791 __ pop(rax);
792
793 __ ret(0);
794
795 return start;
796 }
797
798 address generate_iota_indices(const char *stub_name) {
799 __ align(CodeEntryAlignment);
800 StubCodeMark mark(this, "StubRoutines", stub_name);
801 address start = __ pc();
802 __ emit_data64(0x0706050403020100, relocInfo::none);
803 __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
804 __ emit_data64(0x1716151413121110, relocInfo::none);
805 __ emit_data64(0x1F1E1D1C1B1A1918, relocInfo::none);
806 __ emit_data64(0x2726252423222120, relocInfo::none);
807 __ emit_data64(0x2F2E2D2C2B2A2928, relocInfo::none);
808 __ emit_data64(0x3736353433323130, relocInfo::none);
809 __ emit_data64(0x3F3E3D3C3B3A3938, relocInfo::none);
810 return start;
811 }
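
Decoded little-endian, the eight 64-bit constants are simply the bytes 0 through 63 in ascending order, i.e. an identity index table (presumably consumed as per-lane indices by vector shuffle/permute code):

    // Equivalent table (sketch):
    uint8_t iota[64];
    for (int i = 0; i < 64; i++) iota[i] = (uint8_t)i;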
812
813 address generate_vector_byte_shuffle_mask(const char *stub_name) {
814 __ align(CodeEntryAlignment);
815 StubCodeMark mark(this, "StubRoutines", stub_name);
816 address start = __ pc();
817 __ emit_data64(0x7070707070707070, relocInfo::none);
818 __ emit_data64(0x7070707070707070, relocInfo::none);
819 __ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
820 __ emit_data64(0xF0F0F0F0F0F0F0F0, relocInfo::none);
821 return start;
822 }
823
824 address generate_fp_mask(const char *stub_name, int64_t mask) {
825 __ align(CodeEntryAlignment);
826 StubCodeMark mark(this, "StubRoutines", stub_name);
827 address start = __ pc();
828
829 __ emit_data64( mask, relocInfo::none );
830 __ emit_data64( mask, relocInfo::none );
831
832 return start;
833 }
834
835 address generate_vector_mask(const char *stub_name, int64_t mask) {
836 __ align(CodeEntryAlignment);
837 StubCodeMark mark(this, "StubRoutines", stub_name);
838 address start = __ pc();
839
840 __ emit_data64(mask, relocInfo::none);
841 __ emit_data64(mask, relocInfo::none);
842 __ emit_data64(mask, relocInfo::none);
843 __ emit_data64(mask, relocInfo::none);
844 __ emit_data64(mask, relocInfo::none);
845 __ emit_data64(mask, relocInfo::none);
846 __ emit_data64(mask, relocInfo::none);
847 __ emit_data64(mask, relocInfo::none);
848
849 return start;
850 }
851
852 address generate_vector_byte_perm_mask(const char *stub_name) {
853 __ align(CodeEntryAlignment);
854 StubCodeMark mark(this, "StubRoutines", stub_name);
855 address start = __ pc();
856
857 __ emit_data64(0x0000000000000001, relocInfo::none);
858 __ emit_data64(0x0000000000000003, relocInfo::none);
859 __ emit_data64(0x0000000000000005, relocInfo::none);
860 __ emit_data64(0x0000000000000007, relocInfo::none);
861 __ emit_data64(0x0000000000000000, relocInfo::none);
862 __ emit_data64(0x0000000000000002, relocInfo::none);
863 __ emit_data64(0x0000000000000004, relocInfo::none);
864 __ emit_data64(0x0000000000000006, relocInfo::none);
865
866 return start;
867 }
868
869 address generate_vector_fp_mask(const char *stub_name, int64_t mask) {
870 __ align(CodeEntryAlignment);
871 StubCodeMark mark(this, "StubRoutines", stub_name);
872 address start = __ pc();
873
874 __ emit_data64(mask, relocInfo::none);
875 __ emit_data64(mask, relocInfo::none);
876 __ emit_data64(mask, relocInfo::none);
877 __ emit_data64(mask, relocInfo::none);
878 __ emit_data64(mask, relocInfo::none);
879 __ emit_data64(mask, relocInfo::none);
880 __ emit_data64(mask, relocInfo::none);
881 __ emit_data64(mask, relocInfo::none);
882
883 return start;
884 }
885
886 address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
887 int32_t val0, int32_t val1, int32_t val2, int32_t val3,
888 int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
889 int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
890 int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
891 __ align(CodeEntryAlignment);
892 StubCodeMark mark(this, "StubRoutines", stub_name);
893 address start = __ pc();
894
895 assert(len != Assembler::AVX_NoVec, "vector len must be specified");
896 __ emit_data(val0, relocInfo::none, 0);
897 __ emit_data(val1, relocInfo::none, 0);
898 __ emit_data(val2, relocInfo::none, 0);
899 __ emit_data(val3, relocInfo::none, 0);
900 if (len >= Assembler::AVX_256bit) {
901 __ emit_data(val4, relocInfo::none, 0);
902 __ emit_data(val5, relocInfo::none, 0);
903 __ emit_data(val6, relocInfo::none, 0);
904 __ emit_data(val7, relocInfo::none, 0);
905 if (len >= Assembler::AVX_512bit) {
906 __ emit_data(val8, relocInfo::none, 0);
907 __ emit_data(val9, relocInfo::none, 0);
908 __ emit_data(val10, relocInfo::none, 0);
909 __ emit_data(val11, relocInfo::none, 0);
910 __ emit_data(val12, relocInfo::none, 0);
911 __ emit_data(val13, relocInfo::none, 0);
912 __ emit_data(val14, relocInfo::none, 0);
913 __ emit_data(val15, relocInfo::none, 0);
914 }
915 }
916
917 return start;
918 }
919
920 // Non-destructive plausibility checks for oops
921 //
922 // Arguments:
923 // all args on stack!
924 //
925 // Stack after saving c_rarg3:
926 // [tos + 0]: saved c_rarg3
927 // [tos + 1]: saved c_rarg2
928 // [tos + 2]: saved r12 (several TemplateTable methods use it)
929 // [tos + 3]: saved flags
930 // [tos + 4]: return address
931 // * [tos + 5]: error message (char*)
932 // * [tos + 6]: object to verify (oop)
933 // * [tos + 7]: saved rax - saved by caller and bashed
934 // * [tos + 8]: saved r10 (rscratch1) - saved by caller
935 // * = popped on exit
936 address generate_verify_oop() {
937 StubCodeMark mark(this, "StubRoutines", "verify_oop");
938 address start = __ pc();
939
940 Label exit, error;
941
942 __ pushf();
943 __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
944
945 __ push(r12);
946
947 // save c_rarg2 and c_rarg3
948 __ push(c_rarg2);
949 __ push(c_rarg3);
950
951 enum {
952 // After previous pushes.
953 oop_to_verify = 6 * wordSize,
954 saved_rax = 7 * wordSize,
955 saved_r10 = 8 * wordSize,
956
957 // Before the call to MacroAssembler::debug(), see below.
958 return_addr = 16 * wordSize,
959 error_msg = 17 * wordSize
960 };
961
962 // get object
963 __ movptr(rax, Address(rsp, oop_to_verify));
964
965 // make sure object is 'reasonable'
966 __ testptr(rax, rax);
967 __ jcc(Assembler::zero, exit); // if obj is NULL it is OK
968
969#if INCLUDE_ZGC
970 if (UseZGC) {
971 // Check if metadata bits indicate a bad oop
972 __ testptr(rax, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset()));
973 __ jcc(Assembler::notZero, error);
974 }
975#endif
976
977 // Check if the oop is in the right area of memory
978 __ movptr(c_rarg2, rax);
979 __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_mask());
980 __ andptr(c_rarg2, c_rarg3);
981 __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_bits());
982 __ cmpptr(c_rarg2, c_rarg3);
983 __ jcc(Assembler::notZero, error);
984
985 // make sure klass is 'reasonable', which is not zero.
986 __ load_klass(rax, rax, rscratch1); // get klass
987 __ testptr(rax, rax);
988 __ jcc(Assembler::zero, error); // if klass is NULL it is broken
989
990 // return if everything seems ok
991 __ bind(exit);
992 __ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
993 __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
994 __ pop(c_rarg3); // restore c_rarg3
995 __ pop(c_rarg2); // restore c_rarg2
996 __ pop(r12); // restore r12
997 __ popf(); // restore flags
998 __ ret(4 * wordSize); // pop caller saved stuff
999
1000 // handle errors
1001 __ bind(error);
1002 __ movptr(rax, Address(rsp, saved_rax)); // get saved rax back
1003 __ movptr(rscratch1, Address(rsp, saved_r10)); // get saved r10 back
1004 __ pop(c_rarg3); // get saved c_rarg3 back
1005 __ pop(c_rarg2); // get saved c_rarg2 back
1006 __ pop(r12); // get saved r12 back
1007 __ popf(); // get saved flags off stack --
1008 // will be ignored
1009
1010 __ pusha(); // push registers
1011 // (rip is already
1012 // already pushed)
1013 // debug(char* msg, int64_t pc, int64_t regs[])
1014 // We've popped the registers we'd saved (c_rarg3, c_rarg2 and flags), and
1015 // pushed all the registers, so now the stack looks like:
1016 // [tos + 0] 16 saved registers
1017 // [tos + 16] return address
1018 // * [tos + 17] error message (char*)
1019 // * [tos + 18] object to verify (oop)
1020 // * [tos + 19] saved rax - saved by caller and bashed
1021 // * [tos + 20] saved r10 (rscratch1) - saved by caller
1022 // * = popped on exit
1023
1024 __ movptr(c_rarg0, Address(rsp, error_msg)); // pass address of error message
1025 __ movptr(c_rarg1, Address(rsp, return_addr)); // pass return address
1026 __ movq(c_rarg2, rsp); // pass address of regs on stack
1027 __ mov(r12, rsp); // remember rsp
1028 __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1029 __ andptr(rsp, -16); // align stack as required by ABI
1030 BLOCK_COMMENT("call MacroAssembler::debug");
1031 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
1032 __ hlt();
1033 return start;
1034 }
1035
1036 //
1037 // Verify that a register contains clean 32-bits positive value
1038 // (high 32-bits are 0) so it could be used in 64-bits shifts.
1039 //
1040 // Input:
1041 // Rint - 32-bits value
1042 // Rtmp - scratch
1043 //
1044 void assert_clean_int(Register Rint, Register Rtmp) {
1045#ifdef ASSERT
1046 Label L;
1047 assert_different_registers(Rtmp, Rint);
1048 __ movslq(Rtmp, Rint);
1049 __ cmpq(Rtmp, Rint);
1050 __ jcc(Assembler::equal, L);
1051 __ stop("high 32-bits of int value are not 0");
1052 __ bind(L);
1053#endif
1054 }
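
The movslq/cmpq pair checks that the register already equals the sign extension of its own low 32 bits; in C terms:

    // Property asserted by assert_clean_int (sketch):
    static bool is_clean_int(int64_t v) {
      return v == (int64_t)(int32_t)v;  // high half is just a sign copy
    }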
1055
1056 // Generate overlap test for array copy stubs
1057 //
1058 // Input:
1059 // c_rarg0 - from
1060 // c_rarg1 - to
1061 // c_rarg2 - element count
1062 //
1063 // Output:
1064 // rax - &from[element count - 1]
1065 //
1066 void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {
1067 assert(no_overlap_target != NULL, "must be generated");
1068 array_overlap_test(no_overlap_target, NULL, sf);
1069 }
1070 void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) {
1071 array_overlap_test(NULL, &L_no_overlap, sf);
1072 }
1073 void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) {
1074 const Register from = c_rarg0;
1075 const Register to = c_rarg1;
1076 const Register count = c_rarg2;
1077 const Register end_from = rax;
1078
1079 __ cmpptr(to, from);
1080 __ lea(end_from, Address(from, count, sf, 0));
1081 if (NOLp == NULL) {
1082 ExternalAddress no_overlap(no_overlap_target);
1083 __ jump_cc(Assembler::belowEqual, no_overlap);
1084 __ cmpptr(to, end_from);
1085 __ jump_cc(Assembler::aboveEqual, no_overlap);
1086 } else {
1087 __ jcc(Assembler::belowEqual, (*NOLp));
1088 __ cmpptr(to, end_from);
1089 __ jcc(Assembler::aboveEqual, (*NOLp));
1090 }
1091 }
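
The two compares encode the usual disjointness test: the copy may run forward when 'to' is at or below 'from', or at or past the end of the source. As a scalar predicate (sketch; elem_size corresponds to the ScaleFactor sf):

    // Condition under which the no-overlap target is taken (sketch):
    static bool copy_forward_ok(uintptr_t from, uintptr_t to,
                                size_t count, size_t elem_size) {
      return to <= from || to >= from + count * elem_size;
    }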
1092
1093 // Shuffle first three arg regs on Windows into Linux/Solaris locations.
1094 //
1095 // Outputs:
1096 // rdi - rcx
1097 // rsi - rdx
1098 // rdx - r8
1099 // rcx - r9
1100 //
1101 // Registers r9 and r10 are used to save rdi and rsi on Windows, which latter
1102 // are non-volatile. r9 and r10 should not be used by the caller.
1103 //
1104 DEBUG_ONLY(bool regs_in_thread;)
1105
1106 void setup_arg_regs(int nargs = 3) {
1107 const Register saved_rdi = r9;
1108 const Register saved_rsi = r10;
1109 assert(nargs == 3 || nargs == 4, "else fix");
1110#ifdef _WIN64
1111 assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1112 "unexpected argument registers");
1113 if (nargs >= 4)
1114 __ mov(rax, r9); // r9 is also saved_rdi
1115 __ movptr(saved_rdi, rdi);
1116 __ movptr(saved_rsi, rsi);
1117 __ mov(rdi, rcx); // c_rarg0
1118 __ mov(rsi, rdx); // c_rarg1
1119 __ mov(rdx, r8); // c_rarg2
1120 if (nargs >= 4)
1121 __ mov(rcx, rax); // c_rarg3 (via rax)
1122#else
1123 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1124 "unexpected argument registers");
1125#endif
1126 DEBUG_ONLY(regs_in_thread = false;)
1127 }
1128
1129 void restore_arg_regs() {
1130 assert(!regs_in_thread, "wrong call to restore_arg_regs");
1131 const Register saved_rdi = r9;
1132 const Register saved_rsi = r10;
1133#ifdef _WIN64
1134 __ movptr(rdi, saved_rdi);
1135 __ movptr(rsi, saved_rsi);
1136#endif
1137 }
1138
1139 // This is used in places where r10 is a scratch register, and can
1140 // be adapted if r9 is needed also.
1141 void setup_arg_regs_using_thread() {
1142 const Register saved_r15 = r9;
1143#ifdef _WIN64
1144 __ mov(saved_r15, r15); // r15 is callee saved and needs to be restored
1145 __ get_thread(r15_thread);
1146 assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
1147 "unexpected argument registers");
1148 __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())), rdi);
1149 __ movptr(Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())), rsi);
1150
1151 __ mov(rdi, rcx); // c_rarg0
1152 __ mov(rsi, rdx); // c_rarg1
1153 __ mov(rdx, r8); // c_rarg2
1154#else
1155 assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
1156 "unexpected argument registers");
1157#endif
1158 DEBUG_ONLY(regs_in_thread = true;)
1159 }
1160
1161 void restore_arg_regs_using_thread() {
1162 assert(regs_in_thread, "wrong call to restore_arg_regs");
1163 const Register saved_r15 = r9;
1164#ifdef _WIN64
1165 __ get_thread(r15_thread);
1166 __ movptr(rsi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rsi_offset())));
1167 __ movptr(rdi, Address(r15_thread, in_bytes(JavaThread::windows_saved_rdi_offset())));
1168 __ mov(r15, saved_r15); // r15 is callee saved and needs to be restored
1169#endif
1170 }
1171
1172 // Copy big chunks forward
1173 //
1174 // Inputs:
1175 // end_from - source arrays end address
1176 // end_to - destination array end address
1177 // qword_count - 64-bits element count, negative
1178 // to - scratch
1179 // L_copy_bytes - entry label
1180 // L_copy_8_bytes - exit label
1181 //
1182 void copy_bytes_forward(Register end_from, Register end_to,
1183 Register qword_count, Register to,
1184 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1185 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1186 Label L_loop;
1187 __ align(OptoLoopAlignment);
1188 if (UseUnalignedLoadStores) {
1189 Label L_end;
1190 __ BIND(L_loop);
1191 if (UseAVX >= 2) {
1192 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1193 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1194 __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
1195 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
1196 } else {
1197 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
1198 __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
1199 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
1200 __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
1201 __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
1202 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
1203 __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
1204 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
1205 }
1206
1207 __ BIND(L_copy_bytes);
1208 __ addptr(qword_count, 8);
1209 __ jcc(Assembler::lessEqual, L_loop);
1210 __ subptr(qword_count, 4); // sub(8) and add(4)
1211 __ jccb(Assembler::greater, L_end);
1212 // Copy trailing 32 bytes
1213 if (UseAVX >= 2) {
1214 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1215 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1216 } else {
1217 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1218 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1219 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
1220 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
1221 }
1222 __ addptr(qword_count, 4);
1223 __ BIND(L_end);
1224 if (UseAVX >= 2) {
1225 // clean upper bits of YMM registers
1226 __ vpxor(xmm0, xmm0);
1227 __ vpxor(xmm1, xmm1);
1228 }
1229 } else {
1230 // Copy 32-bytes per iteration
1231 __ BIND(L_loop);
1232 __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
1233 __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
1234 __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
1235 __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
1236 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
1237 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
1238 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
1239 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
1240
1241 __ BIND(L_copy_bytes);
1242 __ addptr(qword_count, 4);
1243 __ jcc(Assembler::lessEqual, L_loop);
1244 }
1245 __ subptr(qword_count, 4);
1246 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
1247 }
1248
1249 // Copy big chunks backward
1250 //
1251 // Inputs:
1252 // from - source array address
1253 // dest - destination array address
1254 // qword_count - 64-bit element count
1255 // to - scratch
1256 // L_copy_bytes - entry label
1257 // L_copy_8_bytes - exit label
1258 //
1259 void copy_bytes_backward(Register from, Register dest,
1260 Register qword_count, Register to,
1261 Label& L_copy_bytes, Label& L_copy_8_bytes) {
1262 DEBUG_ONLY(__ stop("enter at entry label, not here"));
1263 Label L_loop;
1264 __ align(OptoLoopAlignment);
1265 if (UseUnalignedLoadStores) {
1266 Label L_end;
1267 __ BIND(L_loop);
1268 if (UseAVX >= 2) {
1269 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
1270 __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
1271 __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1272 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1273 } else {
1274 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
1275 __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
1276 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
1277 __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
1278 __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
1279 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
1280 __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
1281 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
1282 }
1283
1284 __ BIND(L_copy_bytes);
1285 __ subptr(qword_count, 8);
1286 __ jcc(Assembler::greaterEqual, L_loop);
1287
1288 __ addptr(qword_count, 4); // add(8) and sub(4)
1289 __ jccb(Assembler::less, L_end);
1290 // Copy trailing 32 bytes
1291 if (UseAVX >= 2) {
1292 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
1293 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
1294 } else {
1295 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
1296 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
1297 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1298 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1299 }
1300 __ subptr(qword_count, 4);
1301 __ BIND(L_end);
1302 if (UseAVX >= 2) {
1303 // clean upper bits of YMM registers
1304 __ vpxor(xmm0, xmm0);
1305 __ vpxor(xmm1, xmm1);
1306 }
1307 } else {
1308 // Copy 32-bytes per iteration
1309 __ BIND(L_loop);
1310 __ movq(to, Address(from, qword_count, Address::times_8, 24));
1311 __ movq(Address(dest, qword_count, Address::times_8, 24), to);
1312 __ movq(to, Address(from, qword_count, Address::times_8, 16));
1313 __ movq(Address(dest, qword_count, Address::times_8, 16), to);
1314 __ movq(to, Address(from, qword_count, Address::times_8, 8));
1315 __ movq(Address(dest, qword_count, Address::times_8, 8), to);
1316 __ movq(to, Address(from, qword_count, Address::times_8, 0));
1317 __ movq(Address(dest, qword_count, Address::times_8, 0), to);
1318
1319 __ BIND(L_copy_bytes);
1320 __ subptr(qword_count, 4);
1321 __ jcc(Assembler::greaterEqual, L_loop);
1322 }
1323 __ addptr(qword_count, 4);
1324 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
1325 }
1326
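For orientation, here is a minimal scalar C++ model of the backward chunked-copy protocol above; the helper name and plain-pointer arguments are illustrative only, not VM code. The stub peels four qwords (32 bytes) per main-loop iteration from the high end and hands any remainder to the caller's trailing-qword loop via L_copy_8_bytes.

#include <cstdint>

// Sketch (assumed illustrative signature; the real stub operates on registers):
static void model_copy_qwords_backward(const uint64_t* from, uint64_t* dest,
                                       int64_t qword_count) {
  // Main loop: 4 qwords per iteration, highest addresses first, mirroring
  // the movq sequence at offsets 24/16/8/0 above.
  while (qword_count >= 4) {
    dest[qword_count - 1] = from[qword_count - 1];
    dest[qword_count - 2] = from[qword_count - 2];
    dest[qword_count - 3] = from[qword_count - 3];
    dest[qword_count - 4] = from[qword_count - 4];
    qword_count -= 4;
  }
  // Trailing qwords: the stub branches to L_copy_8_bytes for these.
  while (qword_count > 0) {
    --qword_count;
    dest[qword_count] = from[qword_count];
  }
}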
1327 #ifndef PRODUCT
1328 int& get_profile_ctr(int shift) {
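// shift is log2(element size): 0 -> jbyte, 1 -> jshort, 2 -> jint, 3 -> jlong.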
1329 if (0 == shift)
1330 return SharedRuntime::_jbyte_array_copy_ctr;
1331 else if (1 == shift)
1332 return SharedRuntime::_jshort_array_copy_ctr;
1333 else if (2 == shift)
1334 return SharedRuntime::_jint_array_copy_ctr;
1335 else
1336 return SharedRuntime::_jlong_array_copy_ctr;
1337 }
1338 #endif
1339
1340 void setup_argument_regs(BasicType type) {
1341 if (type == T_BYTE || type == T_SHORT) {
1342 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1343 // r9 and r10 may be used to save non-volatile registers
1344 } else {
1345 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
1346 // r9 is used to save r15_thread
1347 }
1348 }
1349
1350 void restore_argument_regs(BasicType type) {
1351 if (type == T_BYTE || type == T_SHORT) {
1352 restore_arg_regs();
1353 } else {
1354 restore_arg_regs_using_thread();
1355 }
1356 }
1357
1358 #if COMPILER2_OR_JVMCI
1359 // Note: The following rules apply to the AVX3 optimized arraycopy stubs:
1360 // - If the target supports AVX3 features (BW+VL+F), then the implementation uses 32 byte vectors (YMMs)
1361 // for both the special cases (various small block sizes) and the aligned copy loop. This is the
1362 // default configuration.
1363 // - If the copy length is above AVX3Threshold, then the implementation uses 64 byte vectors (ZMMs)
1364 // for the main copy loop (and the subsequent tail), since the bulk of the cycles will be consumed there.
1365 // - If the user forces MaxVectorSize=32, then above 4096 bytes REP MOVS shows
1366 // better performance for disjoint copies. For conjoint/backward copies, vector based
1367 // copy performs better.
1368 // - If the user sets AVX3Threshold=0, then the special cases for small block sizes operate over
1369 // 64 byte vector registers (ZMMs).
1370
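The rules above can be read as a small decision table. The following is a minimal sketch of that selection logic with hypothetical names (len_bytes, max_vector, pick_copy_kind); it approximates the stub's dispatch, it is not the actual code:

#include <cstddef>

enum CopyKind { VEC32, VEC64, REP_MOVS };

static CopyKind pick_copy_kind(size_t len_bytes, int max_vector,
                               int avx3_threshold, bool disjoint) {
  // AVX3Threshold == 0: everything, including small-block special cases,
  // runs on 64 byte (ZMM) vectors.
  if (max_vector > 32 &&
      (avx3_threshold == 0 || len_bytes >= (size_t)avx3_threshold))
    return VEC64;
  // Forced MaxVectorSize=32: REP MOVS wins above 4096 bytes for disjoint copies.
  if (max_vector == 32 && disjoint && len_bytes > 4096)
    return REP_MOVS;
  return VEC32; // default: 32 byte (YMM) vectors
}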
1371 // Inputs:
1372 // c_rarg0 - source array address
1373 // c_rarg1 - destination array address
1374 // c_rarg2 - element count, treated as ssize_t, can be zero
1375 //
1376 //
1377 // Side Effects:
1378 // disjoint_copy_avx3_masked is set to the no-overlap entry point
1379 // used by generate_conjoint_[byte/int/short/long]_copy().
1380 //
1381
1382 address generate_disjoint_copy_avx3_masked(address* entry, const char *name, int shift,
1383 bool aligned, bool is_oop, bool dest_uninitialized) {
1384 __ align(CodeEntryAlignment);
1385 StubCodeMark mark(this, "StubRoutines", name);
1386 address start = __ pc();
1387 int avx3threshold = VM_Version::avx3_threshold();
1388 bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
1389 Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
1390 Label L_repmovs, L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
1391 const Register from = rdi; // source array address
1392 const Register to = rsi; // destination array address
1393 const Register count = rdx; // elements count
1394 const Register temp1 = r8;
1395 const Register temp2 = r11;
1396 const Register temp3 = rax;
1397 const Register temp4 = rcx;
1398 // End pointers are inclusive, and if count is not zero they point
1399 // to the last unit copied: end_to[0] := end_from[0]
1400
1401 __ enter(); // required for proper stackwalking of RuntimeStub frame
1402 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1403
1404 if (entry != NULL) {
1405 *entry = __ pc();
1406 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1407 BLOCK_COMMENT("Entry:");
1408 }
1409
1410 BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
1411 BasicType type = is_oop ? T_OBJECT : type_vec[shift];
1412
1413 setup_argument_regs(type);
1414
1415 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
1416 if (dest_uninitialized) {
1417 decorators |= IS_DEST_UNINITIALIZED;
1418 }
1419 if (aligned) {
1420 decorators |= ARRAYCOPY_ALIGNED;
1421 }
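// (Sketch of the composition above: for an aligned copy into an uninitialized
// oop-array destination, decorators ends up as IN_HEAP | IS_ARRAY |
// ARRAYCOPY_DISJOINT | IS_DEST_UNINITIALIZED | ARRAYCOPY_ALIGNED.)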
1422 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1423 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1424
1425 {
1426 // Type(shift) byte(0), short(1), int(2), long(3)
1427 int loop_size[] = { 192, 96, 48, 24};
1428 int threshold[] = { 4096, 2048, 1024, 512};
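// (Both tables encode fixed byte sizes in element units: with element size
// 1 << shift, loop_size[shift] << shift == 192 bytes and
// threshold[shift] << shift == 4096 bytes for every shift,
// since 192*1 == 96*2 == 48*4 == 24*8 == 192.)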
1429
1430 // UnsafeCopyMemory page error: continue after ucm
1431 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
1432 // 'from', 'to' and 'count' are now valid
1433
1434 // temp1 holds remaining count and temp4 holds running count used to compute
1435 // next address offset for start of to/from addresses (temp4 * scale).
1436 __ mov64(temp4, 0);
1437 __ movq(temp1, count);
1438
1439 // Zero length check.
1440 __ BIND(L_tail);
1441 __ cmpq(temp1, 0);
1442 __ jcc(Assembler::lessEqual, L_exit);
1443
1444 // Special cases using 32 byte [masked] vector copy operations.
1445 __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
1446 temp4, temp3, use64byteVector, L_entry, L_exit);
1447
1448 // PRE-MAIN-POST loop for aligned copy.
1449 __ BIND(L_entry);
1450
1451 if (avx3threshold != 0) {
1452 __ cmpq(count, threshold[shift]);
1453 if (MaxVectorSize == 64) {
1454 // Copy using 64 byte vectors.
1455 __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
1456 } else {
1457 assert(MaxVectorSize < 64, "vector size should be < 64 bytes");
1458 // REP MOVS offers a faster copy path.
1459 __ jcc(Assembler::greaterEqual, L_repmovs);
1460 }
1461 }
1462
1463 if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
1464 // Partial copy to make dst address 32 byte aligned.
1465 __ movq(temp2, to);
1466 __ andq(temp2, 31);
1467 __ jcc(Assembler::equal, L_main_pre_loop);
1468
1469 __ negptr(temp2);
1470 __ addq(temp2, 32);
1471 if (shift) {
1472 __ shrq(temp2, shift);
1473 }
1474 __ movq(temp3, temp2);
1475 __ copy32_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift);
1476 __ movq(temp4, temp2);
1477 __ movq(temp1, count);
1478 __ subq(temp1, temp2);
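// (Worked example, assuming shift == 0: if 'to' & 31 == 8, then
// temp2 = 32 - 8 = 24, so the masked copy moves 24 bytes and to + 24 is
// 32 byte aligned; for shift == 2, the same 24 bytes are expressed as
// temp2 = 24 >> 2 == 6 int elements.)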
1479
1480 __ cmpq(temp1, loop_size[shift]);
1481 __ jcc(Assembler::less, L_tail);
1482
1483 __ BIND(L_main_pre_loop);
1484 __ subq(temp1, loop_size[shift]);
1485
1486 // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
1487 __ align32();
1488 __ BIND(L_main_loop);
1489 __ copy64_avx(to, from, temp4, xmm1, false, shift, 0);
1490 __ copy64_avx(to, from, temp4, xmm1, false, shift, 64);
1491 __ copy64_avx(to, from, temp4, xmm1, false, shift, 128);
1492 __ addptr(temp4, loop_size[shift]);
1493 __ subq(temp1, loop_size[shift]);
1494 __ jcc(Assembler::greater, L_main_loop);
1495
1496 __ addq(temp1, loop_size[shift]);
1497
1498 // Tail loop.
1499 __ jmp(L_tail);
1500
1501 __ BIND(L_repmovs);
1502 __ movq(temp2, temp1);
1503 // Swap to(RSI) and from(RDI) addresses to comply with REP MOVS semantics.
1504 __ movq(temp3, to);
1505 __ movq(to, from);
1506 __ movq(from, temp3);
1507 // Save to/from for restoration post rep_mov.
1508 __ movq(temp1, to);
1509 __ movq(temp3, from);
1510 if (shift < 3) {
1511 __ shrq(temp2, 3 - shift); // quad word count
1512 }
1513 __ movq(temp4, temp2); // move quad word count into temp4(RCX).
1514 __ rep_mov();
1515 __ shlq(temp2, 3); // convert quad words into byte count.
1516 if (shift) {
1517 __ shrq(temp2, shift); // type specific count.
1518 }
1519 // Restore original addresses in to/from.
1520 __ movq(to, temp3);
1521 __ movq(from, temp1);
1522 __ movq(temp4, temp2);
1523 __ movq(temp1, count);
1524 __ subq(temp1, temp2); // trailing part (less than a quad word size).
1525 __ jmp(L_tail);
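// (Worked example, assuming the initial pass where temp1 == count, with
// shift == 1 (shorts) and count == 1003 elements: temp2 = 1003 >> 2 == 250
// qwords; after REP MOVSQ, temp2 is converted back via (250 << 3) >> 1 ==
// 1000 elements, so temp1 = 1003 - 1000 == 3 elements remain for the tail.)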
1526 }
1527
1528 if (MaxVectorSize > 32) {
1529 __ BIND(L_pre_main_post_64);
1530 // Partial copy to make dst address 64 byte aligned.
1531 __ movq(temp2, to);
1532 __ andq(temp2, 63);
1533 __ jcc(Assembler::equal, L_main_pre_loop_64bytes);
1534
1535 __ negptr(temp2);
1536 __ addq(temp2, 64);
1537 if (shift) {
1538 __ shrq(temp2, shift);
1539 }
1540 __ movq(temp3, temp2);
1541 __ copy64_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift, 0, true);
1542 __ movq(temp4, temp2);
1543 __ movq(temp1, count);
1544 __ subq(temp1, temp2);
1545
1546 __ cmpq(temp1, loop_size[shift]);
1547 __ jcc(Assembler::less, L_tail64);
1548
1549 __ BIND(L_main_pre_loop_64bytes);
1550 __ subq(temp1, loop_size[shift]);
1551
1552 // Main loop with aligned copy block size of 192 bytes at
1553 // 64 byte copy granularity.
1554 __ align32();
1555 __ BIND(L_main_loop_64bytes);
1556 __ copy64_avx(to, from, temp4, xmm1, false, shift, 0, true);
1557 __ copy64_avx(to, from, temp4, xmm1, false, shift, 64, true);
1558 __ copy64_avx(to, from, temp4, xmm1, false, shift, 128, true);
1559 __ addptr(temp4, loop_size[shift]);
1560 __ subq(temp1, loop_size[shift]);
1561 __ jcc(Assembler::greater, L_main_loop_64bytes);
1562
1563 __ addq(temp1, loop_size[shift]);
1564 // Zero length check.
1565 __ jcc(Assembler::lessEqual, L_exit);
1566
1567 __ BIND(L_tail64);
1568
1569 // Tail handling using 64 byte [masked] vector copy operations.
1570 use64byteVector = true;
1571 __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
1572 temp4, temp3, use64byteVector, L_entry, L_exit);
1573 }
1574 __ BIND(L_exit);
1575 }
1576
1577 address ucme_exit_pc = __ pc();
1578 // When called from generic_arraycopy, r11 contains specific values
1579 // used during the arraycopy epilogue; re-initialize r11 here.
1580 if (is_oop) {
1581 __ movq(r11, shift == 3 ? count : to);
1582 }
1583 bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
1584 restore_argument_regs(type);
1585 inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
1586 __ xorptr(rax, rax); // return 0
1587 __ vzeroupper();
1588 __ leave(); // required for proper stackwalking of RuntimeStub frame
1589 __ ret(0);
1590 return start;
1591 }
1592
1593 // Inputs:
1594 // c_rarg0 - source array address
1595 // c_rarg1 - destination array address
1596 // c_rarg2 - element count, treated as ssize_t, can be zero
1597 //
1598 //
1599 address generate_conjoint_copy_avx3_masked(address* entry, const char *name, int shift,
1600 address nooverlap_target, bool aligned, bool is_oop,
1601 bool dest_uninitialized) {
1602 __ align(CodeEntryAlignment);
1603 StubCodeMark mark(this, "StubRoutines", name);
1604 address start = __ pc();
1605
1606 int avx3threshold = VM_Version::avx3_threshold();
1607 bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
1608
1609 Label L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
1610 Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
1611 const Register from = rdi; // source array address
1612 const Register to = rsi; // destination array address
1613 const Register count = rdx; // elements count
1614 const Register temp1 = r8;
1615 const Register temp2 = rcx;
1616 const Register temp3 = r11;
1617 const Register temp4 = rax;
1618 // End pointers are inclusive, and if count is not zero they point
1619 // to the last unit copied: end_to[0] := end_from[0]
1620
1621 __ enter(); // required for proper stackwalking of RuntimeStub frame
1622 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1623
1624 if (entry != NULL) {
1625 *entry = __ pc();
1626 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1627 BLOCK_COMMENT("Entry:");
1628 }
1629
1630 array_overlap_test(nooverlap_target, (Address::ScaleFactor)(shift));
1631
1632 BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
1633 BasicType type = is_oop ? T_OBJECT : type_vec[shift];
1634
1635 setup_argument_regs(type);
1636
1637 DecoratorSet decorators = IN_HEAP | IS_ARRAY;
1638 if (dest_uninitialized) {
1639 decorators |= IS_DEST_UNINITIALIZED;
1640 }
1641 if (aligned) {
1642 decorators |= ARRAYCOPY_ALIGNED;
1643 }
1644 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1645 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
1646 {
1647 // Type(shift) byte(0), short(1), int(2), long(3)
1648 int loop_size[] = { 192, 96, 48, 24};
1649 int threshold[] = { 4096, 2048, 1024, 512};
1650
1651 // UnsafeCopyMemory page error: continue after ucm
1652 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
1653 // 'from', 'to' and 'count' are now valid
1654
1655 // temp1 holds remaining count.
1656 __ movq(temp1, count);
1657
1658 // Zero length check.
1659 __ BIND(L_tail);
1660 __ cmpq(temp1, 0);
1661 __ jcc(Assembler::lessEqual, L_exit);
1662
1663 __ mov64(temp2, 0);
1664 __ movq(temp3, temp1);
1665 // Special cases using 32 byte [masked] vector copy operations.
1666 __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
1667 temp4, use64byteVector, L_entry, L_exit);
1668
1669 // PRE-MAIN-POST loop for aligned copy.
1670 __ BIND(L_entry);
1671
1672 if ((MaxVectorSize > 32) && (avx3threshold != 0)) {
1673 __ cmpq(temp1, threshold[shift]);
1674 __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
1675 }
1676
1677 if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
1678 // Partial copy to make dst address 32 byte aligned.
1679 __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
1680 __ andq(temp2, 31);
1681 __ jcc(Assembler::equal, L_main_pre_loop);
1682
1683 if (shift) {
1684 __ shrq(temp2, shift);
1685 }
1686 __ subq(temp1, temp2);
1687 __ copy32_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift);
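// (Sketch, assuming shift == 0: temp2 = (to + count) & 31 is the number of
// bytes by which the top end overhangs a 32 byte boundary; those temp2
// elements are copied first with a masked store, after which the remaining
// region ends 32 byte aligned, as needed for the backward main loop.)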
1688
1689 __ cmpq(temp1, loop_size[shift]);
1690 __ jcc(Assembler::less, L_tail);
1691
1692 __ BIND(L_main_pre_loop);
1693
1694 // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
1695 __ align32();
1696 __ BIND(L_main_loop);
1697 __ copy64_avx(to, from, temp1, xmm1, true, shift, -64);
1698 __ copy64_avx(to, from, temp1, xmm1, true, shift, -128);
1699 __ copy64_avx(to, from, temp1, xmm1, true, shift, -192);
1700 __ subptr(temp1, loop_size[shift]);
1701 __ cmpq(temp1, loop_size[shift]);
1702 __ jcc(Assembler::greater, L_main_loop);
1703
1704 // Tail loop.
1705 __ jmp(L_tail);
1706 }
1707
1708 if (MaxVectorSize > 32) {
1709 __ BIND(L_pre_main_post_64);
1710 // Partial copy to make dst address 64 byte aligned.
1711 __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
1712 __ andq(temp2, 63);
1713 __ jcc(Assembler::equal, L_main_pre_loop_64bytes);
1714
1715 if (shift) {
1716 __ shrq(temp2, shift);
1717 }
1718 __ subq(temp1, temp2);
1719 __ copy64_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift, 0, true);
1720
1721 __ cmpq(temp1, loop_size[shift]);
1722 __ jcc(Assembler::less, L_tail64);
1723
1724 __ BIND(L_main_pre_loop_64bytes);
1725
1726 // Main loop with aligned copy block size of 192 bytes at
1727 // 64 byte copy granularity.
1728 __ align32();
1729 __ BIND(L_main_loop_64bytes);
1730 __ copy64_avx(to, from, temp1, xmm1, true, shift, -64, true);
1731 __ copy64_avx(to, from, temp1, xmm1, true, shift, -128, true);
1732 __ copy64_avx(to, from, temp1, xmm1, true, shift, -192, true);
1733 __ subq(temp1, loop_size[shift]);
1734 __ cmpq(temp1, loop_size[shift]);
1735 __ jcc(Assembler::greater, L_main_loop_64bytes);
1736
1737 // Zero length check.
1738 __ cmpq(temp1, 0);
1739 __ jcc(Assembler::lessEqual, L_exit);
1740
1741 __ BIND(L_tail64);
1742
1743 // Tail handling using 64 byte [masked] vector copy operations.
1744 use64byteVector = true;
1745 __ mov64(temp2, 0);
1746 __ movq(temp3, temp1);
1747 __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
1748 temp4, use64byteVector, L_entry, L_exit);
1749 }
1750 __ BIND(L_exit);
1751 }
1752 address ucme_exit_pc = __ pc();
1753 // When called from generic_arraycopy, r11 contains specific values
1754 // used during the arraycopy epilogue; re-initialize r11 here.
1755 if (is_oop) {
1756 __ movq(r11, count);
1757 }
1758 bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
1759 restore_argument_regs(type);
1760 inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
1761 __ xorptr(rax, rax); // return 0
1762 __ vzeroupper();
1763 __ leave(); // required for proper stackwalking of RuntimeStub frame
1764 __ ret(0);
1765 return start;
1766 }
1767 #endif // COMPILER2_OR_JVMCI
1768
1769
1770 // Arguments:
1771 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1772 // ignored
1773 // name - stub name string
1774 //
1775 // Inputs:
1776 // c_rarg0 - source array address
1777 // c_rarg1 - destination array address
1778 // c_rarg2 - element count, treated as ssize_t, can be zero
1779 //
1780 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
1781 // we let the hardware handle it. The one to eight bytes within words,
1782 // dwords or qwords that span cache line boundaries will still be loaded
1783 // and stored atomically.
1784 //
1785 // Side Effects:
1786 // disjoint_byte_copy_entry is set to the no-overlap entry point
1787 // used by generate_conjoint_byte_copy().
1788 //
1789 address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
1790 #if COMPILER2_OR_JVMCI
1791 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
1792 return generate_disjoint_copy_avx3_masked(entry, "jbyte_disjoint_arraycopy_avx3", 0,
1793 aligned, false, false);
1794 }
1795 #endif
1796 __ align(CodeEntryAlignment);
1797 StubCodeMark mark(this, "StubRoutines", name);
1798 address start = __ pc();
1799
1800 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
1801 Label L_copy_byte, L_exit;
1802 const Register from = rdi; // source array address
1803 const Register to = rsi; // destination array address
1804 const Register count = rdx; // elements count
1805 const Register byte_count = rcx;
1806 const Register qword_count = count;
1807 const Register end_from = from; // source array end address
1808 const Register end_to = to; // destination array end address
1809 // End pointers are inclusive, and if count is not zero they point
1810 // to the last unit copied: end_to[0] := end_from[0]
1811
1812 __ enter(); // required for proper stackwalking of RuntimeStub frame
1813 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1814
1815 if (entry != NULL) {
1816 *entry = __ pc();
1817 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1818 BLOCK_COMMENT("Entry:");
1819 }
1820
1821 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1822 // r9 and r10 may be used to save non-volatile registers
1823
1824 {
1825 // UnsafeCopyMemory page error: continue after ucm
1826 UnsafeCopyMemoryMark ucmm(this, !aligned, true);
1827 // 'from', 'to' and 'count' are now valid
1828 __ movptr(byte_count, count);
1829 __ shrptr(count, 3); // count => qword_count
1830
1831 // Copy from low to high addresses. Use 'to' as scratch.
1832 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
1833 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
1834 __ negptr(qword_count); // make the count negative
1835 __ jmp(L_copy_bytes);
1836
1837 // Copy trailing qwords
1838 __ BIND(L_copy_8_bytes);
1839 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
1840 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
1841 __ increment(qword_count);
1842 __ jcc(Assembler::notZero, L_copy_8_bytes);
1843
1844 // Check for and copy trailing dword
1845 __ BIND(L_copy_4_bytes);
1846 __ testl(byte_count, 4);
1847 __ jccb(Assembler::zero, L_copy_2_bytes);
1848 __ movl(rax, Address(end_from, 8));
1849 __ movl(Address(end_to, 8), rax);
1850
1851 __ addptr(end_from, 4);
1852 __ addptr(end_to, 4);
1853
1854 // Check for and copy trailing word
1855 __ BIND(L_copy_2_bytes);
1856 __ testl(byte_count, 2);
1857 __ jccb(Assembler::zero, L_copy_byte);
1858 __ movw(rax, Address(end_from, 8));
1859 __ movw(Address(end_to, 8), rax);
1860
1861 __ addptr(end_from, 2);
1862 __ addptr(end_to, 2);
1863
1864 // Check for and copy trailing byte
1865 __ BIND(L_copy_byte);
1866 __ testl(byte_count, 1);
1867 __ jccb(Assembler::zero, L_exit);
1868 __ movb(rax, Address(end_from, 8));
1869 __ movb(Address(end_to, 8), rax);
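// (Worked example: byte_count == 29 gives qword_count == 29 >> 3 == 3
// (24 bytes in the main path); then bit 4 copies a dword (4 bytes), bit 2
// copies nothing, and bit 1 copies the final byte: 24 + 4 + 1 == 29.)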
1870 }
1871 __ BIND(L_exit);
1872 address ucme_exit_pc = __ pc();
1873 restore_arg_regs();
1874 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
1875 __ xorptr(rax, rax); // return 0
1876 __ vzeroupper();
1877 __ leave(); // required for proper stackwalking of RuntimeStub frame
1878 __ ret(0);
1879
1880 {
1881 UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
1882 // Copy in multi-byte chunks
1883 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1884 __ jmp(L_copy_4_bytes);
1885 }
1886 return start;
1887 }
1888
1889 // Arguments:
1890 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1891 // ignored
1892 // name - stub name string
1893 //
1894 // Inputs:
1895 // c_rarg0 - source array address
1896 // c_rarg1 - destination array address
1897 // c_rarg2 - element count, treated as ssize_t, can be zero
1898 //
1899 // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
1900 // we let the hardware handle it. The one to eight bytes within words,
1901 // dwords or qwords that span cache line boundaries will still be loaded
1902 // and stored atomically.
1903 //
1904 address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
1905 address* entry, const char *name) {
1906 #if COMPILER2_OR_JVMCI
1907 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
1908 return generate_conjoint_copy_avx3_masked(entry, "jbyte_conjoint_arraycopy_avx3", 0,
1909 nooverlap_target, aligned, false, false);
1910 }
1911 #endif
1912 __ align(CodeEntryAlignment);
1913 StubCodeMark mark(this, "StubRoutines", name);
1914 address start = __ pc();
1915
1916 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
1917 const Register from = rdi; // source array address
1918 const Register to = rsi; // destination array address
1919 const Register count = rdx; // elements count
1920 const Register byte_count = rcx;
1921 const Register qword_count = count;
1922
1923 __ enter(); // required for proper stackwalking of RuntimeStub frame
1924 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
1925
1926 if (entry != NULL) {
1927 *entry = __ pc();
1928 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
1929 BLOCK_COMMENT("Entry:");
1930 }
1931
1932 array_overlap_test(nooverlap_target, Address::times_1);
1933 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
1934 // r9 and r10 may be used to save non-volatile registers
1935
1936 {
1937 // UnsafeCopyMemory page error: continue after ucm
1938 UnsafeCopyMemoryMark ucmm(this, !aligned, true);
1939 // 'from', 'to' and 'count' are now valid
1940 __ movptr(byte_count, count);
1941 __ shrptr(count, 3); // count => qword_count
1942
1943 // Copy from high to low addresses.
1944
1945 // Check for and copy trailing byte
1946 __ testl(byte_count, 1);
1947 __ jcc(Assembler::zero, L_copy_2_bytes);
1948 __ movb(rax, Address(from, byte_count, Address::times_1, -1));
1949 __ movb(Address(to, byte_count, Address::times_1, -1), rax);
1950 __ decrement(byte_count); // Adjust for possible trailing word
1951
1952 // Check for and copy trailing word
1953 __ BIND(L_copy_2_bytes);
1954 __ testl(byte_count, 2);
1955 __ jcc(Assembler::zero, L_copy_4_bytes);
1956 __ movw(rax, Address(from, byte_count, Address::times_1, -2));
1957 __ movw(Address(to, byte_count, Address::times_1, -2), rax);
1958
1959 // Check for and copy trailing dword
1960 __ BIND(L_copy_4_bytes);
1961 __ testl(byte_count, 4);
1962 __ jcc(Assembler::zero, L_copy_bytes);
1963 __ movl(rax, Address(from, qword_count, Address::times_8));
1964 __ movl(Address(to, qword_count, Address::times_8), rax);
1965 __ jmp(L_copy_bytes);
1966
1967 // Copy trailing qwords
1968 __ BIND(L_copy_8_bytes);
1969 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
1970 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
1971 __ decrement(qword_count);
1972 __ jcc(Assembler::notZero, L_copy_8_bytes);
1973 }
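// (Note: both epilogues of this stub are reachable; the trailing-qword loop
// above falls through to the restore/ret sequence directly below, while
// copy_bytes_backward, emitted after that sequence, falls through to the
// second restore/ret once no whole qwords remain.)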
1974 restore_arg_regs();
1975 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
1976 __ xorptr(rax, rax); // return 0
1977 __ vzeroupper();
1978 __ leave(); // required for proper stackwalking of RuntimeStub frame
1979 __ ret(0);
1980
1981 {
1982 // UnsafeCopyMemory page error: continue after ucm
1983 UnsafeCopyMemoryMark ucmm(this, !aligned, true);
1984 // Copy in multi-byte chunks
1985 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
1986 }
1987 restore_arg_regs();
1988 inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
1989 __ xorptr(rax, rax); // return 0
1990 __ vzeroupper();
1991 __ leave(); // required for proper stackwalking of RuntimeStub frame
1992 __ ret(0);
1993
1994 return start;
1995 }
1996
1997 // Arguments:
1998 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
1999 // ignored
2000 // name - stub name string
2001 //
2002 // Inputs:
2003 // c_rarg0 - source array address
2004 // c_rarg1 - destination array address
2005 // c_rarg2 - element count, treated as ssize_t, can be zero
2006 //
2007 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
2008 // let the hardware handle it. The two or four words within dwords
2009 // or qwords that span cache line boundaries will still be loaded
2010 // and stored atomically.
2011 //
2012 // Side Effects:
2013 // disjoint_short_copy_entry is set to the no-overlap entry point
2014 // used by generate_conjoint_short_copy().
2015 //
2016 address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
2017 #if COMPILER2_OR_JVMCI
2018 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2019 return generate_disjoint_copy_avx3_masked(entry, "jshort_disjoint_arraycopy_avx3", 1,
2020 aligned, false, false);
2021 }
2022 #endif
2023
2024 __ align(CodeEntryAlignment);
2025 StubCodeMark mark(this, "StubRoutines", name);
2026 address start = __ pc();
2027
2028 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes, L_exit;
2029 const Register from = rdi; // source array address
2030 const Register to = rsi; // destination array address
2031 const Register count = rdx; // elements count
2032 const Register word_count = rcx;
2033 const Register qword_count = count;
2034 const Register end_from = from; // source array end address
2035 const Register end_to = to; // destination array end address
2036 // End pointers are inclusive, and if count is not zero they point
2037 // to the last unit copied: end_to[0] := end_from[0]
2038
2039 __ enter(); // required for proper stackwalking of RuntimeStub frame
2040 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2041
2042 if (entry != NULL) {
2043 *entry = __ pc();
2044 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2045 BLOCK_COMMENT("Entry:");
2046 }
2047
2048 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2049 // r9 and r10 may be used to save non-volatile registers
2050
2051 {
2052 // UnsafeCopyMemory page error: continue after ucm
2053 UnsafeCopyMemoryMark ucmm(this, !aligned, true);
2054 // 'from', 'to' and 'count' are now valid
2055 __ movptr(word_count, count);
2056 __ shrptr(count, 2); // count => qword_count
2057
2058 // Copy from low to high addresses. Use 'to' as scratch.
2059 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2060 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2061 __ negptr(qword_count);
2062 __ jmp(L_copy_bytes);
2063
2064 // Copy trailing qwords
2065 __ BIND(L_copy_8_bytes);
2066 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2067 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2068 __ increment(qword_count);
2069 __ jcc(Assembler::notZero, L_copy_8_bytes);
2070
2071 // Original 'dest' is trashed, so we can't use it as a
2072 // base register for a possible trailing word copy
2073
2074 // Check for and copy trailing dword
2075 __ BIND(L_copy_4_bytes);
2076 __ testl(word_count, 2);
2077 __ jccb(Assembler::zero, L_copy_2_bytes);
2078 __ movl(rax, Address(end_from, 8));
2079 __ movl(Address(end_to, 8), rax);
2080
2081 __ addptr(end_from, 4);
2082 __ addptr(end_to, 4);
2083
2084 // Check for and copy trailing word
2085 __ BIND(L_copy_2_bytes);
2086 __ testl(word_count, 1);
2087 __ jccb(Assembler::zero, L_exit);
2088 __ movw(rax, Address(end_from, 8));
2089 __ movw(Address(end_to, 8), rax);
2090 }
2091 __ BIND(L_exit);
2092 address ucme_exit_pc = __ pc();
2093 restore_arg_regs();
2094 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
2095 __ xorptr(rax, rax); // return 0
2096 __ vzeroupper();
2097 __ leave(); // required for proper stackwalking of RuntimeStub frame
2098 __ ret(0);
2099
2100 {
2101 UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
2102 // Copy in multi-byte chunks
2103 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2104 __ jmp(L_copy_4_bytes);
2105 }
2106
2107 return start;
2108 }
2109
2110 address generate_fill(BasicType t, bool aligned, const char *name) {
2111 __ align(CodeEntryAlignment);
2112 StubCodeMark mark(this, "StubRoutines", name);
2113 address start = __ pc();
2114
2115 BLOCK_COMMENT("Entry:");
2116
2117 const Register to = c_rarg0; // destination array address
2118 const Register value = c_rarg1; // value
2119 const Register count = c_rarg2; // elements count
2120 __ mov(r11, count);
2121
2122 __ enter(); // required for proper stackwalking of RuntimeStub frame
2123
2124 __ generate_fill(t, aligned, to, value, r11, rax, xmm0);
2125
2126 __ vzeroupper();
2127 __ leave(); // required for proper stackwalking of RuntimeStub frame
2128 __ ret(0);
2129 return start;
2130 }
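As a reading aid, the stub generated here behaves like the following C++ sketch. The typedef name fill_stub_t is hypothetical; on SysV x86-64 the three arguments arrive in rdi/rsi/rdx, matching c_rarg0..c_rarg2 above, and 'count' is an element count.

#include <cstdint>

// Hypothetical view of the generated stub's calling convention (sketch,
// not VM code):
typedef void (*fill_stub_t)(void* to, int32_t value, int64_t count);
// For a T_INT fill, calling such a stub is equivalent to:
//   for (int64_t i = 0; i < count; i++) ((int32_t*)to)[i] = value;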
2131
2132 // Arguments:
2133 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
2134 // ignored
2135 // name - stub name string
2136 //
2137 // Inputs:
2138 // c_rarg0 - source array address
2139 // c_rarg1 - destination array address
2140 // c_rarg2 - element count, treated as ssize_t, can be zero
2141 //
2142 // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
2143 // let the hardware handle it. The two or four words within dwords
2144 // or qwords that span cache line boundaries will still be loaded
2145 // and stored atomically.
2146 //
2147 address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
2148 address *entry, const char *name) {
2149 #if COMPILER2_OR_JVMCI
2150 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2151 return generate_conjoint_copy_avx3_masked(entry, "jshort_conjoint_arraycopy_avx3", 1,
2152 nooverlap_target, aligned, false, false);
2153 }
2154 #endif
2155 __ align(CodeEntryAlignment);
2156 StubCodeMark mark(this, "StubRoutines", name);
2157 address start = __ pc();
2158
2159 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
2160 const Register from = rdi; // source array address
2161 const Register to = rsi; // destination array address
2162 const Register count = rdx; // elements count
2163 const Register word_count = rcx;
2164 const Register qword_count = count;
2165
2166 __ enter(); // required for proper stackwalking of RuntimeStub frame
2167 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2168
2169 if (entry != NULL) {
2170 *entry = __ pc();
2171 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2172 BLOCK_COMMENT("Entry:");
2173 }
2174
2175 array_overlap_test(nooverlap_target, Address::times_2);
2176 setup_arg_regs(); // from => rdi, to => rsi, count => rdx
2177 // r9 and r10 may be used to save non-volatile registers
2178
2179 {
2180 // UnsafeCopyMemory page error: continue after ucm
2181 UnsafeCopyMemoryMark ucmm(this, !aligned, true);
2182 // 'from', 'to' and 'count' are now valid
2183 __ movptr(word_count, count);
2184 __ shrptr(count, 2); // count => qword_count
2185
2186 // Copy from high to low addresses. Use 'to' as scratch.
2187
2188 // Check for and copy trailing word
2189 __ testl(word_count, 1);
2190 __ jccb(Assembler::zero, L_copy_4_bytes);
2191 __ movw(rax, Address(from, word_count, Address::times_2, -2));
2192 __ movw(Address(to, word_count, Address::times_2, -2), rax);
2193
2194 // Check for and copy trailing dword
2195 __ BIND(L_copy_4_bytes);
2196 __ testl(word_count, 2);
2197 __ jcc(Assembler::zero, L_copy_bytes);
2198 __ movl(rax, Address(from, qword_count, Address::times_8));
2199 __ movl(Address(to, qword_count, Address::times_8), rax);
2200 __ jmp(L_copy_bytes);
2201
2202 // Copy trailing qwords
2203 __ BIND(L_copy_8_bytes);
2204 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2205 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2206 __ decrement(qword_count);
2207 __ jcc(Assembler::notZero, L_copy_8_bytes);
2208 }
2209 restore_arg_regs();
2210 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
2211 __ xorptr(rax, rax); // return 0
2212 __ vzeroupper();
2213 __ leave(); // required for proper stackwalking of RuntimeStub frame
2214 __ ret(0);
2215
2216 {
2217 // UnsafeCopyMemory page error: continue after ucm
2218 UnsafeCopyMemoryMark ucmm(this, !aligned, true);
2219 // Copy in multi-byte chunks
2220 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2221 }
2222 restore_arg_regs();
2223 inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
2224 __ xorptr(rax, rax); // return 0
2225 __ vzeroupper();
2226 __ leave(); // required for proper stackwalking of RuntimeStub frame
2227 __ ret(0);
2228
2229 return start;
2230 }
2231
2232 // Arguments:
2233 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
2234 // ignored
2235 // is_oop - true => oop array, so generate store check code
2236 // name - stub name string
2237 //
2238 // Inputs:
2239 // c_rarg0 - source array address
2240 // c_rarg1 - destination array address
2241 // c_rarg2 - element count, treated as ssize_t, can be zero
2242 //
2243 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
2244 // the hardware handle it. The two dwords within qwords that span
2245 // cache line boundaries will still be loaded and stored atomically.
2246 //
2247 // Side Effects:
2248 // disjoint_int_copy_entry is set to the no-overlap entry point
2249 // used by generate_conjoint_int_oop_copy().
2250 //
2251 address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
2252 const char *name, bool dest_uninitialized = false) {
2253 #if COMPILER2_OR_JVMCI
2254 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2255 return generate_disjoint_copy_avx3_masked(entry, "jint_disjoint_arraycopy_avx3", 2,
2256 aligned, is_oop, dest_uninitialized);
2257 }
2258 #endif
2259
2260 __ align(CodeEntryAlignment);
2261 StubCodeMark mark(this, "StubRoutines", name);
2262 address start = __ pc();
2263
2264 Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_exit;
2265 const Register from = rdi; // source array address
2266 const Register to = rsi; // destination array address
2267 const Register count = rdx; // elements count
2268 const Register dword_count = rcx;
2269 const Register qword_count = count;
2270 const Register end_from = from; // source array end address
2271 const Register end_to = to; // destination array end address
2272 // End pointers are inclusive, and if count is not zero they point
2273 // to the last unit copied: end_to[0] := end_from[0]
2274
2275 __ enter(); // required for proper stackwalking of RuntimeStub frame
2276 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2277
2278 if (entry != NULL) {
2279 *entry = __ pc();
2280 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2281 BLOCK_COMMENT("Entry:");
2282 }
2283
2284 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2285 // r9 is used to save r15_thread
2286
2287 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2288 if (dest_uninitialized) {
2289 decorators |= IS_DEST_UNINITIALIZED;
2290 }
2291 if (aligned) {
2292 decorators |= ARRAYCOPY_ALIGNED;
2293 }
2294
2295 BasicType type = is_oop ? T_OBJECT : T_INT;
2296 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2297 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
2298
2299 {
2300 // UnsafeCopyMemory page error: continue after ucm
2301 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2302 // 'from', 'to' and 'count' are now valid
2303 __ movptr(dword_count, count);
2304 __ shrptr(count, 1); // count => qword_count
2305
2306 // Copy from low to high addresses. Use 'to' as scratch.
2307 __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
2308 __ lea(end_to, Address(to, qword_count, Address::times_8, -8));
2309 __ negptr(qword_count);
2310 __ jmp(L_copy_bytes);
2311
2312 // Copy trailing qwords
2313 __ BIND(L_copy_8_bytes);
2314 __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2315 __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2316 __ increment(qword_count);
2317 __ jcc(Assembler::notZero, L_copy_8_bytes);
2318
2319 // Check for and copy trailing dword
2320 __ BIND(L_copy_4_bytes);
2321 __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
2322 __ jccb(Assembler::zero, L_exit);
2323 __ movl(rax, Address(end_from, 8));
2324 __ movl(Address(end_to, 8), rax);
2325 }
2326 __ BIND(L_exit);
2327 address ucme_exit_pc = __ pc();
2328 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
2329 restore_arg_regs_using_thread();
2330 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2331 __ vzeroupper();
2332 __ xorptr(rax, rax); // return 0
2333 __ leave(); // required for proper stackwalking of RuntimeStub frame
2334 __ ret(0);
2335
2336 {
2337 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
2339 // Copy in multi-byte chunks
2340 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2341 __ jmp(L_copy_4_bytes);
2341 }
2342
2343 return start;
2344 }
2345
2346 // Arguments:
2347 // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
2348 // ignored
2349 // is_oop - true => oop array, so generate store check code
2350 // name - stub name string
2351 //
2352 // Inputs:
2353 // c_rarg0 - source array address
2354 // c_rarg1 - destination array address
2355 // c_rarg2 - element count, treated as ssize_t, can be zero
2356 //
2357 // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
2358 // the hardware handle it. The two dwords within qwords that span
2359 // cache line boundaries will still be loaded and stored atomically.
2360 //
2361 address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
2362 address *entry, const char *name,
2363 bool dest_uninitialized = false) {
2364 #if COMPILER2_OR_JVMCI
2365 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2366 return generate_conjoint_copy_avx3_masked(entry, "jint_conjoint_arraycopy_avx3", 2,
2367 nooverlap_target, aligned, is_oop, dest_uninitialized);
2368 }
2369 #endif
2370 __ align(CodeEntryAlignment);
2371 StubCodeMark mark(this, "StubRoutines", name);
2372 address start = __ pc();
2373
2374 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2375 const Register from = rdi; // source array address
2376 const Register to = rsi; // destination array address
2377 const Register count = rdx; // elements count
2378 const Register dword_count = rcx;
2379 const Register qword_count = count;
2380
2381 __ enter(); // required for proper stackwalking of RuntimeStub frame
2382 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2383
2384 if (entry != NULL) {
2385 *entry = __ pc();
2386 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2387 BLOCK_COMMENT("Entry:");
2388 }
2389
2390 array_overlap_test(nooverlap_target, Address::times_4);
2391 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2392 // r9 is used to save r15_thread
2393
2394 DecoratorSet decorators = IN_HEAP | IS_ARRAY;
2395 if (dest_uninitialized) {
2396 decorators |= IS_DEST_UNINITIALIZED;
2397 }
2398 if (aligned) {
2399 decorators |= ARRAYCOPY_ALIGNED;
2400 }
2401
2402 BasicType type = is_oop ? T_OBJECT : T_INT;
2403 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2404 // no registers are destroyed by this call
2405 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
2406
2407 assert_clean_int(count, rax); // Make sure 'count' is clean int.
2408 {
2409 // UnsafeCopyMemory page error: continue after ucm
2410 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2411 // 'from', 'to' and 'count' are now valid
2412 __ movptr(dword_count, count);
2413 __ shrptr(count, 1); // count => qword_count
2414
2415 // Copy from high to low addresses. Use 'to' as scratch.
2416
2417 // Check for and copy trailing dword
2418 __ testl(dword_count, 1);
2419 __ jcc(Assembler::zero, L_copy_bytes);
2420 __ movl(rax, Address(from, dword_count, Address::times_4, -4));
2421 __ movl(Address(to, dword_count, Address::times_4, -4), rax);
2422 __ jmp(L_copy_bytes);
2423
2424 // Copy trailing qwords
2425 __ BIND(L_copy_8_bytes);
2426 __ movq(rax, Address(from, qword_count, Address::times_8, -8));
2427 __ movq(Address(to, qword_count, Address::times_8, -8), rax);
2428 __ decrement(qword_count);
2429 __ jcc(Assembler::notZero, L_copy_8_bytes);
2430 }
2431 if (is_oop) {
2432 __ jmp(L_exit);
2433 }
2434 restore_arg_regs_using_thread();
2435 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2436 __ xorptr(rax, rax); // return 0
2437 __ vzeroupper();
2438 __ leave(); // required for proper stackwalking of RuntimeStub frame
2439 __ ret(0);
2440
2441 {
2442 // UnsafeCopyMemory page error: continue after ucm
2443 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2444 // Copy in multi-byte chunks
2445 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2446 }
2447
2448 __ BIND(L_exit);
2449 bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
2450 restore_arg_regs_using_thread();
2451 inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
2452 __ xorptr(rax, rax); // return 0
2453 __ vzeroupper();
2454 __ leave(); // required for proper stackwalking of RuntimeStub frame
2455 __ ret(0);
2456
2457 return start;
2458 }
2459
2460 // Arguments:
2461 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2462 // ignored
2463 // is_oop - true => oop array, so generate store check code
2464 // name - stub name string
2465 //
2466 // Inputs:
2467 // c_rarg0 - source array address
2468 // c_rarg1 - destination array address
2469 // c_rarg2 - element count, treated as ssize_t, can be zero
2470 //
2471 // Side Effects:
2472 // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
2473 // no-overlap entry point used by generate_conjoint_long_oop_copy().
2474 //
2475 address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
2476 const char *name, bool dest_uninitialized = false) {
2477 #if COMPILER2_OR_JVMCI
2478 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2479 return generate_disjoint_copy_avx3_masked(entry, "jlong_disjoint_arraycopy_avx3", 3,
2480 aligned, is_oop, dest_uninitialized);
2481 }
2482 #endif
2483 __ align(CodeEntryAlignment);
2484 StubCodeMark mark(this, "StubRoutines", name);
2485 address start = __ pc();
2486
2487 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2488 const Register from = rdi; // source array address
2489 const Register to = rsi; // destination array address
2490 const Register qword_count = rdx; // elements count
2491 const Register end_from = from; // source array end address
2492 const Register end_to = rcx; // destination array end address
2493 const Register saved_count = r11;
2494 // End pointers are inclusive, and if count is not zero they point
2495 // to the last unit copied: end_to[0] := end_from[0]
2496
2497 __ enter(); // required for proper stackwalking of RuntimeStub frame
2498 // Save no-overlap entry point for generate_conjoint_long_oop_copy()
2499 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2500
2501 if (entry != NULL) {
2502 *entry = __ pc();
2503 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2504 BLOCK_COMMENT("Entry:");
2505 }
2506
2507 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2508 // r9 is used to save r15_thread
2509 // 'from', 'to' and 'qword_count' are now valid
2510
2511 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
2512 if (dest_uninitialized) {
2513 decorators |= IS_DEST_UNINITIALIZED;
2514 }
2515 if (aligned) {
2516 decorators |= ARRAYCOPY_ALIGNED;
2517 }
2518
2519 BasicType type = is_oop ? T_OBJECT : T_LONG;
2520 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2521 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2522 {
2523 // UnsafeCopyMemory page error: continue after ucm
2524 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2525
2526 // Copy from low to high addresses. Use 'to' as scratch.
2527 __masm-> lea(end_from, Address(from, qword_count, Address::times_8, -8));
2528 __masm-> lea(end_to, Address(to, qword_count, Address::times_8, -8));
2529 __masm-> negptr(qword_count);
2530 __masm-> jmp(L_copy_bytes);
2531
2532 // Copy trailing qwords
2533 __masm-> BIND(L_copy_8_bytes);
2534 __masm-> movq(rax, Address(end_from, qword_count, Address::times_8, 8));
2535 __masm-> movq(Address(end_to, qword_count, Address::times_8, 8), rax);
2536 __masm-> increment(qword_count);
2537 __masm-> jcc(Assembler::notZero, L_copy_8_bytes);
2538 }
2539 if (is_oop) {
2540 __masm-> jmp(L_exit);
2541 } else {
2542 restore_arg_regs_using_thread();
2543 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2544 __masm-> xorptr(rax, rax); // return 0
2545 __masm-> vzeroupper();
2546 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
2547 __masm-> ret(0);
2548 }
2549
2550 {
2551 // UnsafeCopyMemory page error: continue after ucm
2552 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2553 // Copy in multi-bytes chunks
2554 copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2555 }
2556
2557 __masm-> BIND(L_exit);
2558 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2559 restore_arg_regs_using_thread();
2560 if (is_oop) {
2561 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2562 } else {
2563 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2564 }
2565 __masm-> vzeroupper();
2566 __masm-> xorptr(rax, rax); // return 0
2567 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
2568 __masm-> ret(0);
2569
2570 return start;
2571 }
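// Indexing sketch for the disjoint loop above (illustrative, not the
// emitted code): the end pointers are biased to the last element and the
// count is negated, so the index runs from -count up to zero and the
// loop-exit test falls out of the increment for free.
static void disjoint_qword_copy_sketch(const jlong* from, jlong* to, intptr_t count) {
  if (count == 0) return;
  const jlong* end_from = from + count - 1;  // inclusive end pointer
  jlong*       end_to   = to   + count - 1;
  for (intptr_t i = -count; i != 0; i++) {
    end_to[i + 1] = end_from[i + 1];         // i == -1 copies end_to[0]
  }
}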
2572
2573 // Arguments:
2574 // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
2575 // ignored
2576 // is_oop - true => oop array, so generate store check code
2577 // name - stub name string
2578 //
2579 // Inputs:
2580 // c_rarg0 - source array address
2581 // c_rarg1 - destination array address
2582 // c_rarg2 - element count, treated as ssize_t, can be zero
2583 //
2584 address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
2585 address nooverlap_target, address *entry,
2586 const char *name, bool dest_uninitialized = false) {
2587 #if COMPILER2_OR_JVMCI
2588 if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
2589 return generate_conjoint_copy_avx3_masked(entry, "jlong_conjoint_arraycopy_avx3", 3,
2590 nooverlap_target, aligned, is_oop, dest_uninitialized);
2591 }
2592#endif
2593 __masm-> align(CodeEntryAlignment);
2594 StubCodeMark mark(this, "StubRoutines", name);
2595 address start = __masm-> pc();
2596
2597 Label L_copy_bytes, L_copy_8_bytes, L_exit;
2598 const Register from = rdi; // source array address
2599 const Register to = rsi; // destination array address
2600 const Register qword_count = rdx; // elements count
2601 const Register saved_count = rcx;
2602
2603 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
2604 assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
2605
2606 if (entry != NULL) {
2607 *entry = __masm-> pc();
2608 // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
2609 BLOCK_COMMENT("Entry:");
2610 }
2611
2612 array_overlap_test(nooverlap_target, Address::times_8);
2613 setup_arg_regs_using_thread(); // from => rdi, to => rsi, count => rdx
2614 // r9 is used to save r15_thread
2615 // 'from', 'to' and 'qword_count' are now valid
2616
2617 DecoratorSet decorators = IN_HEAP | IS_ARRAY;
2618 if (dest_uninitialized) {
2619 decorators |= IS_DEST_UNINITIALIZED;
2620 }
2621 if (aligned) {
2622 decorators |= ARRAYCOPY_ALIGNED;
2623 }
2624
2625 BasicType type = is_oop ? T_OBJECT : T_LONG;
2626 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2627 bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
2628 {
2629 // UnsafeCopyMemory page error: continue after ucm
2630 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2631
2632 __masm-> jmp(L_copy_bytes);
2633
2634 // Copy trailing qwords
2635 __masm-> BIND(L_copy_8_bytes);
2636 __masm-> movq(rax, Address(from, qword_count, Address::times_8, -8));
2637 __masm-> movq(Address(to, qword_count, Address::times_8, -8), rax);
2638 __masm-> decrement(qword_count);
2639 __masm-> jcc(Assembler::notZero, L_copy_8_bytes);
2640 }
2641 if (is_oop) {
2642 __masm-> jmp(L_exit);
2643 } else {
2644 restore_arg_regs_using_thread();
2645 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2646 __masm-> xorptr(rax, rax); // return 0
2647 __masm-> vzeroupper();
2648 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
2649 __masm-> ret(0);
2650 }
2651 {
2652 // UnsafeCopyMemory page error: continue after ucm
2653 UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
2654
2655 // Copy in multi-bytes chunks
2656 copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
2657 }
2658 __masm-> BIND(L_exit);
2659 bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
2660 restore_arg_regs_using_thread();
2661 if (is_oop) {
2662 inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free
2663 } else {
2664 inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free
2665 }
2666 __masm-> vzeroupper();
2667 __masm-> xorptr(rax, rax); // return 0
2668 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
2669 __masm-> ret(0);
2670
2671 return start;
2672 }
2673
2674
2675 // Helper for generating a dynamic type check.
2676 // Smashes no registers.
2677 void generate_type_check(Register sub_klass,
2678 Register super_check_offset,
2679 Register super_klass,
2680 Label& L_success) {
2681 assert_different_registers(sub_klass, super_check_offset, super_klass);
2682
2683 BLOCK_COMMENT("type_check:");
2684
2685 Label L_miss;
2686
2687 __masm-> check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL,
2688 super_check_offset);
2689 __masm-> check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
2690
2691 // Fall through on failure!
2692 __masm-> BIND(L_miss);
2693 }
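// Shape of the check generated above, in plain C++ (illustrative only; the
// real fast and slow paths are emitted by check_klass_subtype_fast_path and
// check_klass_subtype_slow_path): the fast path compares one word read at
// sub_klass + super_check_offset against super_klass, which catches exact
// matches and primary supertypes; a miss falls back to a scan of the
// secondary-supers list, elided here.
static bool type_check_fast_path_sketch(Klass* sub, Klass* super, int super_check_offset) {
  Klass* probe = *(Klass**)((address)sub + super_check_offset);
  return probe == super;  // on a miss, the slow path scans secondary supers
}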
2694
2695 //
2696 // Generate checkcasting array copy stub
2697 //
2698 // Input:
2699 // c_rarg0 - source array address
2700 // c_rarg1 - destination array address
2701 // c_rarg2 - element count, treated as ssize_t, can be zero
2702 // c_rarg3 - size_t ckoff (super_check_offset)
2703 // not Win64
2704 // c_rarg4 - oop ckval (super_klass)
2705 // Win64
2706 // rsp+40 - oop ckval (super_klass)
2707 //
2708 // Output:
2709 // rax == 0 - success
2710 // rax == -1^K - failure, where K is partial transfer count
2711 //
2712 address generate_checkcast_copy(const char *name, address *entry,
2713 bool dest_uninitialized = false) {
2714
2715 Label L_load_element, L_store_element, L_do_card_marks, L_done;
2716
2717 // Input registers (after setup_arg_regs)
2718 const Register from = rdi; // source array address
2719 const Register to = rsi; // destination array address
2720 const Register length = rdx; // elements count
2721 const Register ckoff = rcx; // super_check_offset
2722 const Register ckval = r8; // super_klass
2723
2724 // Registers used as temps (r13, r14 are save-on-entry)
2725 const Register end_from = from; // source array end address
2726 const Register end_to = r13; // destination array end address
2727 const Register count = rdx; // -(count_remaining)
2728 const Register r14_length = r14; // saved copy of length
2729 // End pointers are inclusive, and if length is not zero they point
2730 // to the last unit copied: end_to[0] := end_from[0]
2731
2732 const Register rax_oop = rax; // actual oop copied
2733 const Register r11_klass = r11; // oop._klass
2734
2735 //---------------------------------------------------------------
2736 // Assembler stub will be used for this call to arraycopy
2737 // if the two arrays are subtypes of Object[] but the
2738 // destination array type is not equal to or a supertype
2739 // of the source type. Each element must be separately
2740 // checked.
2741
2742 __masm-> align(CodeEntryAlignment);
2743 StubCodeMark mark(this, "StubRoutines", name);
2744 address start = __masm-> pc();
2745
2746 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
2747
2748 #ifdef ASSERT
2749 // caller guarantees that the arrays really are different
2750 // otherwise, we would have to make conjoint checks
2751 { Label L;
2752 array_overlap_test(L, TIMES_OOP);
2753 __masm-> stop("checkcast_copy within a single array");
2754 __masm-> bind(L);
2755 }
2756#endif //ASSERT
2757
2758 setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
2759 // ckoff => rcx, ckval => r8
2760 // r9 and r10 may be used to save non-volatile registers
2761#ifdef _WIN64
2762 // last argument (#4) is on stack on Win64
2763 __masm-> movptr(ckval, Address(rsp, 6 * wordSize));
2764#endif
2765
2766 // Caller of this entry point must set up the argument registers.
2767 if (entry != NULL) {
2768 *entry = __masm-> pc();
2769 BLOCK_COMMENT("Entry:");
2770 }
2771
2772 // allocate spill slots for r13, r14
2773 enum {
2774 saved_r13_offset,
2775 saved_r14_offset,
2776 saved_r10_offset,
2777 saved_rbp_offset
2778 };
2779 __masm-> subptr(rsp, saved_rbp_offset * wordSize);
2780 __masm-> movptr(Address(rsp, saved_r13_offset * wordSize), r13);
2781 __masm-> movptr(Address(rsp, saved_r14_offset * wordSize), r14);
2782 __masm-> movptr(Address(rsp, saved_r10_offset * wordSize), r10);
2783
2784 #ifdef ASSERT
2785 Label L2;
2786 __masm-> get_thread(r14);
2787 __masm-> cmpptr(r15_thread, r14);
2788 __masm-> jcc(Assembler::equal, L2);
2789 __masm-> stop("StubRoutines::call_stub: r15_thread is modified by call");
2790 __masm-> bind(L2);
2791#endif // ASSERT
2792
2793 // check that int operands are properly extended to size_t
2794 assert_clean_int(length, rax);
2795 assert_clean_int(ckoff, rax);
2796
2797 #ifdef ASSERT
2798 BLOCK_COMMENT("assert consistent ckoff/ckval");
2799 // The ckoff and ckval must be mutually consistent,
2800 // even though caller generates both.
2801 { Label L;
2802 int sco_offset = in_bytes(Klass::super_check_offset_offset());
2803 __masm-> cmpl(ckoff, Address(ckval, sco_offset));
2804 __masm-> jcc(Assembler::equal, L);
2805 __masm-> stop("super_check_offset inconsistent");
2806 __masm-> bind(L);
2807 }
2808#endif //ASSERT
2809
2810 // Loop-invariant addresses. They are exclusive end pointers.
2811 Address end_from_addr(from, length, TIMES_OOP, 0);
2812 Address end_to_addr(to, length, TIMES_OOP, 0);
2813 // Loop-variant addresses. They assume post-incremented count < 0.
2814 Address from_element_addr(end_from, count, TIMES_OOP, 0);
2815 Address to_element_addr(end_to, count, TIMES_OOP, 0);
2816
2817 DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
2818 if (dest_uninitialized) {
2819 decorators |= IS_DEST_UNINITIALIZED;
2820 }
2821
2822 BasicType type = T_OBJECT;
2823 BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2824 bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
2825
2826 // Copy from low to high addresses, indexed from the end of each array.
2827 __masm-> lea(end_from, end_from_addr);
2828 __masm-> lea(end_to, end_to_addr);
2829 __masm-> movptr(r14_length, length); // save a copy of the length
2830 assert(length == count, ""); // else fix next line:
2831 __masm-> negptr(count); // negate and test the length
2832 __masm-> jcc(Assembler::notZero, L_load_element);
2833
2834 // Empty array: Nothing to do.
2835 __masm-> xorptr(rax, rax); // return 0 on (trivial) success
2836 __masm-> jmp(L_done);
2837
2838 // ======== begin loop ========
2839 // (Loop is rotated; its entry is L_load_element.)
2840 // Loop control:
2841 // for (count = -count; count != 0; count++)
2842 // Base pointers src, dst are biased by 8*(count-1),to last element.
2843 __masm-> align(OptoLoopAlignment);
2844
2845 __masm-> BIND(L_store_element);
2846 __masm-> store_heap_oop(to_element_addr, rax_oop, noreg, noreg, AS_RAW); // store the oop
2847 __masm-> increment(count); // increment the count toward zero
2848 __masm-> jcc(Assembler::zero, L_do_card_marks);
2849
2850 // ======== loop entry is here ========
2851 __masm-> BIND(L_load_element);
2852 __masm-> load_heap_oop(rax_oop, from_element_addr, noreg, noreg, AS_RAW); // load the oop
2853 __masm-> testptr(rax_oop, rax_oop);
2854 __masm-> jcc(Assembler::zero, L_store_element);
2855
2856 __masm-> load_klass(r11_klass, rax_oop, rscratch1);// query the object klass
2857 generate_type_check(r11_klass, ckoff, ckval, L_store_element);
2858 // ======== end loop ========
2859
2860 // It was a real error; we must depend on the caller to finish the job.
2861 // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops.
2862 // Emit GC store barriers for the oops we have copied (r14 + rdx),
2863 // and report their number to the caller.
2864 assert_different_registers(rax, r14_length, count, to, end_to, rcx, rscratch1);
2865 Label L_post_barrier;
2866 __masm-> addptr(r14_length, count); // K = (original - remaining) oops
2867 __masm-> movptr(rax, r14_length); // save the value
2868 __masm-> notptr(rax); // report (-1^K) to caller (does not affect flags)
2869 __masm-> jccb(Assembler::notZero, L_post_barrier);
2870 __masm-> jmp(L_done); // K == 0, nothing was copied, skip post barrier
2871
2872 // Come here on success only.
2873 __masm-> BIND(L_do_card_marks);
2874 __masm-> xorptr(rax, rax); // return 0 on success
2875
2876 __masm-> BIND(L_post_barrier);
2877 bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length);
2878
2879 // Common exit point (success or failure).
2880 __masm-> BIND(L_done);
2881 __masm-> movptr(r13, Address(rsp, saved_r13_offset * wordSize));
2882 __masm-> movptr(r14, Address(rsp, saved_r14_offset * wordSize));
2883 __masm-> movptr(r10, Address(rsp, saved_r10_offset * wordSize));
2884 restore_arg_regs();
2885 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free
2886 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
2887 __masm-> ret(0);
2888
2889 return start;
2890 }
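// Decoding the return value documented above (sketch): rax == 0 means every
// element was copied and store-checked; otherwise rax holds -1 ^ K, so the
// caller recovers the partial transfer count K by inverting it.
static size_t checkcast_copied_count(intptr_t rax_result) {
  // precondition: rax_result != 0, i.e. the stub reported a partial copy
  return (size_t)~rax_result;  // K elements were transferred before the miss
}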
2891
2892 //
2893 // Generate 'unsafe' array copy stub
2894 // Though just as safe as the other stubs, it takes an unscaled
2895 // size_t argument instead of an element count.
2896 //
2897 // Input:
2898 // c_rarg0 - source array address
2899 // c_rarg1 - destination array address
2900 // c_rarg2 - byte count, treated as ssize_t, can be zero
2901 //
2902 // Examines the alignment of the operands and dispatches
2903 // to a long, int, short, or byte copy loop.
2904 //
2905 address generate_unsafe_copy(const char *name,
2906 address byte_copy_entry, address short_copy_entry,
2907 address int_copy_entry, address long_copy_entry) {
2908
2909 Label L_long_aligned, L_int_aligned, L_short_aligned;
2910
2911 // Input registers (before setup_arg_regs)
2912 const Register from = c_rarg0; // source array address
2913 const Register to = c_rarg1; // destination array address
2914 const Register size = c_rarg2; // byte count (size_t)
2915
2916 // Register used as a temp
2917 const Register bits = rax; // test copy of low bits
2918
2919 __masm-> align(CodeEntryAlignment);
2920 StubCodeMark mark(this, "StubRoutines", name);
2921 address start = __masm-> pc();
2922
2923 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
2924
2925 // bump this on entry, not on exit:
2926 inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
2927
2928 __masm-> mov(bits, from);
2929 __masm-> orptr(bits, to);
2930 __masm-> orptr(bits, size);
2931
2932 __masm-> testb(bits, BytesPerLong-1);
2933 __masm-> jccb(Assembler::zero, L_long_aligned);
2934
2935 __masm-> testb(bits, BytesPerInt-1);
2936 __masm-> jccb(Assembler::zero, L_int_aligned);
2937
2938 __masm-> testb(bits, BytesPerShort-1);
2939 __masm-> jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry));
2940
2941 __masm-> BIND(L_short_aligned);
2942 __masm-> shrptr(size, LogBytesPerShort); // size => short_count
2943 __masm-> jump(RuntimeAddress(short_copy_entry));
2944
2945 __masm-> BIND(L_int_aligned);
2946 __masm-> shrptr(size, LogBytesPerInt); // size => int_count
2947 __masm-> jump(RuntimeAddress(int_copy_entry));
2948
2949 __masm-> BIND(L_long_aligned);
2950 __masm-> shrptr(size, LogBytesPerLong); // size => qword_count
2951 __masm-> jump(RuntimeAddress(long_copy_entry));
2952
2953 return start;
2954 }
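// The dispatch above in plain C++ (illustrative): OR-ing both addresses and
// the byte count means a low bit is clear in 'bits' only if it is clear in
// all three, so one test per granularity picks the widest element size that
// is safe. The *_copy names below stand for the four entry points passed
// into this generator (declarations only, assumed):
static void byte_copy(void* from, void* to, size_t count);
static void short_copy(void* from, void* to, size_t count);
static void int_copy(void* from, void* to, size_t count);
static void long_copy(void* from, void* to, size_t count);
static void unsafe_copy_dispatch_sketch(void* from, void* to, size_t size) {
  uintptr_t bits = (uintptr_t)from | (uintptr_t)to | (uintptr_t)size;
  if ((bits & (BytesPerLong - 1)) == 0) {
    long_copy(from, to, size >> LogBytesPerLong);
  } else if ((bits & (BytesPerInt - 1)) == 0) {
    int_copy(from, to, size >> LogBytesPerInt);
  } else if ((bits & (BytesPerShort - 1)) == 0) {
    short_copy(from, to, size >> LogBytesPerShort);
  } else {
    byte_copy(from, to, size);
  }
}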
2955
2956 // Perform range checks on the proposed arraycopy.
2957 // Kills temp, but nothing else.
2958 // Also, clean the sign bits of src_pos and dst_pos.
2959 void arraycopy_range_checks(Register src, // source array oop (c_rarg0)
2960 Register src_pos, // source position (c_rarg1)
2961 Register dst, // destination array oop (c_rarg2)
2962 Register dst_pos, // destination position (c_rarg3)
2963 Register length,
2964 Register temp,
2965 Label& L_failed) {
2966 BLOCK_COMMENT("arraycopy_range_checks:");
2967
2968 // if (src_pos + length > arrayOop(src)->length()) FAIL;
2969 __masm-> movl(temp, length);
2970 __masm-> addl(temp, src_pos); // src_pos + length
2971 __masm-> cmpl(temp, Address(src, arrayOopDesc::length_offset_in_bytes()));
2972 __masm-> jcc(Assembler::above, L_failed);
2973
2974 // if (dst_pos + length > arrayOop(dst)->length()) FAIL;
2975 __masm-> movl(temp, length);
2976 __masm-> addl(temp, dst_pos); // dst_pos + length
2977 __masm-> cmpl(temp, Address(dst, arrayOopDesc::length_offset_in_bytes()));
2978 __masm-> jcc(Assembler::above, L_failed);
2979
2980 // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'.
2981 // Move with sign extension can be used since they are positive.
2982 __masm-> movslq(src_pos, src_pos);
2983 __masm-> movslq(dst_pos, dst_pos);
2984
2985 BLOCK_COMMENT("arraycopy_range_checks done");
2986 }
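// The two guards above as plain C++ (illustrative): since src_pos, dst_pos
// and length are already known to be non-negative 32-bit ints, their sum
// cannot wrap an unsigned 32-bit register, so a single unsigned compare
// (jcc 'above') also covers sums that overflow into the int sign bit.
static bool range_check_sketch(juint pos, juint length, juint array_length) {
  return pos + length <= array_length;  // unsigned, mirrors cmpl + jcc(above)
}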
2987
2988 //
2989 // Generate generic array copy stubs
2990 //
2991 // Input:
2992 // c_rarg0 - src oop
2993 // c_rarg1 - src_pos (32-bits)
2994 // c_rarg2 - dst oop
2995 // c_rarg3 - dst_pos (32-bits)
2996 // not Win64
2997 // c_rarg4 - element count (32-bits)
2998 // Win64
2999 // rsp+40 - element count (32-bits)
3000 //
3001 // Output:
3002 // rax == 0 - success
3003 // rax == -1^K - failure, where K is partial transfer count
3004 //
3005 address generate_generic_copy(const char *name,
3006 address byte_copy_entry, address short_copy_entry,
3007 address int_copy_entry, address oop_copy_entry,
3008 address long_copy_entry, address checkcast_copy_entry) {
3009
3010 Label L_failed, L_failed_0, L_objArray;
3011 Label L_copy_shorts, L_copy_ints, L_copy_longs;
3012
3013 // Input registers
3014 const Register src = c_rarg0; // source array oop
3015 const Register src_pos = c_rarg1; // source position
3016 const Register dst = c_rarg2; // destination array oop
3017 const Register dst_pos = c_rarg3; // destination position
3018#ifndef _WIN64
3019 const Register length = c_rarg4;
3020 const Register rklass_tmp = r9; // load_klass
3021#else
3022 const Address length(rsp, 7 * wordSize); // elements count is on stack on Win64
3023 const Register rklass_tmp = rdi; // load_klass
3024#endif
3025
3026 { int modulus = CodeEntryAlignment;
3027 int target = modulus - 5; // 5 = sizeof jmp(L_failed)
3028 int advance = target - (__masm-> offset() % modulus);
3029 if (advance < 0) advance += modulus;
3030 if (advance > 0) __masm-> nop(advance);
3031 }
3032 StubCodeMark mark(this, "StubRoutines", name);
3033
3034 // Short-hop target to L_failed. Makes for denser prologue code.
3035 __masm-> BIND(L_failed_0);
3036 __masm-> jmp(L_failed);
3037 assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed");
3038
3039 __masm-> align(CodeEntryAlignment);
3040 address start = __masm-> pc();
3041
3042 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
3043
3044#ifdef _WIN64
3045 __masm-> push(rklass_tmp); // rdi is callee-save on Windows
3046#endif
3047
3048 // bump this on entry, not on exit:
3049 inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
3050
3051 //-----------------------------------------------------------------------
3052 // Assembler stub will be used for this call to arraycopy
3053 // if the following conditions are met:
3054 //
3055 // (1) src and dst must not be null.
3056 // (2) src_pos must not be negative.
3057 // (3) dst_pos must not be negative.
3058 // (4) length must not be negative.
3059 // (5) src klass and dst klass should be the same and not NULL.
3060 // (6) src and dst should be arrays.
3061 // (7) src_pos + length must not exceed length of src.
3062 // (8) dst_pos + length must not exceed length of dst.
3063 //
3064
3065 // if (src == NULL) return -1;
3066 __masm-> testptr(src, src); // src oop
3067 size_t j1off = __masm-> offset();
3068 __masm-> jccb(Assembler::zero, L_failed_0);
3069
3070 // if (src_pos < 0) return -1;
3071 __masm-> testl(src_pos, src_pos); // src_pos (32-bits)
3072 __masm-> jccb(Assembler::negative, L_failed_0);
3073
3074 // if (dst == NULL) return -1;
3075 __masm-> testptr(dst, dst); // dst oop
3076 __masm-> jccb(Assembler::zero, L_failed_0);
3077
3078 // if (dst_pos < 0) return -1;
3079 __masm-> testl(dst_pos, dst_pos); // dst_pos (32-bits)
3080 size_t j4off = __masm-> offset();
3081 __masm-> jccb(Assembler::negative, L_failed_0);
3082
3083 // The first four tests are very dense code,
3084 // but not quite dense enough to put four
3085 // jumps in a 16-byte instruction fetch buffer.
3086 // That's good, because some branch predictors
3087 // do not like jumps so close together.
3088 // Make sure of this.
3089 guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps");
3090
3091 // registers used as temp
3092 const Register r11_length = r11; // elements count to copy
3093 const Register r10_src_klass = r10; // array klass
3094
3095 // if (length < 0) return -1;
3096 __masm-> movl(r11_length, length); // length (elements count, 32-bits value)
3097 __masm-> testl(r11_length, r11_length);
3098 __masm-> jccb(Assembler::negative, L_failed_0);
3099
3100 __masm-> load_klass(r10_src_klass, src, rklass_tmp);
3101 #ifdef ASSERT
3102 // assert(src->klass() != NULL);
3103 {
3104 BLOCK_COMMENT("assert klasses not null {");
3105 Label L1, L2;
3106 __masm-> testptr(r10_src_klass, r10_src_klass);
3107 __masm-> jcc(Assembler::notZero, L2); // it is broken if klass is NULL
3108 __masm-> bind(L1);
3109 __masm-> stop("broken null klass");
3110 __masm-> bind(L2);
3111 __masm-> load_klass(rax, dst, rklass_tmp);
3112 __masm-> cmpq(rax, 0);
3113 __masm-> jcc(Assembler::equal, L1); // this would be broken also
3114 BLOCK_COMMENT("} assert klasses not null done");
3115 }
3116#endif
3117
3118 // Load layout helper (32-bits)
3119 //
3120 // |array_tag| | header_size | element_type | |log2_element_size|
3121 // 32 30 24 16 8 2 0
3122 //
3123 // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
3124 //
3125
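// Decoding sketch for the layout helper fields in the diagram above
// (illustrative; uses the same Klass shift/mask constants the stub applies
// below with shrl/andptr/andl):
static void decode_layout_helper_sketch(jint lh) {
  int  log2_esize  = lh & Klass::_lh_log2_element_size_mask;
  int  header_size = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
  bool type_array  = ((juint)lh >> Klass::_lh_array_tag_shift) == 0x3; // array_tag
  (void)log2_esize; (void)header_size; (void)type_array;
}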
3126 const int lh_offset = in_bytes(Klass::layout_helper_offset());
3127
3128 // Handle objArrays completely differently...
3129 const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
3130 __masm-> cmpl(Address(r10_src_klass, lh_offset), objArray_lh);
3131 __masm-> jcc(Assembler::equal, L_objArray);
3132
3133 // if (src->klass() != dst->klass()) return -1;
3134 __masm-> load_klass(rax, dst, rklass_tmp);
3135 __masm-> cmpq(r10_src_klass, rax);
3136 __masm-> jcc(Assembler::notEqual, L_failed);
3137
3138 const Register rax_lh = rax; // layout helper
3139 __masm-> movl(rax_lh, Address(r10_src_klass, lh_offset));
3140
3141 // if (!src->is_Array()) return -1;
3142 __masm-> cmpl(rax_lh, Klass::_lh_neutral_value);
3143 __masm-> jcc(Assembler::greaterEqual, L_failed);
3144
3145 // At this point, it is known to be a typeArray (array_tag 0x3).
3146 #ifdef ASSERT
3147 {
3148 BLOCK_COMMENT("assert primitive array {");
3149 Label L;
3150 __masm-> cmpl(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift));
3151 __masm-> jcc(Assembler::greaterEqual, L);
3152 __masm-> stop("must be a primitive array");
3153 __masm-> bind(L);
3154 BLOCK_COMMENT("} assert primitive array done");
3155 }
3156#endif
3157
3158 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
3159 r10, L_failed);
3160
3161 // TypeArrayKlass
3162 //
3163 // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
3164 // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
3165 //
3166
3167 const Register r10_offset = r10; // array offset
3168 const Register rax_elsize = rax_lh; // element size
3169
3170 __masm-> movl(r10_offset, rax_lh);
3171 __masm-> shrl(r10_offset, Klass::_lh_header_size_shift);
3172 __masm-> andptr(r10_offset, Klass::_lh_header_size_mask); // array_offset
3173 __masm-> addptr(src, r10_offset); // src array offset
3174 __masm-> addptr(dst, r10_offset); // dst array offset
3175 BLOCK_COMMENT("choose copy loop based on element size");
3176 __masm-> andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
3177
3178#ifdef _WIN64
3179 __masm-> pop(rklass_tmp); // Restore callee-save rdi
3180#endif
3181
3182 // next registers should be set before the jump to corresponding stub
3183 const Register from = c_rarg0; // source array address
3184 const Register to = c_rarg1; // destination array address
3185 const Register count = c_rarg2; // elements count
3186
3187 // 'from', 'to', 'count' registers should be set in such order
3188 // since they are the same as 'src', 'src_pos', 'dst'.
3189
3190 __masm-> cmpl(rax_elsize, 0);
3191 __masm-> jccb(Assembler::notEqual, L_copy_shorts);
3192 __masm-> lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
3193 __masm-> lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr
3194 __masm-> movl2ptr(count, r11_length); // length
3195 __masm-> jump(RuntimeAddress(byte_copy_entry));
3196
3197 __masm-> BIND(L_copy_shorts);
3198 __masm-> cmpl(rax_elsize, LogBytesPerShort);
3199 __masm-> jccb(Assembler::notEqual, L_copy_ints);
3200 __masm-> lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr
3201 __masm-> lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr
3202 __masm-> movl2ptr(count, r11_length); // length
3203 __masm-> jump(RuntimeAddress(short_copy_entry));
3204
3205 __masm-> BIND(L_copy_ints);
3206 __masm-> cmpl(rax_elsize, LogBytesPerInt);
3207 __masm-> jccb(Assembler::notEqual, L_copy_longs);
3208 __masm-> lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr
3209 __masm-> lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr
3210 __masm-> movl2ptr(count, r11_length); // length
3211 __masm-> jump(RuntimeAddress(int_copy_entry));
3212
3213 __masm-> BIND(L_copy_longs);
3214 #ifdef ASSERT
3215 {
3216 BLOCK_COMMENT("assert long copy {");
3217 Label L;
3218 __masm-> cmpl(rax_elsize, LogBytesPerLong);
3219 __masm-> jcc(Assembler::equal, L);
3220 __masm-> stop("must be long copy, but elsize is wrong");
3221 __masm-> bind(L);
3222 BLOCK_COMMENT("} assert long copy done");
3223 }
3224#endif
3225 __masm-> lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr
3226 __masm-> lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr
3227 __masm-> movl2ptr(count, r11_length); // length
3228 __masm-> jump(RuntimeAddress(long_copy_entry));
3229
3230 // ObjArrayKlass
3231 __masm-> BIND(L_objArray);
3232 // live at this point: r10_src_klass, r11_length, src[_pos], dst[_pos]
3233
3234 Label L_plain_copy, L_checkcast_copy;
3235 // test array classes for subtyping
3236 __masm-> load_klass(rax, dst, rklass_tmp);
3237 __masm-> cmpq(r10_src_klass, rax); // usual case is exact equality
3238 __masm-> jcc(Assembler::notEqual, L_checkcast_copy);
3239
3240 // Identically typed arrays can be copied without element-wise checks.
3241 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
3242 r10, L_failed);
3243
3244 __masm-> lea(from, Address(src, src_pos, TIMES_OOP,
3245 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr
3246 __masm-> lea(to, Address(dst, dst_pos, TIMES_OOP,
3247 arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
3248 __masm-> movl2ptr(count, r11_length); // length
3249 __masm-> BIND(L_plain_copy);
3250#ifdef _WIN64
3251 __masm-> pop(rklass_tmp); // Restore callee-save rdi
3252#endif
3253 __masm-> jump(RuntimeAddress(oop_copy_entry));
3254
3255 __masm-> BIND(L_checkcast_copy);
3256 // live at this point: r10_src_klass, r11_length, rax (dst_klass)
3257 {
3258 // Before looking at dst.length, make sure dst is also an objArray.
3259 __masm-> cmpl(Address(rax, lh_offset), objArray_lh);
3260 __masm-> jcc(Assembler::notEqual, L_failed);
3261
3262 // It is safe to examine both src.length and dst.length.
3263 arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length,
3264 rax, L_failed);
3265
3266 const Register r11_dst_klass = r11;
3267 __masm-> load_klass(r11_dst_klass, dst, rklass_tmp); // reload
3268
3269 // Marshal the base address arguments now, freeing registers.
3270 __masm-> lea(from, Address(src, src_pos, TIMES_OOP,
3271 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
3272 __masm-> lea(to, Address(dst, dst_pos, TIMES_OOP,
3273 arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
3274 __masm-> movl(count, length); // length (reloaded)
3275 Register sco_temp = c_rarg3; // this register is free now
3276 assert_different_registers(from, to, count, sco_temp,
3277 r11_dst_klass, r10_src_klass);
3278 assert_clean_int(count, sco_temp);
3279
3280 // Generate the type check.
3281 const int sco_offset = in_bytes(Klass::super_check_offset_offset());
3282 __masm-> movl(sco_temp, Address(r11_dst_klass, sco_offset));
3283 assert_clean_int(sco_temp, rax);
3284 generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy);
3285
3286 // Fetch destination element klass from the ObjArrayKlass header.
3287 int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
3288 __masm-> movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset));
3289 __masm-> movl( sco_temp, Address(r11_dst_klass, sco_offset));
3290 assert_clean_int(sco_temp, rax);
3291
3292#ifdef _WIN64
3293 __masm-> pop(rklass_tmp); // Restore callee-save rdi
3294#endif
3295
3296 // the checkcast_copy loop needs two extra arguments:
3297 assert(c_rarg3 == sco_temp, "#3 already in place");
3298 // Set up arguments for checkcast_copy_entry.
3299 setup_arg_regs(4);
3300 __masm-> movptr(r8, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris
3301 __masm-> jump(RuntimeAddress(checkcast_copy_entry));
3302 }
3303
3304 __masm-> BIND(L_failed);
3305#ifdef _WIN64
3306 __masm-> pop(rklass_tmp); // Restore callee-save rdi
3307#endif
3308 __masm-> xorptr(rax, rax);
3309 __masm-> notptr(rax); // return -1
3310 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
3311 __masm-> ret(0);
3312
3313 return start;
3314 }
3315
3316 address generate_data_cache_writeback() {
3317 const Register src = c_rarg0; // source address
3318
3319 __masm-> align(CodeEntryAlignment);
3320
3321 StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback");
3322
3323 address start = __masm-> pc();
3324 __masm-> enter();
3325 __masm-> cache_wb(Address(src, 0));
3326 __masm-> leave();
3327 __masm-> ret(0);
3328
3329 return start;
3330 }
3331
3332 address generate_data_cache_writeback_sync() {
3333 const Register is_pre = c_rarg0; // pre or post sync
3334
3335 __masm-> align(CodeEntryAlignment);
3336
3337 StubCodeMark mark(this, "StubRoutines", "_data_cache_writeback_sync");
3338
3339 // pre wbsync is a no-op
3340 // post wbsync translates to an sfence
3341
3342 Label skip;
3343 address start = __masm-> pc();
3344 __masm-> enter();
3345 __masm-> cmpl(is_pre, 0);
3346 __masm-> jcc(Assembler::notEqual, skip);
3347 __masm-> cache_wbsync(false);
3348 __masm-> bind(skip);
3349 __masm-> leave();
3350 __masm-> ret(0);
3351
3352 return start;
3353 }
3354
3355 void generate_arraycopy_stubs() {
3356 address entry;
3357 address entry_jbyte_arraycopy;
3358 address entry_jshort_arraycopy;
3359 address entry_jint_arraycopy;
3360 address entry_oop_arraycopy;
3361 address entry_jlong_arraycopy;
3362 address entry_checkcast_arraycopy;
3363
3364 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
3365 "jbyte_disjoint_arraycopy");
3366 StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy,
3367 "jbyte_arraycopy");
3368
3369 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
3370 "jshort_disjoint_arraycopy");
3371 StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy,
3372 "jshort_arraycopy");
3373
3374 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, &entry,
3375 "jint_disjoint_arraycopy");
3376 StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, entry,
3377 &entry_jint_arraycopy, "jint_arraycopy");
3378
3379 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, &entry,
3380 "jlong_disjoint_arraycopy");
3381 StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, entry,
3382 &entry_jlong_arraycopy, "jlong_arraycopy");
3383
3384
3385 if (UseCompressedOops) {
3386 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry,
3387 "oop_disjoint_arraycopy");
3388 StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry,
3389 &entry_oop_arraycopy, "oop_arraycopy");
3390 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry,
3391 "oop_disjoint_arraycopy_uninit",
3392 /*dest_uninitialized*/true);
3393 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, entry,
3394 NULL, "oop_arraycopy_uninit",
3395 /*dest_uninitialized*/true);
3396 } else {
3397 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry,
3398 "oop_disjoint_arraycopy");
3399 StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry,
3400 &entry_oop_arraycopy, "oop_arraycopy");
3401 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry,
3402 "oop_disjoint_arraycopy_uninit",
3403 /*dest_uninitialized*/true);
3404 StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, entry,
3405 NULL, "oop_arraycopy_uninit",
3406 /*dest_uninitialized*/true);
3407 }
3408
3409 StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
3410 StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
3411 /*dest_uninitialized*/true);
3412
3413 StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
3414 entry_jbyte_arraycopy,
3415 entry_jshort_arraycopy,
3416 entry_jint_arraycopy,
3417 entry_jlong_arraycopy);
3418 StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
3419 entry_jbyte_arraycopy,
3420 entry_jshort_arraycopy,
3421 entry_jint_arraycopy,
3422 entry_oop_arraycopy,
3423 entry_jlong_arraycopy,
3424 entry_checkcast_arraycopy);
3425
3426 StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
3427 StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
3428 StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
3429 StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
3430 StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
3431 StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
3432
3433 // We don't generate specialized code for HeapWord-aligned source
3434 // arrays, so just use the code we've already generated
3435 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
3436 StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy;
3437
3438 StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy;
3439 StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy;
3440
3441 StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy;
3442 StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy;
3443
3444 StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy;
3445 StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy;
3446
3447 StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy;
3448 StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy;
3449
3450 StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit;
3451 StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
3452 }
3453
3454 // AES intrinsic stubs
3455 enum {AESBlockSize = 16};
3456
3457 address generate_key_shuffle_mask() {
3458 __masm-> align(16);
3459 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
3460 address start = __masm-> pc();
3461 __masm-> emit_data64( 0x0405060700010203, relocInfo::none );
3462 __masm-> emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none );
3463 return start;
3464 }
3465
3466 address generate_counter_shuffle_mask() {
3467 __masm-> align(16);
3468 StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
3469 address start = __masm-> pc();
3470 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
3471 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
3472 return start;
3473 }
3474
3475 // Utility routine for loading a 128-bit key word in little-endian format.
3476 // Can optionally specify that the shuffle mask is already in an xmm register.
3477 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
3478 __masm-> movdqu(xmmdst, Address(key, offset));
3479 if (xmm_shuf_mask != NULL) {
3480 __masm-> pshufb(xmmdst, xmm_shuf_mask);
3481 } else {
3482 __masm-> pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3483 }
3484 }
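// Effect of the pshufb above (illustrative): the key shuffle mask emitted
// earlier (0x0405060700010203 / 0x0c0d0e0f08090a0b, stored little-endian)
// reverses the byte order inside each 32-bit word, i.e. a per-word bswap
// that turns the Java key's big-endian ints into the little-endian layout
// the AES instructions expect.
static void shuffle_key_block_sketch(juint w[4]) {
  for (int i = 0; i < 4; i++) {
    w[i] = ((w[i] & 0x000000ff) << 24) | ((w[i] & 0x0000ff00) << 8) |
           ((w[i] & 0x00ff0000) >> 8)  | ((w[i] & 0xff000000) >> 24);
  }
}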
3485
3486 // Utility routine to increment a 128-bit counter (iv in CTR mode)
3487 void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
3488 __masm-> pextrq(reg, xmmdst, 0x0);
3489 __masm-> addq(reg, inc_delta);
3490 __masm-> pinsrq(xmmdst, reg, 0x0);
3491 __masm-> jcc(Assembler::carryClear, next_block); // jump if no carry
3492 __masm-> pextrq(reg, xmmdst, 0x01); // Carry
3493 __masm-> addq(reg, 0x01);
3494 __masm-> pinsrq(xmmdst, reg, 0x01); //Carry end
3495 __masm-> BIND(next_block); // next instruction
3496 }
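// Scalar equivalent of the increment above (illustrative): add the delta
// to the low qword and bump the high qword only when that addition carries;
// the jcc(carryClear) above keys off the carry flag of the 64-bit addq.
static void inc_counter_sketch(julong ctr[2], julong inc_delta) {
  julong old_lo = ctr[0];
  ctr[0] += inc_delta;
  if (ctr[0] < old_lo) {
    ctr[1] += 1;  // carry out of the low 64 bits
  }
}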
3497
3498 // Arguments:
3499 //
3500 // Inputs:
3501 // c_rarg0 - source byte array address
3502 // c_rarg1 - destination byte array address
3503 // c_rarg2 - K (key) in little endian int array
3504 //
3505 address generate_aescrypt_encryptBlock() {
3506 assert(UseAES, "need AES instructions and misaligned SSE support");
3507 __masm-> align(CodeEntryAlignment);
3508 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
3509 Label L_doLast;
3510 address start = __masm-> pc();
3511
3512 const Register from = c_rarg0; // source array address
3513 const Register to = c_rarg1; // destination array address
3514 const Register key = c_rarg2; // key array address
3515 const Register keylen = rax;
3516
3517 const XMMRegister xmm_result = xmm0;
3518 const XMMRegister xmm_key_shuf_mask = xmm1;
3519 // On win64 xmm6-xmm15 must be preserved so don't use them.
3520 const XMMRegister xmm_temp1 = xmm2;
3521 const XMMRegister xmm_temp2 = xmm3;
3522 const XMMRegister xmm_temp3 = xmm4;
3523 const XMMRegister xmm_temp4 = xmm5;
3524
3525 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
3526
3527 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
3528 __masm-> movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3529
3530 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3531 __masm-> movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
3532
3533 // For encryption, the java expanded key ordering is just what we need
3534 // we don't know if the key is aligned, hence not using load-execute form
3535
3536 load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
3537 __masm-> pxor(xmm_result, xmm_temp1);
3538
3539 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
3540 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
3541 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
3542 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
3543
3544 __masm-> aesenc(xmm_result, xmm_temp1);
3545 __masm-> aesenc(xmm_result, xmm_temp2);
3546 __masm-> aesenc(xmm_result, xmm_temp3);
3547 __masm-> aesenc(xmm_result, xmm_temp4);
3548
3549 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
3550 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
3551 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
3552 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
3553
3554 __masm-> aesenc(xmm_result, xmm_temp1);
3555 __masm-> aesenc(xmm_result, xmm_temp2);
3556 __masm-> aesenc(xmm_result, xmm_temp3);
3557 __masm-> aesenc(xmm_result, xmm_temp4);
3558
3559 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
3560 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
3561
3562 __masm-> cmpl(keylen, 44);
3563 __masm-> jccb(Assembler::equal, L_doLast);
3564
3565 __masm-> aesenc(xmm_result, xmm_temp1);
3566 __masm-> aesenc(xmm_result, xmm_temp2);
3567
3568 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
3569 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
3570
3571 __masm-> cmpl(keylen, 52);
3572 __masm-> jccb(Assembler::equal, L_doLast);
3573
3574 __masm-> aesenc(xmm_result, xmm_temp1);
3575 __masm-> aesenc(xmm_result, xmm_temp2);
3576
3577 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
3578 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
3579
3580 __masm-> BIND(L_doLast);
3581 __masm-> aesenc(xmm_result, xmm_temp1);
3582 __masm-> aesenclast(xmm_result, xmm_temp2);
3583 __masm-> movdqu(Address(to, 0), xmm_result); // store the result
3584 __masm-> xorptr(rax, rax); // return 0
3585 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
3586 __masm-> ret(0);
3587
3588 return start;
3589 }
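// Why the stub above branches on keylen 44 and 52 (illustrative): the
// Java-expanded key array holds 4 * (rounds + 1) ints, so its length
// determines the AES variant and thus how many aesenc rounds are issued
// before the final aesenclast.
static int aes_rounds_sketch(int keylen_in_ints) {
  switch (keylen_in_ints) {
    case 44: return 10;  // AES-128
    case 52: return 12;  // AES-192
    case 60: return 14;  // AES-256
    default: return -1;  // not a valid expanded-key length
  }
}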
3590
3591
3592 // Arguments:
3593 //
3594 // Inputs:
3595 // c_rarg0 - source byte array address
3596 // c_rarg1 - destination byte array address
3597 // c_rarg2 - K (key) in little endian int array
3598 //
3599 address generate_aescrypt_decryptBlock() {
3600 assert(UseAES, "need AES instructions and misaligned SSE support");
3601 __masm-> align(CodeEntryAlignment);
3602 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
3603 Label L_doLast;
3604 address start = __masm-> pc();
3605
3606 const Register from = c_rarg0; // source array address
3607 const Register to = c_rarg1; // destination array address
3608 const Register key = c_rarg2; // key array address
3609 const Register keylen = rax;
3610
3611 const XMMRegister xmm_result = xmm0;
3612 const XMMRegister xmm_key_shuf_mask = xmm1;
3613 // On win64 xmm6-xmm15 must be preserved so don't use them.
3614 const XMMRegister xmm_temp1 = xmm2;
3615 const XMMRegister xmm_temp2 = xmm3;
3616 const XMMRegister xmm_temp3 = xmm4;
3617 const XMMRegister xmm_temp4 = xmm5;
3618
3619 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
3620
3621 // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
3622 __masm-> movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3623
3624 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3625 __masm-> movdqu(xmm_result, Address(from, 0));
3626
3627 // for decryption, the java expanded key ordering is rotated one position from what we want
3628 // so we start from 0x10 here and hit 0x00 last
3629 // we don't know if the key is aligned, hence not using load-execute form
3630 load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
3631 load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
3632 load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
3633 load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
3634
3635 __masm-> pxor (xmm_result, xmm_temp1);
3636 __masm-> aesdec(xmm_result, xmm_temp2);
3637 __masm-> aesdec(xmm_result, xmm_temp3);
3638 __masm-> aesdec(xmm_result, xmm_temp4);
3639
3640 load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
3641 load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
3642 load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
3643 load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
3644
3645 __masm-> aesdec(xmm_result, xmm_temp1);
3646 __masm-> aesdec(xmm_result, xmm_temp2);
3647 __masm-> aesdec(xmm_result, xmm_temp3);
3648 __masm-> aesdec(xmm_result, xmm_temp4);
3649
3650 load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
3651 load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
3652 load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
3653
3654 __masm-> cmpl(keylen, 44);
3655 __masm-> jccb(Assembler::equal, L_doLast);
3656
3657 __masm-> aesdec(xmm_result, xmm_temp1);
3658 __masm-> aesdec(xmm_result, xmm_temp2);
3659
3660 load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
3661 load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
3662
3663 __masm-> cmpl(keylen, 52);
3664 __masm-> jccb(Assembler::equal, L_doLast);
3665
3666 __masm-> aesdec(xmm_result, xmm_temp1);
3667 __masm-> aesdec(xmm_result, xmm_temp2);
3668
3669 load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
3670 load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
3671
3672 __masm-> BIND(L_doLast);
3673 __masm-> aesdec(xmm_result, xmm_temp1);
3674 __masm-> aesdec(xmm_result, xmm_temp2);
3675
3676 // for decryption the aesdeclast operation is always on key+0x00
3677 __masm-> aesdeclast(xmm_result, xmm_temp3);
3678 __masm-> movdqu(Address(to, 0), xmm_result); // store the result
3679 __masm-> xorptr(rax, rax); // return 0
3680 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
3681 __masm-> ret(0);
3682
3683 return start;
3684 }
3685
3686
3687 // Arguments:
3688 //
3689 // Inputs:
3690 // c_rarg0 - source byte array address
3691 // c_rarg1 - destination byte array address
3692 // c_rarg2 - K (key) in little endian int array
3693 // c_rarg3 - r vector byte array address
3694 // c_rarg4 - input length
3695 //
3696 // Output:
3697 // rax - input length
3698 //
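// Reference semantics of the stub below (illustrative; len is a multiple of
// AESBlockSize == 16 and 'aes_encrypt_block' is an assumed single-block
// primitive, not a routine in this file): each ciphertext block is chained
// into the next plaintext block, and the final chaining value is written
// back to rvec exactly as the stub does at L_exit.
static void aes_encrypt_block(const jbyte in[16], jbyte out[16]); // assumed
static void cbc_encrypt_sketch(const jbyte* in, jbyte* out, size_t len, jbyte rvec[16]) {
  for (size_t pos = 0; pos < len; pos += AESBlockSize) {
    jbyte blk[16];
    for (int i = 0; i < 16; i++) blk[i] = (jbyte)(in[pos + i] ^ rvec[i]);
    aes_encrypt_block(blk, rvec);  // rvec now holds the ciphertext block
    for (int i = 0; i < 16; i++) out[pos + i] = rvec[i];
  }
}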
3699 address generate_cipherBlockChaining_encryptAESCrypt() {
3700 assert(UseAES, "need AES instructions and misaligned SSE support");
3701 __masm-> align(CodeEntryAlignment);
3702 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
3703 address start = __masm-> pc();
3704
3705 Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
3706 const Register from = c_rarg0; // source array address
3707 const Register to = c_rarg1; // destination array address
3708 const Register key = c_rarg2; // key array address
3709 const Register rvec = c_rarg3; // r byte array initialized from initvector array address
3710 // and left with the results of the last encryption block
3711#ifndef _WIN64
3712 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
3713#else
3714 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
3715 const Register len_reg = r11; // pick the volatile windows register
3716#endif
3717 const Register pos = rax;
3718
3719 // xmm register assignments for the loops below
3720 const XMMRegister xmm_result = xmm0;
3721 const XMMRegister xmm_temp = xmm1;
3722 // keys 0-10 preloaded into xmm2-xmm12
3723 const int XMM_REG_NUM_KEY_FIRST = 2;
3724 const int XMM_REG_NUM_KEY_LAST = 15;
3725 const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
3726 const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10);
3727 const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11);
3728 const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12);
3729 const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13);
3730
3731 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
3732
3733#ifdef _WIN64
3734 // on win64, fill len_reg from stack position
3735 __masm-> movl(len_reg, len_mem);
3736#else
3737 __masm-> push(len_reg); // Save
3738#endif
3739
3740 const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
3741 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3742 // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
3743 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
3744 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
3745 offset += 0x10;
3746 }
3747 __masm-> movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
3748
3749 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, 60=256))
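// Key-length arithmetic (for reference): AES with an N-bit key runs
// Nr = N/32 + 6 rounds, and the expanded key holds 4 * (Nr + 1) ints, i.e.
// 128 -> 10 rounds -> 44 ints, 192 -> 12 -> 52, 256 -> 14 -> 60; the KLE
// length checks below compare against exactly these values.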
3750 __masm-> movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3751 __masm-> cmpl(rax, 44);
3752 __masm-> jcc(Assembler::notEqual, L_key_192_256);
3753
3754 // 128 bit code follows here
3755 __masm-> movptr(pos, 0);
3756 __masm-> align(OptoLoopAlignment);
3757
3758 __masm-> BIND(L_loopTop_128);
3759 __masm-> movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
3760 __masm-> pxor (xmm_result, xmm_temp); // xor with the current r vector
3761 __masm-> pxor (xmm_result, xmm_key0); // do the aes rounds
3762 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) {
3763 __masm-> aesenc(xmm_result, as_XMMRegister(rnum));
3764 }
3765 __masm-> aesenclast(xmm_result, xmm_key10);
3766 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
3767 // no need to store r to memory until we exit
3768 __masm-> addptr(pos, AESBlockSize);
3769 __masm-> subptr(len_reg, AESBlockSize);
3770 __masm-> jcc(Assembler::notEqual, L_loopTop_128);
3771
3772 __masm-> BIND(L_exit);
3773 __masm-> movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object
3774
3775#ifdef _WIN64
3776 __masm-> movl(rax, len_mem);
3777#else
3778 __masm-> pop(rax); // return length
3779#endif
3780 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
3781 __masm-> ret(0);
3782
3783 __masm-> BIND(L_key_192_256);
3784 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
3785 load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask);
3786 load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask);
3787 __masm-> cmpl(rax, 52);
3788 __masm-> jcc(Assembler::notEqual, L_key_256);
3789
3790 // 192-bit code follows here (could be changed to use more xmm registers)
3791 __masm-> movptr(pos, 0);
3792 __masm-> align(OptoLoopAlignment);
3793
3794 __masm-> BIND(L_loopTop_192);
3795 __masm-> movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
3796 __masm-> pxor (xmm_result, xmm_temp); // xor with the current r vector
3797 __masm-> pxor (xmm_result, xmm_key0); // do the aes rounds
3798 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) {
3799 __masm-> aesenc(xmm_result, as_XMMRegister(rnum));
3800 }
3801 __masm-> aesenclast(xmm_result, xmm_key12);
3802 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
3803 // no need to store r to memory until we exit
3804 __masm-> addptr(pos, AESBlockSize);
3805 __masm-> subptr(len_reg, AESBlockSize);
3806 __masm-> jcc(Assembler::notEqual, L_loopTop_192);
3807 __masm-> jmp(L_exit);
3808
3809 __masm-> BIND(L_key_256);
3810 // 256-bit code follows here (could be changed to use more xmm registers)
3811 load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask);
3812 __masm-> movptr(pos, 0);
3813 __masm-> align(OptoLoopAlignment);
3814
3815 __masm-> BIND(L_loopTop_256);
3816 __masm-> movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input
3817 __masm-> pxor (xmm_result, xmm_temp); // xor with the current r vector
3818 __masm-> pxor (xmm_result, xmm_key0); // do the aes rounds
3819 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
3820 __masm-> aesenc(xmm_result, as_XMMRegister(rnum));
3821 }
3822 load_key(xmm_temp, key, 0xe0);
3823 __masm-> aesenclast(xmm_result, xmm_temp);
3824 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
3825 // no need to store r to memory until we exit
3826 __masm-> addptr(pos, AESBlockSize);
3827 __masm-> subptr(len_reg, AESBlockSize);
3828 __masm-> jcc(Assembler::notEqual, L_loopTop_256);
3829 __masm-> jmp(L_exit);
3830
3831 return start;
3832 }
3833
3834 // Safefetch stubs.
3835 void generate_safefetch(const char* name, int size, address* entry,
3836 address* fault_pc, address* continuation_pc) {
3837 // safefetch signatures:
3838 // int SafeFetch32(int* adr, int errValue);
3839 // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
3840 //
3841 // arguments:
3842 // c_rarg0 = adr
3843 // c_rarg1 = errValue
3844 //
3845 // result:
3846 // rax = *adr or errValue
3847
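// Usage sketch (illustrative, not part of the stub): callers probe memory
// that may be unmapped; the VM's fault handler redirects a trap at *fault_pc
// to *continuation_pc, so errValue (already in c_rarg1) becomes the result:
//   int v = SafeFetch32(possibly_unmapped_ptr, -1); // -1 instead of a crash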
3848 StubCodeMark mark(this, "StubRoutines", name);
3849
3850 // Entry point, pc or function descriptor.
3851 *entry = __masm-> pc();
3852
3853 // Load *adr into c_rarg1, may fault.
3854 *fault_pc = __masm-> pc();
3855 switch (size) {
3856 case 4:
3857 // int32_t
3858 __masm-> movl(c_rarg1, Address(c_rarg0, 0));
3859 break;
3860 case 8:
3861 // int64_t
3862 __masm-> movq(c_rarg1, Address(c_rarg0, 0));
3863 break;
3864 default:
3865 ShouldNotReachHere();
3866 }
3867
3868 // return errValue or *adr
3869 *continuation_pc = __masm-> pc();
3870 __masm-> movq(rax, c_rarg1);
3871 __masm-> ret(0);
3872 }
3873
3874 // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
3875 // to hide instruction latency
3876 //
3877 // Arguments:
3878 //
3879 // Inputs:
3880 // c_rarg0 - source byte array address
3881 // c_rarg1 - destination byte array address
3882 // c_rarg2 - K (key) in little endian int array
3883 // c_rarg3 - r vector byte array address
3884 // c_rarg4 - input length
3885 //
3886 // Output:
3887 // rax - input length
3888 //
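// Why decryption parallelizes (for reference): unlike CBC encryption, every
// output block depends only on ciphertext that is already known,
//   P[i] = AES_decrypt(C[i], key) ^ C[i-1]      (with C[-1] = IV)
// so four AESDEC dependency chains can be interleaved to hide latency.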
3889 address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
3890 assert(UseAES, "need AES instructions and misaligned SSE support");
3891 __masm-> align(CodeEntryAlignment);
3892 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
3893 address start = __masm-> pc();
3894
3895 const Register from = c_rarg0; // source array address
3896 const Register to = c_rarg1; // destination array address
3897 const Register key = c_rarg2; // key array address
3898 const Register rvec = c_rarg3; // r byte array initialized from initvector array address
3899 // and left with the results of the last encryption block
3900#ifndef _WIN64
3901 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
3902#else
3903 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
3904 const Register len_reg = r11; // pick the volatile windows register
3905#endif
3906 const Register pos = rax;
3907
3908 const int PARALLEL_FACTOR = 4;
3909 const int ROUNDS[3] = { 10, 12, 14 }; // aes rounds for key128, key192, key256
3910
3911 Label L_exit;
3912 Label L_singleBlock_loopTopHead[3]; // 128, 192, 256
3913 Label L_singleBlock_loopTopHead2[3]; // 128, 192, 256
3914 Label L_singleBlock_loopTop[3]; // 128, 192, 256
3915 Label L_multiBlock_loopTopHead[3]; // 128, 192, 256
3916 Label L_multiBlock_loopTop[3]; // 128, 192, 256
3917
3918 // keys 0-10 preloaded into xmm5-xmm15
3919 const int XMM_REG_NUM_KEY_FIRST = 5;
3920 const int XMM_REG_NUM_KEY_LAST = 15;
3921 const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
3922 const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
3923
3924 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
3925
3926#ifdef _WIN64
3927 // on win64, fill len_reg from stack position
3928 __masm-> movl(len_reg, len_mem);
3929#else
3930 __masm-> push(len_reg); // Save
3931#endif
3932 __masm-> push(rbx);
3933 // the java expanded key ordering is rotated one position from what we want
3934 // so we start from 0x10 here and hit 0x00 last
3935 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
3936 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
3937 // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
3938 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
3939 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
3940 offset += 0x10;
3941 }
3942 load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask);
3943
3944 const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block
3945
3946 // registers holding the four results in the parallelized loop
3947 const XMMRegister xmm_result0 = xmm0;
3948 const XMMRegister xmm_result1 = xmm2;
3949 const XMMRegister xmm_result2 = xmm3;
3950 const XMMRegister xmm_result3 = xmm4;
3951
3952 __masm-> movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec
3953
3954 __masm-> xorptr(pos, pos);
3955
3956 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, 60=256))
3957 __masm-> movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
3958 __masm-> cmpl(rbx, 52);
3959 __masm-> jcc(Assembler::equal, L_multiBlock_loopTopHead[1]);
3960 __masm-> cmpl(rbx, 60);
3961 __masm-> jcc(Assembler::equal, L_multiBlock_loopTopHead[2]);
3962
3963#define DoFour(opc, src_reg) \
3964 __masm-> opc(xmm_result0, src_reg); \
3965 __masm-> opc(xmm_result1, src_reg); \
3966 __masm-> opc(xmm_result2, src_reg); \
3967 __masm-> opc(xmm_result3, src_reg); \
3968
3969 for (int k = 0; k < 3; ++k) {
3970 __masm-> BIND(L_multiBlock_loopTopHead[k]);
3971 if (k != 0) {
3972 __masm-> cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left
3973 __masm-> jcc(Assembler::less, L_singleBlock_loopTopHead2[k]);
3974 }
3975 if (k == 1) {
3976 __masm-> subptr(rsp, 6 * wordSize);
3977 __masm-> movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15
3978 load_key(xmm15, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
3979 __masm-> movdqu(Address(rsp, 2 * wordSize), xmm15);
3980 load_key(xmm1, key, 0xc0); // 0xc0;
3981 __masm-> movdqu(Address(rsp, 4 * wordSize), xmm1);
3982 } else if (k == 2) {
3983 __masm-> subptr(rsp, 10 * wordSize);
3984 __masm-> movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15
3985 load_key(xmm15, key, 0xd0); // 0xd0; 256-bit key goes up to 0xe0
3986 __masm-> movdqu(Address(rsp, 6 * wordSize), xmm15);
3987 load_key(xmm1, key, 0xe0); // 0xe0;
3988 __masm-> movdqu(Address(rsp, 8 * wordSize), xmm1);
3989 load_key(xmm15, key, 0xb0); // 0xb0;
3990 __masm-> movdqu(Address(rsp, 2 * wordSize), xmm15);
3991 load_key(xmm1, key, 0xc0); // 0xc0;
3992 __masm-> movdqu(Address(rsp, 4 * wordSize), xmm1);
3993 }
3994 __masm-> align(OptoLoopAlignment);
3995 __masm-> BIND(L_multiBlock_loopTop[k]);
3996 __masm-> cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left
3997 __masm-> jcc(Assembler::less, L_singleBlock_loopTopHead[k]);
3998
3999 if (k != 0) {
4000 __masm-> movdqu(xmm15, Address(rsp, 2 * wordSize));
4001 __masm-> movdqu(xmm1, Address(rsp, 4 * wordSize));
4002 }
4003
4004 __masm-> movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers
4005 __masm-> movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
4006 __masm-> movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
4007 __masm-> movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
4008
4009 DoFour(pxor, xmm_key_first);
4010 if (k == 0) {
4011 for (int rnum = 1; rnum < ROUNDS[k]; rnum++) {
4012 DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4013 }
4014 DoFour(aesdeclast, xmm_key_last);
4015 } else if (k == 1) {
4016 for (int rnum = 1; rnum <= ROUNDS[k]-2; rnum++) {
4017 DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4018 }
4019 __masm-> movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again.
4020 DoFour(aesdec, xmm1); // key : 0xc0
4021 __masm-> movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again
4022 DoFour(aesdeclast, xmm_key_last);
4023 } else if (k == 2) {
4024 for (int rnum = 1; rnum <= ROUNDS[k] - 4; rnum++) {
4025 DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4026 }
4027 DoFour(aesdec, xmm1); // key : 0xc0
4028 __masm-> movdqu(xmm15, Address(rsp, 6 * wordSize));
4029 __masm-> movdqu(xmm1, Address(rsp, 8 * wordSize));
4030 DoFour(aesdec, xmm15); // key : 0xd0
4031 __masm-> movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again.
4032 DoFour(aesdec, xmm1); // key : 0xe0
4033 __masm-> movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again
4034 DoFour(aesdeclast, xmm_key_last);
4035 }
4036
4037 // for each result, xor with the r vector of previous cipher block
4038 __masm-> pxor(xmm_result0, xmm_prev_block_cipher);
4039 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize));
4040 __masm-> pxor(xmm_result1, xmm_prev_block_cipher);
4041 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize));
4042 __masm-> pxor(xmm_result2, xmm_prev_block_cipher);
4043 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize));
4044 __masm-> pxor(xmm_result3, xmm_prev_block_cipher);
4045 __masm-> movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks
4046 if (k != 0) {
4047 __masm-> movdqu(Address(rvec, 0x00), xmm_prev_block_cipher);
4048 }
4049
4050 __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output
4051 __masm-> movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
4052 __masm-> movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
4053 __masm-> movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
4054
4055 __masm-> addptr(pos, PARALLEL_FACTOR * AESBlockSize);
4056 __masm-> subptr(len_reg, PARALLEL_FACTOR * AESBlockSize);
4057 __masm-> jmp(L_multiBlock_loopTop[k]);
4058
4059 // registers used in the non-parallelized loops
4060 // xmm register assignments for the loops below
4061 const XMMRegister xmm_result = xmm0;
4062 const XMMRegister xmm_prev_block_cipher_save = xmm2;
4063 const XMMRegister xmm_key11 = xmm3;
4064 const XMMRegister xmm_key12 = xmm4;
4065 const XMMRegister key_tmp = xmm4;
4066
4067 __masm-> BIND(L_singleBlock_loopTopHead[k]);
4068 if (k == 1) {
4069 __masm-> addptr(rsp, 6 * wordSize);
4070 } else if (k == 2) {
4071 __masm-> addptr(rsp, 10 * wordSize);
4072 }
4073 __masm-> cmpptr(len_reg, 0); // any blocks left??
4074 __masm-> jcc(Assembler::equal, L_exit);
4075 __masm-> BIND(L_singleBlock_loopTopHead2[k]);
4076 if (k == 1) {
4077 load_key(xmm_key11, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
4078 load_key(xmm_key12, key, 0xc0); // 0xc0; 192-bit key goes up to 0xc0
4079 }
4080 if (k == 2) {
4081 load_key(xmm_key11, key, 0xb0); // 0xb0; 256-bit key goes up to 0xe0
4082 }
4083 __masm-> align(OptoLoopAlignment);
4084 __masm-> BIND(L_singleBlock_loopTop[k]);
4085 __masm-> movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input
4086 __masm-> movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector
4087 __masm-> pxor(xmm_result, xmm_key_first); // do the aes dec rounds
4088 for (int rnum = 1; rnum <= 9 ; rnum++) {
4089 __masm-> aesdec(xmm_result, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST));
4090 }
4091 if (k == 1) {
4092 __masm-> aesdec(xmm_result, xmm_key11);
4093 __masm-> aesdec(xmm_result, xmm_key12);
4094 }
4095 if (k == 2) {
4096 __masm-> aesdec(xmm_result, xmm_key11);
4097 load_key(key_tmp, key, 0xc0);
4098 __masm-> aesdec(xmm_result, key_tmp);
4099 load_key(key_tmp, key, 0xd0);
4100 __masm-> aesdec(xmm_result, key_tmp);
4101 load_key(key_tmp, key, 0xe0);
4102 __masm-> aesdec(xmm_result, key_tmp);
4103 }
4104
4105 __masm-> aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0
4106 __masm-> pxor(xmm_result, xmm_prev_block_cipher); // xor with the current r vector
4107 __masm-> movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
4108 // no need to store r to memory until we exit
4109 __masm-> movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block
4110 __masm-> addptr(pos, AESBlockSize);
4111 __masm-> subptr(len_reg, AESBlockSize);
4112 __masm-> jcc(Assembler::notEqual, L_singleBlock_loopTop[k]);
4113 if (k != 2) {
4114 __masm-> jmp(L_exit);
4115 }
4116 } //for 128/192/256
4117
4118 __masm-> BIND(L_exit);
4119 __masm-> movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object
4120 __masm-> pop(rbx);
4121#ifdef _WIN64
4122 __masm-> movl(rax, len_mem);
4123#else
4124 __masm-> pop(rax); // return length
4125#endif
4126 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4127 __masm-> ret(0);
4128 return start;
4129}
4130
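// ECB mode note (illustrative): blocks are independent, C[i] = AES_encrypt(P[i], key),
// so the two stubs below just set up the RuntimeStub frame and delegate the
// whole loop to the MacroAssembler aesecb_encrypt / aesecb_decrypt helpers.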
4131 address generate_electronicCodeBook_encryptAESCrypt() {
4132 __masm-> align(CodeEntryAlignment);
4133 StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_encryptAESCrypt");
4134 address start = __masm-> pc();
4135 const Register from = c_rarg0; // source array address
4136 const Register to = c_rarg1; // destination array address
4137 const Register key = c_rarg2; // key array address
4138 const Register len = c_rarg3; // src len (must be multiple of blocksize 16)
4139 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
4140 __masm-> aesecb_encrypt(from, to, key, len);
4141 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4142 __masm-> ret(0);
4143 return start;
4144 }
4145
4146 address generate_electronicCodeBook_decryptAESCrypt() {
4147 __masm-> align(CodeEntryAlignment);
4148 StubCodeMark mark(this, "StubRoutines", "electronicCodeBook_decryptAESCrypt");
4149 address start = __masm-> pc();
4150 const Register from = c_rarg0; // source array address
4151 const Register to = c_rarg1; // destination array address
4152 const Register key = c_rarg2; // key array address
4153 const Register len = c_rarg3; // src len (must be multiple of blocksize 16)
4154 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
4155 __masm-> aesecb_decrypt(from, to, key, len);
4156 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4157 __masm-> ret(0);
4158 return start;
4159 }
4160
4161 // ofs and limit are used for multi-block byte arrays.
4162 // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs)
4163 address generate_md5_implCompress(bool multi_block, const char *name) {
4164 __masm-> align(CodeEntryAlignment);
4165 StubCodeMark mark(this, "StubRoutines", name);
4166 address start = __masm-> pc();
4167
4168 const Register buf_param = r15;
4169 const Address state_param(rsp, 0 * wordSize);
4170 const Address ofs_param (rsp, 1 * wordSize );
4171 const Address limit_param(rsp, 1 * wordSize + 4);
4172
4173 __masm-> enter();
4174 __masm-> push(rbx);
4175 __masm-> push(rdi);
4176 __masm-> push(rsi);
4177 __masm-> push(r15);
4178 __masm-> subptr(rsp, 2 * wordSize);
4179
4180 __masm-> movptr(buf_param, c_rarg0);
4181 __masm-> movptr(state_param, c_rarg1);
4182 if (multi_block) {
4183 __masm-> movl(ofs_param, c_rarg2);
4184 __masm-> movl(limit_param, c_rarg3);
4185 }
4186 __masm-> fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block);
4187
4188 __masm-> addptr(rsp, 2 * wordSize);
4189 __masm-> pop(r15);
4190 __masm-> pop(rsi);
4191 __masm-> pop(rdi);
4192 __masm-> pop(rbx);
4193 __masm-> leave();
4194 __masm-> ret(0);
4195 return start;
4196 }
4197
4198 address generate_upper_word_mask() {
4199 __masm-> align64();
4200 StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
4201 address start = __masm-> pc();
4202 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4203 __masm-> emit_data64(0xFFFFFFFF00000000, relocInfo::none);
4204 return start;
4205 }
4206
4207 address generate_shuffle_byte_flip_mask() {
4208 __masm-> align64();
4209 StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
4210 address start = __masm-> pc();
4211 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4212 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4213 return start;
4214 }
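// How the flip masks work (illustrative): (v)pshufb computes dst[j] = src[mask[j]]
// byte-wise, and the 16 mask bytes emitted above read 15,14,...,1,0 in memory
// (little-endian qwords), so the shuffle reverses a whole 16-byte lane to
// match the big-endian word order the SHA algorithms expect.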
4215
4216 // ofs and limit are used for multi-block byte arrays.
4217 // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
4218 address generate_sha1_implCompress(bool multi_block, const char *name) {
4219 __masm-> align(CodeEntryAlignment);
4220 StubCodeMark mark(this, "StubRoutines", name);
4221 address start = __masm-> pc();
4222
4223 Register buf = c_rarg0;
4224 Register state = c_rarg1;
4225 Register ofs = c_rarg2;
4226 Register limit = c_rarg3;
4227
4228 const XMMRegister abcd = xmm0;
4229 const XMMRegister e0 = xmm1;
4230 const XMMRegister e1 = xmm2;
4231 const XMMRegister msg0 = xmm3;
4232
4233 const XMMRegister msg1 = xmm4;
4234 const XMMRegister msg2 = xmm5;
4235 const XMMRegister msg3 = xmm6;
4236 const XMMRegister shuf_mask = xmm7;
4237
4238 __masm-> enter();
4239
4240 __masm-> subptr(rsp, 4 * wordSize);
4241
4242 __masm-> fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
4243 buf, state, ofs, limit, rsp, multi_block);
4244
4245 __masm-> addptr(rsp, 4 * wordSize);
4246
4247 __masm-> leave();
4248 __masm-> ret(0);
4249 return start;
4250 }
4251
4252 address generate_pshuffle_byte_flip_mask() {
4253 __masm-> align64();
4254 StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
4255 address start = __masm-> pc();
4256 __masm-> emit_data64(0x0405060700010203, relocInfo::none);
4257 __masm-> emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
4258
4259 if (VM_Version::supports_avx2()) {
4260 __masm-> emit_data64(0x0405060700010203, relocInfo::none); // second copy
4261 __masm-> emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
4262 // _SHUF_00BA
4263 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4264 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4265 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4266 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4267 // _SHUF_DC00
4268 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4269 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4270 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4271 __masm-> emit_data64(0x0b0a090803020100, relocInfo::none);
4272 }
4273
4274 return start;
4275 }
4276
4277 //Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
4278 address generate_pshuffle_byte_flip_mask_sha512() {
4279 __masm-> align32();
4280 StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512");
4281 address start = __masm-> pc();
4282 if (VM_Version::supports_avx2()) {
4283 __masm-> emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK
4284 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4285 __masm-> emit_data64(0x1011121314151617, relocInfo::none);
4286 __masm-> emit_data64(0x18191a1b1c1d1e1f, relocInfo::none);
4287 __masm-> emit_data64(0x0000000000000000, relocInfo::none); //MASK_YMM_LO
4288 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4289 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4290 __masm-> emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none);
4291 }
4292
4293 return start;
4294 }
4295
4296// ofs and limit are used for multi-block byte arrays.
4297// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
4298 address generate_sha256_implCompress(bool multi_block, const char *name) {
4299 assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), "");
4300 __masm-> align(CodeEntryAlignment);
4301 StubCodeMark mark(this, "StubRoutines", name);
4302 address start = __masm-> pc();
4303
4304 Register buf = c_rarg0;
4305 Register state = c_rarg1;
4306 Register ofs = c_rarg2;
4307 Register limit = c_rarg3;
4308
4309 const XMMRegister msg = xmm0;
4310 const XMMRegister state0 = xmm1;
4311 const XMMRegister state1 = xmm2;
4312 const XMMRegister msgtmp0 = xmm3;
4313
4314 const XMMRegister msgtmp1 = xmm4;
4315 const XMMRegister msgtmp2 = xmm5;
4316 const XMMRegister msgtmp3 = xmm6;
4317 const XMMRegister msgtmp4 = xmm7;
4318
4319 const XMMRegister shuf_mask = xmm8;
4320
4321 __masm-> enter();
4322
4323 __masm-> subptr(rsp, 4 * wordSize);
4324
4325 if (VM_Version::supports_sha()) {
4326 __masm-> fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
4327 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
4328 } else if (VM_Version::supports_avx2()) {
4329 __masm-> sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
4330 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
4331 }
4332 __masm-> addptr(rsp, 4 * wordSize);
4333 __masm-> vzeroupper();
4334 __masm-> leave();
4335 __masm-> ret(0);
4336 return start;
4337 }
4338
4339 address generate_sha512_implCompress(bool multi_block, const char *name) {
4340 assert(VM_Version::supports_avx2(), "");
4341 assert(VM_Version::supports_bmi2(), "");
4342 __masm-> align(CodeEntryAlignment);
4343 StubCodeMark mark(this, "StubRoutines", name);
4344 address start = __masm-> pc();
4345
4346 Register buf = c_rarg0;
4347 Register state = c_rarg1;
4348 Register ofs = c_rarg2;
4349 Register limit = c_rarg3;
4350
4351 const XMMRegister msg = xmm0;
4352 const XMMRegister state0 = xmm1;
4353 const XMMRegister state1 = xmm2;
4354 const XMMRegister msgtmp0 = xmm3;
4355 const XMMRegister msgtmp1 = xmm4;
4356 const XMMRegister msgtmp2 = xmm5;
4357 const XMMRegister msgtmp3 = xmm6;
4358 const XMMRegister msgtmp4 = xmm7;
4359
4360 const XMMRegister shuf_mask = xmm8;
4361
4362 __masm-> enter();
4363
4364 __masm-> sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
4365 buf, state, ofs, limit, rsp, multi_block, shuf_mask);
4366
4367 __masm-> vzeroupper();
4368 __masm-> leave();
4369 __masm-> ret(0);
4370 return start;
4371 }
4372
4373 address ghash_polynomial512_addr() {
4374 __masm-> align(CodeEntryAlignment);
4375 StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr");
4376 address start = __masm-> pc();
4377 __masm-> emit_data64(0x00000001C2000000, relocInfo::none); // POLY for reduction
4378 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4379 __masm-> emit_data64(0x00000001C2000000, relocInfo::none);
4380 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4381 __masm-> emit_data64(0x00000001C2000000, relocInfo::none);
4382 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4383 __masm-> emit_data64(0x00000001C2000000, relocInfo::none);
4384 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4385 __masm-> emit_data64(0x0000000000000001, relocInfo::none); // POLY
4386 __masm-> emit_data64(0xC200000000000000, relocInfo::none);
4387 __masm-> emit_data64(0x0000000000000001, relocInfo::none); // TWOONE
4388 __masm-> emit_data64(0x0000000100000000, relocInfo::none);
4389 return start;
4390}
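// Note (assumption, based on the usual CLMUL-style GHASH construction): the
// repeated 0xC2.../...01 quadword pairs encode a constant derived from the
// bit-reflected GHASH polynomial g(x) = x^128 + x^7 + x^2 + x + 1, replicated
// across the 512-bit lanes so the vectorized carry-less-multiply reduction
// can consume one broadcast constant.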
4391
4392 // Vector AES Galois Counter Mode implementation. Parameters:
4393 // Windows regs | Linux regs
4394 // in = c_rarg0 (rcx) | c_rarg0 (rsi)
4395 // len = c_rarg1 (rdx) | c_rarg1 (rdi)
4396 // ct = c_rarg2 (r8) | c_rarg2 (rdx)
4397 // out = c_rarg3 (r9) | c_rarg3 (rcx)
4398 // key = r10 | c_rarg4 (r8)
4399 // state = r13 | c_rarg5 (r9)
4400 // subkeyHtbl = r14 | r11
4401 // counter = rsi | r12
4402 // return - number of processed bytes
4403 address generate_galoisCounterMode_AESCrypt() {
4404 __masm-> align(CodeEntryAlignment);
4405 StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt");
4406 address start = __masm-> pc();
4407 const Register in = c_rarg0;
4408 const Register len = c_rarg1;
4409 const Register ct = c_rarg2;
4410 const Register out = c_rarg3;
4411 // and updated with the incremented counter in the end
4412#ifndef _WIN64
4413 const Register key = c_rarg4;
4414 const Register state = c_rarg5;
4415 const Address subkeyH_mem(rbp, 2 * wordSize);
4416 const Register subkeyHtbl = r11;
4417 const Address avx512_subkeyH_mem(rbp, 3 * wordSize);
4418 const Register avx512_subkeyHtbl = r13;
4419 const Address counter_mem(rbp, 4 * wordSize);
4420 const Register counter = r12;
4421#else
4422 const Address key_mem(rbp, 6 * wordSize);
4423 const Register key = r10;
4424 const Address state_mem(rbp, 7 * wordSize);
4425 const Register state = r13;
4426 const Address subkeyH_mem(rbp, 8 * wordSize);
4427 const Register subkeyHtbl = r14;
4428 const Address avx512_subkeyH_mem(rbp, 9 * wordSize);
4429 const Register avx512_subkeyHtbl = r12;
4430 const Address counter_mem(rbp, 10 * wordSize);
4431 const Register counter = rsi;
4432#endif
4433 __masm-> enter();
4434 // Save state before entering routine
4435 __masm-> push(r12);
4436 __masm-> push(r13);
4437 __masm-> push(r14);
4438 __masm-> push(r15);
4439 __masm-> push(rbx);
4440#ifdef _WIN64
4441 // on win64, save rsi and fill key/state from their stack positions
4442 __masm-> push(rsi);
4443 __masm-> movptr(key, key_mem);
4444 __masm-> movptr(state, state_mem);
4445#endif
4446 __masm-> movptr(subkeyHtbl, subkeyH_mem);
4447 __masm-> movptr(avx512_subkeyHtbl, avx512_subkeyH_mem);
4448 __masm-> movptr(counter, counter_mem);
4449
4450 __masm-> aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter);
4451
4452 // Restore state before leaving routine
4453#ifdef _WIN64
4454 __masm-> pop(rsi);
4455#endif
4456 __masm-> pop(rbx);
4457 __masm-> pop(r15);
4458 __masm-> pop(r14);
4459 __masm-> pop(r13);
4460 __masm-> pop(r12);
4461
4462 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4463 __masm-> ret(0);
4464 return start;
4465 }
4466
4467 // This mask is used for incrementing counter value (linc0, linc4, etc.)
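// Rationale (illustrative): the CTR counter is big-endian in memory, so it is
// byte-swapped once with lbswapmask, stepped with plain integer adds of the
// linc* constants below (1, 4, 8, 16 or 32 blocks at a time), and swapped
// back before each AES encryption of the counter.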
4468 address counter_mask_addr() {
4469 __masm-> align64();
4470 StubCodeMark mark(this, "StubRoutines", "counter_mask_addr");
4471 address start = __masm-> pc();
4472 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);//lbswapmask
4473 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4474 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4475 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4476 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4477 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4478 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
4479 __masm-> emit_data64(0x0001020304050607, relocInfo::none);
4480 __masm-> emit_data64(0x0000000000000000, relocInfo::none);//linc0 = counter_mask_addr+64
4481 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4482 __masm-> emit_data64(0x0000000000000001, relocInfo::none);//counter_mask_addr() + 80
4483 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4484 __masm-> emit_data64(0x0000000000000002, relocInfo::none);
4485 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4486 __masm-> emit_data64(0x0000000000000003, relocInfo::none);
4487 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4488 __masm-> emit_data64(0x0000000000000004, relocInfo::none);//linc4 = counter_mask_addr() + 128
4489 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4490 __masm-> emit_data64(0x0000000000000004, relocInfo::none);
4491 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4492 __masm-> emit_data64(0x0000000000000004, relocInfo::none);
4493 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4494 __masm-> emit_data64(0x0000000000000004, relocInfo::none);
4495 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4496 __masm-> emit_data64(0x0000000000000008, relocInfo::none);//linc8 = counter_mask_addr() + 192
4497 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4498 __masm-> emit_data64(0x0000000000000008, relocInfo::none);
4499 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4500 __masm-> emit_data64(0x0000000000000008, relocInfo::none);
4501 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4502 __masm-> emit_data64(0x0000000000000008, relocInfo::none);
4503 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4504 __masm-> emit_data64(0x0000000000000020, relocInfo::none);//linc32 = counter_mask_addr() + 256
4505 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4506 __masm-> emit_data64(0x0000000000000020, relocInfo::none);
4507 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4508 __masm-> emit_data64(0x0000000000000020, relocInfo::none);
4509 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4510 __masm-> emit_data64(0x0000000000000020, relocInfo::none);
4511 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4512 __masm-> emit_data64(0x0000000000000010, relocInfo::none);//linc16 = counter_mask_addr() + 320
4513 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4514 __masm-> emit_data64(0x0000000000000010, relocInfo::none);
4515 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4516 __masm-> emit_data64(0x0000000000000010, relocInfo::none);
4517 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4518 __masm-> emit_data64(0x0000000000000010, relocInfo::none);
4519 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
4520 return start;
4521 }
4522
4523 // Vector AES Counter implementation
4524 address generate_counterMode_VectorAESCrypt() {
4525 __masm-> align(CodeEntryAlignment);
4526 StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
4527 address start = __masm-> pc();
4528 const Register from = c_rarg0; // source array address
4529 const Register to = c_rarg1; // destination array address
4530 const Register key = c_rarg2; // key array address r8
4531 const Register counter = c_rarg3; // counter byte array initialized from counter array address
4532 // and updated with the incremented counter in the end
4533#ifndef _WIN64
4534 const Register len_reg = c_rarg4;
4535 const Register saved_encCounter_start = c_rarg5;
4536 const Register used_addr = r10;
4537 const Address used_mem(rbp, 2 * wordSize);
4538 const Register used = r11;
4539#else
4540 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
4541 const Address saved_encCounter_mem(rbp, 7 * wordSize); // saved encrypted counter is on stack on Win64
4542 const Address used_mem(rbp, 8 * wordSize); // used length is on stack on Win64
4543 const Register len_reg = r10; // pick the first volatile windows register
4544 const Register saved_encCounter_start = r11;
4545 const Register used_addr = r13;
4546 const Register used = r14;
4547#endif
4548 __masm-> enter();
4549 // Save state before entering routine
4550 __masm-> push(r12);
4551 __masm-> push(r13);
4552 __masm-> push(r14);
4553 __masm-> push(r15);
4554#ifdef _WIN64
4555 // on win64, fill len_reg from stack position
4556 __masm-> movl(len_reg, len_mem);
4557 __masm-> movptr(saved_encCounter_start, saved_encCounter_mem);
4558 __masm-> movptr(used_addr, used_mem);
4559 __masm-> movl(used, Address(used_addr, 0));
4560#else
4561 __masm-> push(len_reg); // Save
4562 __masm-> movptr(used_addr, used_mem);
4563 __masm-> movl(used, Address(used_addr, 0));
4564#endif
4565 __masm-> push(rbx);
4566 __masm-> aesctr_encrypt(from, to, key, counter, len_reg, used, used_addr, saved_encCounter_start);
4567 // Restore state before leaving routine
4568 __masm-> pop(rbx);
4569#ifdef _WIN64
4570 __masm-> movl(rax, len_mem); // return length
4571#else
4572 __masm-> pop(rax); // return length
4573#endif
4574 __masm-> pop(r15);
4575 __masm-> pop(r14);
4576 __masm-> pop(r13);
4577 __masm-> pop(r12);
4578
4579 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4580 __masm-> ret(0);
4581 return start;
4582 }
4583
4584 // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
4585 // to hide instruction latency
4586 //
4587 // Arguments:
4588 //
4589 // Inputs:
4590 // c_rarg0 - source byte array address
4591 // c_rarg1 - destination byte array address
4592 // c_rarg2 - K (key) in little endian int array
4593 // c_rarg3 - counter vector byte array address
4594 // Linux
4595 // c_rarg4 - input length
4596 // c_rarg5 - saved encryptedCounter start
4597 // rbp + 6 * wordSize - saved used length
4598 // Windows
4599 // rbp + 6 * wordSize - input length
4600 // rbp + 7 * wordSize - saved encryptedCounter start
4601 // rbp + 8 * wordSize - saved used length
4602 //
4603 // Output:
4604 // rax - input length
4605 //
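// Reference model (illustrative only, not the generated code): CTR mode turns
// AES into a stream cipher,
//   for (long i = 0; i < nblocks; i++)
//     out[i] = in[i] ^ AES_encrypt(counter + i, key);   // hypothetical helper
// and leftover keystream bytes are kept in the saved encryptedCounter buffer
// plus the 'used' count so the next invocation can resume mid-block.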
4606 address generate_counterMode_AESCrypt_Parallel() {
4607 assert(UseAES, "need AES instructions and misaligned SSE support");
4608 __masm-> align(CodeEntryAlignment);
4609 StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
4610 address start = __masm-> pc();
4611 const Register from = c_rarg0; // source array address
4612 const Register to = c_rarg1; // destination array address
4613 const Register key = c_rarg2; // key array address
4614 const Register counter = c_rarg3; // counter byte array initialized from counter array address
4615 // and updated with the incremented counter in the end
4616#ifndef _WIN64
4617 const Register len_reg = c_rarg4;
4618 const Register saved_encCounter_start = c_rarg5;
4619 const Register used_addr = r10;
4620 const Address used_mem(rbp, 2 * wordSize);
4621 const Register used = r11;
4622#else
4623 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
4624 const Address saved_encCounter_mem(rbp, 7 * wordSize); // saved encrypted counter is on stack on Win64
4625 const Address used_mem(rbp, 8 * wordSize); // used length is on stack on Win64
4626 const Register len_reg = r10; // pick the first volatile windows register
4627 const Register saved_encCounter_start = r11;
4628 const Register used_addr = r13;
4629 const Register used = r14;
4630#endif
4631 const Register pos = rax;
4632
4633 const int PARALLEL_FACTOR = 6;
4634 const XMMRegister xmm_counter_shuf_mask = xmm0;
4635 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
4636 const XMMRegister xmm_curr_counter = xmm2;
4637
4638 const XMMRegister xmm_key_tmp0 = xmm3;
4639 const XMMRegister xmm_key_tmp1 = xmm4;
4640
4641 // registers holding the four results in the parallelized loop
4642 const XMMRegister xmm_result0 = xmm5;
4643 const XMMRegister xmm_result1 = xmm6;
4644 const XMMRegister xmm_result2 = xmm7;
4645 const XMMRegister xmm_result3 = xmm8;
4646 const XMMRegister xmm_result4 = xmm9;
4647 const XMMRegister xmm_result5 = xmm10;
4648
4649 const XMMRegister xmm_from0 = xmm11;
4650 const XMMRegister xmm_from1 = xmm12;
4651 const XMMRegister xmm_from2 = xmm13;
4652 const XMMRegister xmm_from3 = xmm14; // the last one is xmm14; we have to preserve it on WIN64.
4653 const XMMRegister xmm_from4 = xmm3; // reuse xmm3 and xmm4, since xmm_key_tmp0/1 are not needed while loading the input text
4654 const XMMRegister xmm_from5 = xmm4;
4655
4656 //for key_128, key_192, key_256
4657 const int rounds[3] = {10, 12, 14};
4658 Label L_exit_preLoop, L_preLoop_start;
4659 Label L_multiBlock_loopTop[3];
4660 Label L_singleBlockLoopTop[3];
4661 Label L__incCounter[3][6]; //for 6 blocks
4662 Label L__incCounter_single[3]; //for single block, key128, key192, key256
4663 Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
4664 Label L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3];
4665
4666 Label L_exit;
4667
4668 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
4669
4670#ifdef _WIN64
4671 // allocate spill slots for r13, r14
4672 enum {
4673 saved_r13_offset,
4674 saved_r14_offset
4675 };
4676 __masm-> subptr(rsp, 2 * wordSize);
4677 __masm-> movptr(Address(rsp, saved_r13_offset * wordSize), r13);
4678 __masm-> movptr(Address(rsp, saved_r14_offset * wordSize), r14);
4679
4680 // on win64, fill len_reg from stack position
4681 __masm-> movl(len_reg, len_mem);
4682 __masm-> movptr(saved_encCounter_start, saved_encCounter_mem);
4683 __masm-> movptr(used_addr, used_mem);
4684 __masm-> movl(used, Address(used_addr, 0));
4685#else
4686 __masm-> push(len_reg); // Save
4687 __masm-> movptr(used_addr, used_mem);
4688 __masm-> movl(used, Address(used_addr, 0));
4689#endif
4690
4691 __masm-> push(rbx); // Save RBX
4692 __masm-> movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter
4693 __masm-> movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch
4694 __masm-> pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled
4695 __masm-> movptr(pos, 0);
4696
4697 // Use the partially used encrypted counter from the last invocation
4698 __masm-> BIND(L_preLoop_start);
4699 __masm-> cmpptr(used, 16);
4700 __masm-> jcc(Assembler::aboveEqual, L_exit_preLoop);
4701 __masm-> cmpptr(len_reg, 0);
4702 __masm-> jcc(Assembler::lessEqual, L_exit_preLoop);
4703 __masm-> movb(rbx, Address(saved_encCounter_start, used));
4704 __masm-> xorb(rbx, Address(from, pos));
4705 __masm-> movb(Address(to, pos), rbx);
4706 __masm-> addptr(pos, 1);
4707 __masm-> addptr(used, 1);
4708 __masm-> subptr(len_reg, 1);
4709
4710 __masm-> jmp(L_preLoop_start);
4711
4712 __masm-> BIND(L_exit_preLoop);
4713 __masm-> movl(Address(used_addr, 0), used);
4714
4715 // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
4716 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch
4717 __masm-> movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
4718 __masm-> cmpl(rbx, 52);
4719 __masm-> jcc(Assembler::equal, L_multiBlock_loopTop[1]);
4720 __masm-> cmpl(rbx, 60);
4721 __masm-> jcc(Assembler::equal, L_multiBlock_loopTop[2]);
4722
4723#define CTR_DoSix(opc, src_reg) \
4724 __masm-> opc(xmm_result0, src_reg); \
4725 __masm-> opc(xmm_result1, src_reg); \
4726 __masm-> opc(xmm_result2, src_reg); \
4727 __masm-> opc(xmm_result3, src_reg); \
4728 __masm-> opc(xmm_result4, src_reg); \
4729 __masm-> opc(xmm_result5, src_reg);
4730
4731 // k == 0 : generate code for key_128
4732 // k == 1 : generate code for key_192
4733 // k == 2 : generate code for key_256
4734 for (int k = 0; k < 3; ++k) {
4735 //multi blocks starts here
4736 __masm-> align(OptoLoopAlignment);
4737 __masm-> BIND(L_multiBlock_loopTop[k]);
4738 __masm-> cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
4739 __masm-> jcc(Assembler::less, L_singleBlockLoopTop[k]);
4740 load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
4741
4742 //load, then increase counters
4743 CTR_DoSix(movdqa, xmm_curr_counter);
4744 inc_counter(rbx, xmm_result1, 0x01, L__incCounter[k][0]);
4745 inc_counter(rbx, xmm_result2, 0x02, L__incCounter[k][1]);
4746 inc_counter(rbx, xmm_result3, 0x03, L__incCounter[k][2]);
4747 inc_counter(rbx, xmm_result4, 0x04, L__incCounter[k][3]);
4748 inc_counter(rbx, xmm_result5, 0x05, L__incCounter[k][4]);
4749 inc_counter(rbx, xmm_curr_counter, 0x06, L__incCounter[k][5]);
4750 CTR_DoSix(pshufb, xmm_counter_shuf_mask); // after incrementing, shuffle counters back for PXOR
4751 CTR_DoSix(pxor, xmm_key_tmp0); // PXOR with Round 0 key
4752
4753 //load two ROUND_KEYs at a time
4754 for (int i = 1; i < rounds[k]; ) {
4755 load_key(xmm_key_tmp1, key, (0x10 * i), xmm_key_shuf_mask);
4756 load_key(xmm_key_tmp0, key, (0x10 * (i+1)), xmm_key_shuf_mask);
4757 CTR_DoSix(aesenc, xmm_key_tmp1);
4758 i++;
4759 if (i != rounds[k]) {
4760 CTR_DoSix(aesenc, xmm_key_tmp0);
4761 } else {
4762 CTR_DoSix(aesenclast, xmm_key_tmp0);
4763 }
4764 i++;
4765 }
4766
4767 // get next PARALLEL_FACTOR blocks into xmm_result registers
4768 __masm-> movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
4769 __masm-> movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
4770 __masm-> movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
4771 __masm-> movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
4772 __masm-> movdqu(xmm_from4, Address(from, pos, Address::times_1, 4 * AESBlockSize));
4773 __masm-> movdqu(xmm_from5, Address(from, pos, Address::times_1, 5 * AESBlockSize));
4774
4775 __masm-> pxor(xmm_result0, xmm_from0);
4776 __masm-> pxor(xmm_result1, xmm_from1);
4777 __masm-> pxor(xmm_result2, xmm_from2);
4778 __masm-> pxor(xmm_result3, xmm_from3);
4779 __masm-> pxor(xmm_result4, xmm_from4);
4780 __masm-> pxor(xmm_result5, xmm_from5);
4781
4782 // store 6 results into the next 96 bytes of output
4783 __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
4784 __masm-> movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
4785 __masm-> movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
4786 __masm-> movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
4787 __masm-> movdqu(Address(to, pos, Address::times_1, 4 * AESBlockSize), xmm_result4);
4788 __masm-> movdqu(Address(to, pos, Address::times_1, 5 * AESBlockSize), xmm_result5);
4789
4790 __masm-> addptr(pos, PARALLEL_FACTOR * AESBlockSize); // advance position past the 6 processed blocks
4791 __masm-> subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length
4792 __masm-> jmp(L_multiBlock_loopTop[k]);
4793
4794 // singleBlock starts here
4795 __masm-> align(OptoLoopAlignment);
4796 __masm-> BIND(L_singleBlockLoopTop[k]);
4797 __masm-> cmpptr(len_reg, 0);
4798 __masm-> jcc(Assembler::lessEqual, L_exit);
4799 load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
4800 __masm-> movdqa(xmm_result0, xmm_curr_counter);
4801 inc_counter(rbx, xmm_curr_counter, 0x01, L__incCounter_single[k]);
4802 __masm-> pshufb(xmm_result0, xmm_counter_shuf_mask);
4803 __masm-> pxor(xmm_result0, xmm_key_tmp0);
4804 for (int i = 1; i < rounds[k]; i++) {
4805 load_key(xmm_key_tmp0, key, (0x10 * i), xmm_key_shuf_mask);
4806 __masm-> aesenc(xmm_result0, xmm_key_tmp0);
4807 }
4808 load_key(xmm_key_tmp0, key, (rounds[k] * 0x10), xmm_key_shuf_mask);
4809 __masm-> aesenclast(xmm_result0, xmm_key_tmp0);
4810 __masm-> cmpptr(len_reg, AESBlockSize);
4811 __masm-> jcc(Assembler::less, L_processTail_insr[k]);
4812 __masm-> movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
4813 __masm-> pxor(xmm_result0, xmm_from0);
4814 __masm-> movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
4815 __masm-> addptr(pos, AESBlockSize);
4816 __masm-> subptr(len_reg, AESBlockSize);
4817 __masm-> jmp(L_singleBlockLoopTop[k]);
4818 __masm-> BIND(L_processTail_insr[k]); // Process the tail part of the input array
4819 __masm-> addptr(pos, len_reg); // 1. Insert bytes from src array into xmm_from0 register
4820 __masm-> testptr(len_reg, 8);
4821 __masm-> jcc(Assembler::zero, L_processTail_4_insr[k]);
4822 __masm-> subptr(pos,8);
4823 __masm-> pinsrq(xmm_from0, Address(from, pos), 0);
4824 __masm-> BIND(L_processTail_4_insr[k]);
4825 __masm-> testptr(len_reg, 4);
4826 __masm-> jcc(Assembler::zero, L_processTail_2_insr[k]);
4827 __masm-> subptr(pos,4);
4828 __masm-> pslldq(xmm_from0, 4);
4829 __masm-> pinsrd(xmm_from0, Address(from, pos), 0);
4830 __masm-> BIND(L_processTail_2_insr[k]);
4831 __masm-> testptr(len_reg, 2);
4832 __masm-> jcc(Assembler::zero, L_processTail_1_insr[k]);
4833 __masm-> subptr(pos, 2);
4834 __masm-> pslldq(xmm_from0, 2);
4835 __masm-> pinsrw(xmm_from0, Address(from, pos), 0);
4836 __masm-> BIND(L_processTail_1_insr[k]);
4837 __masm-> testptr(len_reg, 1);
4838 __masm-> jcc(Assembler::zero, L_processTail_exit_insr[k]);
4839 __masm-> subptr(pos, 1);
4840 __masm-> pslldq(xmm_from0, 1);
4841 __masm-> pinsrb(xmm_from0, Address(from, pos), 0);
4842 __masm-> BIND(L_processTail_exit_insr[k]);
4843
4844 __masm-> movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes.
4845 __masm-> pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation.
4846
4847 __masm-> testptr(len_reg, 8);
4848 __masm-> jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. array
4849 __masm-> pextrq(Address(to, pos), xmm_result0, 0);
4850 __masm-> psrldq(xmm_result0, 8);
4851 __masm-> addptr(pos, 8);
4852 __masm-> BIND(L_processTail_4_extr[k]);
4853 __masm-> testptr(len_reg, 4);
4854 __masm-> jcc(Assembler::zero, L_processTail_2_extr[k]);
4855 __masm-> pextrd(Address(to, pos), xmm_result0, 0);
4856 __masm-> psrldq(xmm_result0, 4);
4857 __masm-> addptr(pos, 4);
4858 __masm-> BIND(L_processTail_2_extr[k]);
4859 __masm-> testptr(len_reg, 2);
4860 __masm-> jcc(Assembler::zero, L_processTail_1_extr[k]);
4861 __masm-> pextrw(Address(to, pos), xmm_result0, 0);
4862 __masm-> psrldq(xmm_result0, 2);
4863 __masm-> addptr(pos, 2);
4864 __masm-> BIND(L_processTail_1_extr[k]);
4865 __masm-> testptr(len_reg, 1);
4866 __masm-> jcc(Assembler::zero, L_processTail_exit_extr[k]);
4867 __masm-> pextrb(Address(to, pos), xmm_result0, 0);
4868
4869 __masm-> BIND(L_processTail_exit_extr[k]);
4870 __masm-> movl(Address(used_addr, 0), len_reg);
4871 __masm-> jmp(L_exit);
4872
4873 }
4874
 4875 __masm-> BIND(L_exit);
4876 __masm-> pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
4877 __masm-> movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
4878 __masm-> pop(rbx); // pop the saved RBX.
4879#ifdef _WIN64
4880 __masm-> movl(rax, len_mem);
4881 __masm-> movptr(r13, Address(rsp, saved_r13_offset * wordSize));
4882 __masm-> movptr(r14, Address(rsp, saved_r14_offset * wordSize));
4883 __masm-> addptr(rsp, 2 * wordSize);
4884#else
4885 __masm-> pop(rax); // return 'len'
4886#endif
4887 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
4888 __masm-> ret(0);
4889 return start;
4890 }
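
// A minimal scalar sketch of the partial-block (tail) handling done by the
// L_processTail_* sequences above, assuming a hypothetical buffer
// 'encrypted_counter' that holds the AES-encrypted counter block. The SIMD
// code packs the tail into an XMM register with pinsrq/pinsrd/pinsrw/pinsrb
// instead of looping, but the math is the same:
#include <cstddef>

static void ctr_crypt_tail(const unsigned char* from, unsigned char* to,
                           std::size_t len, // remaining bytes, 0 < len < 16
                           const unsigned char encrypted_counter[16],
                           unsigned char saved_encCounter[16],
                           unsigned int* used) {
  // Save the whole encrypted counter block so the next invocation can
  // consume the unused key-stream bytes (the movdqu to saved_encCounter_start).
  for (int i = 0; i < 16; i++) saved_encCounter[i] = encrypted_counter[i];
  // XOR the remaining input bytes with the key stream.
  for (std::size_t i = 0; i < len; i++)
    to[i] = (unsigned char)(from[i] ^ encrypted_counter[i]);
  *used = (unsigned int)len; // mirrors movl(Address(used_addr, 0), len_reg)
}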
4891
4892void roundDec(XMMRegister xmm_reg) {
4893 __masm-> vaesdec(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit);
4894 __masm-> vaesdec(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit);
4895 __masm-> vaesdec(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit);
4896 __masm-> vaesdec(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit);
4897 __masm-> vaesdec(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit);
4898 __masm-> vaesdec(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit);
4899 __masm-> vaesdec(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit);
4900 __masm-> vaesdec(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit);
4901}
4902
4903void roundDeclast(XMMRegister xmm_reg) {
4904 __masm-> vaesdeclast(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit);
4905 __masm-> vaesdeclast(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit);
4906 __masm-> vaesdeclast(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit);
4907 __masm-> vaesdeclast(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit);
4908 __masm-> vaesdeclast(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit);
4909 __masm-> vaesdeclast(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit);
4910 __masm-> vaesdeclast(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit);
4911 __masm-> vaesdeclast(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit);
4912}
4913
 4914 void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = NULL) {
4915 __masm-> movdqu(xmmdst, Address(key, offset));
 4916 if (xmm_shuf_mask != NULL) {
4917 __masm-> pshufb(xmmdst, xmm_shuf_mask);
4918 } else {
4919 __masm-> pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
4920 }
4921 __masm-> evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
4922
4923 }
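
// A scalar sketch of what ev_load_key produces: one 128-bit round key is
// loaded, byte-shuffled via pshufb (the Java expanded key is stored as ints,
// so the byte order must be adjusted for the AES instructions), and then
// evshufi64x2(dst, dst, dst, 0x0) replicates the low 128-bit lane into all
// four lanes of the 512-bit register:
static void broadcast_round_key(const unsigned char key128[16],
                                unsigned char zmm[64]) {
  for (int lane = 0; lane < 4; lane++)   // four 128-bit lanes of a ZMM
    for (int i = 0; i < 16; i++)
      zmm[lane * 16 + i] = key128[i];
}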
4924
4925address generate_cipherBlockChaining_decryptVectorAESCrypt() {
 4926 assert(VM_Version::supports_avx512_vaes(), "need AES instructions and misaligned SSE support");
4927 __masm-> align(CodeEntryAlignment);
4928 StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
4929 address start = __masm-> pc();
4930
4931 const Register from = c_rarg0; // source array address
4932 const Register to = c_rarg1; // destination array address
4933 const Register key = c_rarg2; // key array address
4934 const Register rvec = c_rarg3; // r byte array initialized from initvector array address
4935 // and left with the results of the last encryption block
4936#ifndef _WIN64
4937 const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
4938#else
4939 const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
4940 const Register len_reg = r11; // pick the volatile windows register
4941#endif
4942
4943 Label Loop, Loop1, L_128, L_256, L_192, KEY_192, KEY_256, Loop2, Lcbc_dec_rem_loop,
4944 Lcbc_dec_rem_last, Lcbc_dec_ret, Lcbc_dec_rem, Lcbc_exit;
4945
4946 __masm-> enter();
4947
4948#ifdef _WIN64
4949 // on win64, fill len_reg from stack position
4950 __masm-> movl(len_reg, len_mem);
4951#else
4952 __masm-> push(len_reg); // Save
4953#endif
4954 __masm-> push(rbx);
4955 __masm-> vzeroupper();
4956
4957 // Temporary variable declaration for swapping key bytes
4958 const XMMRegister xmm_key_shuf_mask = xmm1;
4959 __masm-> movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
4960
 4961 // Determine the number of rounds from the expanded key length: 44 ints for 10 rounds, 52 for 12 rounds, 60 for 14 rounds
4962 const Register rounds = rbx;
4963 __masm-> movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
4964
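 // (The Java expanded key is an int[] of 4 * (nrounds + 1) elements, so the
 //  length read above is 4*(10+1) = 44 for AES-128, 4*(12+1) = 52 for
 //  AES-192 and 4*(14+1) = 60 for AES-256; the comparisons against 44 and
 //  52 below dispatch on key size.)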
4965 const XMMRegister IV = xmm0;
4966 // Load IV and broadcast value to 512-bits
4967 __masm-> evbroadcasti64x2(IV, Address(rvec, 0), Assembler::AVX_512bit);
4968
4969 // Temporary variables for storing round keys
4970 const XMMRegister RK0 = xmm30;
4971 const XMMRegister RK1 = xmm9;
4972 const XMMRegister RK2 = xmm18;
4973 const XMMRegister RK3 = xmm19;
4974 const XMMRegister RK4 = xmm20;
4975 const XMMRegister RK5 = xmm21;
4976 const XMMRegister RK6 = xmm22;
4977 const XMMRegister RK7 = xmm23;
4978 const XMMRegister RK8 = xmm24;
4979 const XMMRegister RK9 = xmm25;
4980 const XMMRegister RK10 = xmm26;
4981
4982 // Load and shuffle key
4983 // the java expanded key ordering is rotated one position from what we want
4984 // so we start from 1*16 here and hit 0*16 last
4985 ev_load_key(RK1, key, 1 * 16, xmm_key_shuf_mask);
4986 ev_load_key(RK2, key, 2 * 16, xmm_key_shuf_mask);
4987 ev_load_key(RK3, key, 3 * 16, xmm_key_shuf_mask);
4988 ev_load_key(RK4, key, 4 * 16, xmm_key_shuf_mask);
4989 ev_load_key(RK5, key, 5 * 16, xmm_key_shuf_mask);
4990 ev_load_key(RK6, key, 6 * 16, xmm_key_shuf_mask);
4991 ev_load_key(RK7, key, 7 * 16, xmm_key_shuf_mask);
4992 ev_load_key(RK8, key, 8 * 16, xmm_key_shuf_mask);
4993 ev_load_key(RK9, key, 9 * 16, xmm_key_shuf_mask);
4994 ev_load_key(RK10, key, 10 * 16, xmm_key_shuf_mask);
4995 ev_load_key(RK0, key, 0*16, xmm_key_shuf_mask);
4996
4997 // Variables for storing source cipher text
4998 const XMMRegister S0 = xmm10;
4999 const XMMRegister S1 = xmm11;
5000 const XMMRegister S2 = xmm12;
5001 const XMMRegister S3 = xmm13;
5002 const XMMRegister S4 = xmm14;
5003 const XMMRegister S5 = xmm15;
5004 const XMMRegister S6 = xmm16;
5005 const XMMRegister S7 = xmm17;
5006
5007 // Variables for storing decrypted text
5008 const XMMRegister B0 = xmm1;
5009 const XMMRegister B1 = xmm2;
5010 const XMMRegister B2 = xmm3;
5011 const XMMRegister B3 = xmm4;
5012 const XMMRegister B4 = xmm5;
5013 const XMMRegister B5 = xmm6;
5014 const XMMRegister B6 = xmm7;
5015 const XMMRegister B7 = xmm8;
5016
5017 __masm-> cmpl(rounds, 44);
5018 __masm-> jcc(Assembler::greater, KEY_192);
5019 __masm-> jmp(Loop);
5020
 5021 __masm-> BIND(KEY_192);
5022 const XMMRegister RK11 = xmm27;
5023 const XMMRegister RK12 = xmm28;
5024 ev_load_key(RK11, key, 11*16, xmm_key_shuf_mask);
5025 ev_load_key(RK12, key, 12*16, xmm_key_shuf_mask);
5026
5027 __masm-> cmpl(rounds, 52);
5028 __masm-> jcc(Assembler::greater, KEY_256);
5029 __masm-> jmp(Loop);
5030
 5031 __masm-> BIND(KEY_256);
5032 const XMMRegister RK13 = xmm29;
5033 const XMMRegister RK14 = xmm31;
5034 ev_load_key(RK13, key, 13*16, xmm_key_shuf_mask);
5035 ev_load_key(RK14, key, 14*16, xmm_key_shuf_mask);
5036
 5037 __masm-> BIND(Loop);
5038 __masm-> cmpl(len_reg, 512);
5039 __masm-> jcc(Assembler::below, Lcbc_dec_rem);
 5040 __masm-> BIND(Loop1);
5041 __masm-> subl(len_reg, 512);
5042 __masm-> evmovdquq(S0, Address(from, 0 * 64), Assembler::AVX_512bit);
5043 __masm-> evmovdquq(S1, Address(from, 1 * 64), Assembler::AVX_512bit);
5044 __masm-> evmovdquq(S2, Address(from, 2 * 64), Assembler::AVX_512bit);
5045 __masm-> evmovdquq(S3, Address(from, 3 * 64), Assembler::AVX_512bit);
5046 __masm-> evmovdquq(S4, Address(from, 4 * 64), Assembler::AVX_512bit);
5047 __masm-> evmovdquq(S5, Address(from, 5 * 64), Assembler::AVX_512bit);
5048 __masm-> evmovdquq(S6, Address(from, 6 * 64), Assembler::AVX_512bit);
5049 __masm-> evmovdquq(S7, Address(from, 7 * 64), Assembler::AVX_512bit);
5050 __masm-> leaq(from, Address(from, 8 * 64));
5051
5052 __masm-> evpxorq(B0, S0, RK1, Assembler::AVX_512bit);
5053 __masm-> evpxorq(B1, S1, RK1, Assembler::AVX_512bit);
5054 __masm-> evpxorq(B2, S2, RK1, Assembler::AVX_512bit);
5055 __masm-> evpxorq(B3, S3, RK1, Assembler::AVX_512bit);
5056 __masm-> evpxorq(B4, S4, RK1, Assembler::AVX_512bit);
5057 __masm-> evpxorq(B5, S5, RK1, Assembler::AVX_512bit);
5058 __masm-> evpxorq(B6, S6, RK1, Assembler::AVX_512bit);
5059 __masm-> evpxorq(B7, S7, RK1, Assembler::AVX_512bit);
5060
5061 __masm-> evalignq(IV, S0, IV, 0x06);
5062 __masm-> evalignq(S0, S1, S0, 0x06);
5063 __masm-> evalignq(S1, S2, S1, 0x06);
5064 __masm-> evalignq(S2, S3, S2, 0x06);
5065 __masm-> evalignq(S3, S4, S3, 0x06);
5066 __masm-> evalignq(S4, S5, S4, 0x06);
5067 __masm-> evalignq(S5, S6, S5, 0x06);
5068 __masm-> evalignq(S6, S7, S6, 0x06);
5069
5070 roundDec(RK2);
5071 roundDec(RK3);
5072 roundDec(RK4);
5073 roundDec(RK5);
5074 roundDec(RK6);
5075 roundDec(RK7);
5076 roundDec(RK8);
5077 roundDec(RK9);
5078 roundDec(RK10);
5079
5080 __masm-> cmpl(rounds, 44);
5081 __masm-> jcc(Assembler::belowEqual, L_128);
5082 roundDec(RK11);
5083 roundDec(RK12);
5084
5085 __masm-> cmpl(rounds, 52);
5086 __masm-> jcc(Assembler::belowEqual, L_192);
5087 roundDec(RK13);
5088 roundDec(RK14);
5089
 5090 __masm-> BIND(L_256);
5091 roundDeclast(RK0);
5092 __masm-> jmp(Loop2);
5093
 5094 __masm-> BIND(L_128);
5095 roundDeclast(RK0);
5096 __masm-> jmp(Loop2);
5097
 5098 __masm-> BIND(L_192);
5099 roundDeclast(RK0);
5100
 5101 __masm-> BIND(Loop2);
5102 __masm-> evpxorq(B0, B0, IV, Assembler::AVX_512bit);
5103 __masm-> evpxorq(B1, B1, S0, Assembler::AVX_512bit);
5104 __masm-> evpxorq(B2, B2, S1, Assembler::AVX_512bit);
5105 __masm-> evpxorq(B3, B3, S2, Assembler::AVX_512bit);
5106 __masm-> evpxorq(B4, B4, S3, Assembler::AVX_512bit);
5107 __masm-> evpxorq(B5, B5, S4, Assembler::AVX_512bit);
5108 __masm-> evpxorq(B6, B6, S5, Assembler::AVX_512bit);
5109 __masm-> evpxorq(B7, B7, S6, Assembler::AVX_512bit);
5110 __masm-> evmovdquq(IV, S7, Assembler::AVX_512bit);
5111
5112 __masm-> evmovdquq(Address(to, 0 * 64), B0, Assembler::AVX_512bit);
5113 __masm-> evmovdquq(Address(to, 1 * 64), B1, Assembler::AVX_512bit);
5114 __masm-> evmovdquq(Address(to, 2 * 64), B2, Assembler::AVX_512bit);
5115 __masm-> evmovdquq(Address(to, 3 * 64), B3, Assembler::AVX_512bit);
5116 __masm-> evmovdquq(Address(to, 4 * 64), B4, Assembler::AVX_512bit);
5117 __masm-> evmovdquq(Address(to, 5 * 64), B5, Assembler::AVX_512bit);
5118 __masm-> evmovdquq(Address(to, 6 * 64), B6, Assembler::AVX_512bit);
5119 __masm-> evmovdquq(Address(to, 7 * 64), B7, Assembler::AVX_512bit);
5120 __masm-> leaq(to, Address(to, 8 * 64));
5121 __masm-> jmp(Loop);
5122
 5123 __masm-> BIND(Lcbc_dec_rem);
5124 __masm-> evshufi64x2(IV, IV, IV, 0x03, Assembler::AVX_512bit);
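 // (Selecting lane 3 of IV into the low 128 bits: after the wide loop, IV
 //  holds the last 64 bytes of ciphertext, so its top lane is the chaining
 //  block that the one-block-at-a-time remainder loop below must XOR with
 //  first.)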
5125
 5126 __masm-> BIND(Lcbc_dec_rem_loop);
5127 __masm-> subl(len_reg, 16);
5128 __masm-> jcc(Assembler::carrySet, Lcbc_dec_ret);
5129
5130 __masm-> movdqu(S0, Address(from, 0));
5131 __masm-> evpxorq(B0, S0, RK1, Assembler::AVX_512bit);
5132 __masm-> vaesdec(B0, B0, RK2, Assembler::AVX_512bit);
5133 __masm-> vaesdec(B0, B0, RK3, Assembler::AVX_512bit);
5134 __masm-> vaesdec(B0, B0, RK4, Assembler::AVX_512bit);
5135 __masm-> vaesdec(B0, B0, RK5, Assembler::AVX_512bit);
5136 __masm-> vaesdec(B0, B0, RK6, Assembler::AVX_512bit);
5137 __masm-> vaesdec(B0, B0, RK7, Assembler::AVX_512bit);
5138 __masm-> vaesdec(B0, B0, RK8, Assembler::AVX_512bit);
5139 __masm-> vaesdec(B0, B0, RK9, Assembler::AVX_512bit);
5140 __masm-> vaesdec(B0, B0, RK10, Assembler::AVX_512bit);
5141 __masm-> cmpl(rounds, 44);
5142 __masm-> jcc(Assembler::belowEqual, Lcbc_dec_rem_last);
5143
5144 __masm-> vaesdec(B0, B0, RK11, Assembler::AVX_512bit);
5145 __masm-> vaesdec(B0, B0, RK12, Assembler::AVX_512bit);
5146 __masm-> cmpl(rounds, 52);
5147 __masm-> jcc(Assembler::belowEqual, Lcbc_dec_rem_last);
5148
5149 __masm-> vaesdec(B0, B0, RK13, Assembler::AVX_512bit);
5150 __masm-> vaesdec(B0, B0, RK14, Assembler::AVX_512bit);
5151
 5152 __masm-> BIND(Lcbc_dec_rem_last);
5153 __masm-> vaesdeclast(B0, B0, RK0, Assembler::AVX_512bit);
5154
5155 __masm-> evpxorq(B0, B0, IV, Assembler::AVX_512bit);
5156 __masm-> evmovdquq(IV, S0, Assembler::AVX_512bit);
5157 __masm-> movdqu(Address(to, 0), B0);
5158 __masm-> leaq(from, Address(from, 16));
5159 __masm-> leaq(to, Address(to, 16));
5160 __masm-> jmp(Lcbc_dec_rem_loop);
5161
 5162 __masm-> BIND(Lcbc_dec_ret);
5163 __masm-> movdqu(Address(rvec, 0), IV);
5164
5165 // Zero out the round keys
5166 __masm-> evpxorq(RK0, RK0, RK0, Assembler::AVX_512bit);
5167 __masm-> evpxorq(RK1, RK1, RK1, Assembler::AVX_512bit);
5168 __masm-> evpxorq(RK2, RK2, RK2, Assembler::AVX_512bit);
5169 __masm-> evpxorq(RK3, RK3, RK3, Assembler::AVX_512bit);
5170 __masm-> evpxorq(RK4, RK4, RK4, Assembler::AVX_512bit);
5171 __masm-> evpxorq(RK5, RK5, RK5, Assembler::AVX_512bit);
5172 __masm-> evpxorq(RK6, RK6, RK6, Assembler::AVX_512bit);
5173 __masm-> evpxorq(RK7, RK7, RK7, Assembler::AVX_512bit);
5174 __masm-> evpxorq(RK8, RK8, RK8, Assembler::AVX_512bit);
5175 __masm-> evpxorq(RK9, RK9, RK9, Assembler::AVX_512bit);
5176 __masm-> evpxorq(RK10, RK10, RK10, Assembler::AVX_512bit);
5177 __masm-> cmpl(rounds, 44);
5178 __masm-> jcc(Assembler::belowEqual, Lcbc_exit);
5179 __masm-> evpxorq(RK11, RK11, RK11, Assembler::AVX_512bit);
5180 __masm-> evpxorq(RK12, RK12, RK12, Assembler::AVX_512bit);
5181 __masm-> cmpl(rounds, 52);
5182 __masm-> jcc(Assembler::belowEqual, Lcbc_exit);
5183 __masm-> evpxorq(RK13, RK13, RK13, Assembler::AVX_512bit);
5184 __masm-> evpxorq(RK14, RK14, RK14, Assembler::AVX_512bit);
5185
 5186 __masm-> BIND(Lcbc_exit);
5187 __masm-> pop(rbx);
5188#ifdef _WIN64
5189 __masm-> movl(rax, len_mem);
5190#else
5191 __masm-> pop(rax); // return length
5192#endif
5193 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
5194 __masm-> ret(0);
5195 return start;
5196}
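
// The wide loop above exploits the CBC decryption recurrence
//
//   P[i] = AES_decrypt(C[i]) ^ C[i-1],  with C[-1] = IV,
//
// in which every plaintext block depends only on ciphertext, so eight
// 512-bit lanes can be decrypted in parallel; the evalignq chain merely
// shifts the saved ciphertext by one block so that each lane is XORed with
// its predecessor in Loop2.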
5197
5198// Polynomial x^128+x^127+x^126+x^121+1
5199address ghash_polynomial_addr() {
5200 __masm-> align(CodeEntryAlignment);
5201 StubCodeMark mark(this, "StubRoutines", "_ghash_poly_addr");
5202 address start = __masm-> pc();
5203 __masm-> emit_data64(0x0000000000000001, relocInfo::none);
5204 __masm-> emit_data64(0xc200000000000000, relocInfo::none);
5205 return start;
5206}
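
// Note on the constant: GHASH multiplies in GF(2^128) modulo
// x^128 + x^7 + x^2 + x + 1 but processes bits in reflected order, and
// reflecting that modulus yields the polynomial named above,
// x^128 + x^127 + x^126 + x^121 + 1, i.e. the 128-bit value
// 0xc2000000000000000000000000000001 emitted as the two qwords here.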
5207
5208address ghash_shufflemask_addr() {
5209 __masm-> align(CodeEntryAlignment);
5210 StubCodeMark mark(this, "StubRoutines", "_ghash_shuffmask_addr");
5211 address start = __masm-> pc();
5212 __masm-> emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none);
5213 __masm-> emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none);
5214 return start;
5215}
5216
5217// Ghash single and multi block operations using AVX instructions
5218address generate_avx_ghash_processBlocks() {
5219 __masm-> align(CodeEntryAlignment);
5220
5221 StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
5222 address start = __masm-> pc();
5223
5224 // arguments
5225 const Register state = c_rarg0;
5226 const Register htbl = c_rarg1;
5227 const Register data = c_rarg2;
5228 const Register blocks = c_rarg3;
5229 __masm-> enter();
5230 // Save state before entering routine
5231 __masm-> avx_ghash(state, htbl, data, blocks);
5232 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
5233 __masm-> ret(0);
5234 return start;
5235}
5236
5237 // byte swap x86 long
5238 address generate_ghash_long_swap_mask() {
5239 __masm-> align(CodeEntryAlignment);
5240 StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
5241 address start = __masm-> pc();
5242 __masm-> emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );
5243 __masm-> emit_data64(0x0706050403020100, relocInfo::none );
5244 return start;
5245 }
5246
5247 // byte swap x86 byte array
5248 address generate_ghash_byte_swap_mask() {
5249 __masm-> align(CodeEntryAlignment);
5250 StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
5251 address start = __masm-> pc();
5252 __masm-> emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );
5253 __masm-> emit_data64(0x0001020304050607, relocInfo::none );
5254 return start;
5255 }
5256
5257 /* Single and multi-block ghash operations */
5258 address generate_ghash_processBlocks() {
5259 __masm-> align(CodeEntryAlignment);
5260 Label L_ghash_loop, L_exit;
5261 StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
5262 address start = __masm-> pc();
5263
5264 const Register state = c_rarg0;
5265 const Register subkeyH = c_rarg1;
5266 const Register data = c_rarg2;
5267 const Register blocks = c_rarg3;
5268
5269 const XMMRegister xmm_temp0 = xmm0;
5270 const XMMRegister xmm_temp1 = xmm1;
5271 const XMMRegister xmm_temp2 = xmm2;
5272 const XMMRegister xmm_temp3 = xmm3;
5273 const XMMRegister xmm_temp4 = xmm4;
5274 const XMMRegister xmm_temp5 = xmm5;
5275 const XMMRegister xmm_temp6 = xmm6;
5276 const XMMRegister xmm_temp7 = xmm7;
5277 const XMMRegister xmm_temp8 = xmm8;
5278 const XMMRegister xmm_temp9 = xmm9;
5279 const XMMRegister xmm_temp10 = xmm10;
5280
5281 __masm-> enter();
5282
5283 __masm-> movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
5284
5285 __masm-> movdqu(xmm_temp0, Address(state, 0));
5286 __masm-> pshufb(xmm_temp0, xmm_temp10);
5287
5288
 5289 __masm-> BIND(L_ghash_loop);
5290 __masm-> movdqu(xmm_temp2, Address(data, 0));
5291 __masm-> pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
5292
5293 __masm-> movdqu(xmm_temp1, Address(subkeyH, 0));
5294 __masm-> pshufb(xmm_temp1, xmm_temp10);
5295
5296 __masm-> pxor(xmm_temp0, xmm_temp2);
5297
5298 //
5299 // Multiply with the hash key
5300 //
5301 __masm-> movdqu(xmm_temp3, xmm_temp0);
5302 __masm-> pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
5303 __masm-> movdqu(xmm_temp4, xmm_temp0);
5304 __masm-> pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
5305
5306 __masm-> movdqu(xmm_temp5, xmm_temp0);
5307 __masm-> pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
5308 __masm-> movdqu(xmm_temp6, xmm_temp0);
5309 __masm-> pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
5310
5311 __masm-> pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
5312
5313 __masm-> movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
 5314 __masm-> psrldq(xmm_temp4, 8); // shift xmm4 right by 64 bits
 5315 __masm-> pslldq(xmm_temp5, 8); // shift xmm5 left by 64 bits
5316 __masm-> pxor(xmm_temp3, xmm_temp5);
5317 __masm-> pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
5318 // of the carry-less multiplication of
5319 // xmm0 by xmm1.
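     // Written with 64-bit halves a = a1:a0 and b = b1:b0, the sequence
     // above assembles the 256-bit carry-less product
     //   a*b = (a1*b1) << 128 ^ ((a0*b1) ^ (a1*b0)) << 64 ^ (a0*b0),
     // the middle term being split across the two halves by the
     // pslldq/psrldq pair.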
5320
5321 // We shift the result of the multiplication by one bit position
 5322 // to the left to cope with the fact that the bits are reversed.
5323 __masm-> movdqu(xmm_temp7, xmm_temp3);
5324 __masm-> movdqu(xmm_temp8, xmm_temp6);
5325 __masm-> pslld(xmm_temp3, 1);
5326 __masm-> pslld(xmm_temp6, 1);
5327 __masm-> psrld(xmm_temp7, 31);
5328 __masm-> psrld(xmm_temp8, 31);
5329 __masm-> movdqu(xmm_temp9, xmm_temp7);
5330 __masm-> pslldq(xmm_temp8, 4);
5331 __masm-> pslldq(xmm_temp7, 4);
5332 __masm-> psrldq(xmm_temp9, 12);
5333 __masm-> por(xmm_temp3, xmm_temp7);
5334 __masm-> por(xmm_temp6, xmm_temp8);
5335 __masm-> por(xmm_temp6, xmm_temp9);
5336
5337 //
5338 // First phase of the reduction
5339 //
5340 // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
5341 // independently.
5342 __masm-> movdqu(xmm_temp7, xmm_temp3);
5343 __masm-> movdqu(xmm_temp8, xmm_temp3);
5344 __masm-> movdqu(xmm_temp9, xmm_temp3);
 5345 __masm-> pslld(xmm_temp7, 31); // packed left shift by 31 bits
 5346 __masm-> pslld(xmm_temp8, 30); // packed left shift by 30 bits
 5347 __masm-> pslld(xmm_temp9, 25); // packed left shift by 25 bits
5348 __masm-> pxor(xmm_temp7, xmm_temp8); // xor the shifted versions
5349 __masm-> pxor(xmm_temp7, xmm_temp9);
5350 __masm-> movdqu(xmm_temp8, xmm_temp7);
5351 __masm-> pslldq(xmm_temp7, 12);
5352 __masm-> psrldq(xmm_temp8, 4);
5353 __masm-> pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
5354
5355 //
5356 // Second phase of the reduction
5357 //
5358 // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
5359 // shift operations.
5360 __masm-> movdqu(xmm_temp2, xmm_temp3);
5361 __masm-> movdqu(xmm_temp4, xmm_temp3);
5362 __masm-> movdqu(xmm_temp5, xmm_temp3);
 5363 __masm-> psrld(xmm_temp2, 1); // packed right shift by 1 bit
 5364 __masm-> psrld(xmm_temp4, 2); // packed right shift by 2 bits
 5365 __masm-> psrld(xmm_temp5, 7); // packed right shift by 7 bits
5366 __masm-> pxor(xmm_temp2, xmm_temp4); // xor the shifted versions
5367 __masm-> pxor(xmm_temp2, xmm_temp5);
5368 __masm-> pxor(xmm_temp2, xmm_temp8);
5369 __masm-> pxor(xmm_temp3, xmm_temp2);
5370 __masm-> pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
5371
5372 __masm-> decrement(blocks);
5373 __masm-> jcc(Assembler::zero, L_exit);
5374 __masm-> movdqu(xmm_temp0, xmm_temp6);
5375 __masm-> addptr(data, 16);
5376 __masm-> jmp(L_ghash_loop);
5377
 5378 __masm-> BIND(L_exit);
5379 __masm-> pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result
5380 __masm-> movdqu(Address(state, 0), xmm_temp6); // store the result
5381 __masm-> leave();
5382 __masm-> ret(0);
5383 return start;
5384 }
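
 // For reference, the math the PCLMULQDQ sequence above implements, written
 // as the classic bit-serial GF(2^128) multiply (a documentation sketch, not
 // used by the stub; blocks are the 16-byte big-endian values GHASH defines):
 #include <cstdint>
 #include <cstring>

 static void gf128_mul(const uint8_t X[16], const uint8_t Y[16], uint8_t Z[16]) {
   uint8_t V[16];
   std::memcpy(V, Y, 16);
   std::memset(Z, 0, 16);
   for (int i = 0; i < 128; i++) {
     if (X[i / 8] & (0x80 >> (i % 8))) {      // bit i of X, MSB first
       for (int j = 0; j < 16; j++) Z[j] ^= V[j];
     }
     const int carry = V[15] & 1;             // V = V * x (a right shift here)
     for (int j = 15; j > 0; j--)
       V[j] = (uint8_t)((V[j] >> 1) | (V[j - 1] << 7));
     V[0] >>= 1;
     if (carry) V[0] ^= 0xe1;                 // reduce mod x^128+x^7+x^2+x+1
   }
 }
 // Each iteration of L_ghash_loop computes: state = gf128_mul(state ^ block, H).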
5385
5386 address base64_shuffle_addr()
5387 {
5388 __masm-> align64();
5389 StubCodeMark mark(this, "StubRoutines", "shuffle_base64");
5390 address start = __masm-> pc();
 5391 assert(((unsigned long long)start & 0x3f) == 0,
 5392 "Alignment problem (0x%08llx)", (unsigned long long)start);
5393 __masm-> emit_data64(0x0405030401020001, relocInfo::none);
5394 __masm-> emit_data64(0x0a0b090a07080607, relocInfo::none);
5395 __masm-> emit_data64(0x10110f100d0e0c0d, relocInfo::none);
5396 __masm-> emit_data64(0x1617151613141213, relocInfo::none);
5397 __masm-> emit_data64(0x1c1d1b1c191a1819, relocInfo::none);
5398 __masm-> emit_data64(0x222321221f201e1f, relocInfo::none);
5399 __masm-> emit_data64(0x2829272825262425, relocInfo::none);
5400 __masm-> emit_data64(0x2e2f2d2e2b2c2a2b, relocInfo::none);
5401 return start;
5402 }
5403
5404 address base64_avx2_shuffle_addr()
5405 {
5406 __masm-> align32();
5407 StubCodeMark mark(this, "StubRoutines", "avx2_shuffle_base64");
5408 address start = __masm-> pc();
5409 __masm-> emit_data64(0x0809070805060405, relocInfo::none);
5410 __masm-> emit_data64(0x0e0f0d0e0b0c0a0b, relocInfo::none);
5411 __masm-> emit_data64(0x0405030401020001, relocInfo::none);
5412 __masm-> emit_data64(0x0a0b090a07080607, relocInfo::none);
5413 return start;
5414 }
5415
5416 address base64_avx2_input_mask_addr()
5417 {
5418 __masm-> align32();
5419 StubCodeMark mark(this, "StubRoutines", "avx2_input_mask_base64");
5420 address start = __masm-> pc();
5421 __masm-> emit_data64(0x8000000000000000, relocInfo::none);
5422 __masm-> emit_data64(0x8000000080000000, relocInfo::none);
5423 __masm-> emit_data64(0x8000000080000000, relocInfo::none);
5424 __masm-> emit_data64(0x8000000080000000, relocInfo::none);
5425 return start;
5426 }
5427
5428 address base64_avx2_lut_addr()
5429 {
5430 __masm-> align32();
5431 StubCodeMark mark(this, "StubRoutines", "avx2_lut_base64");
5432 address start = __masm-> pc();
5433 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5434 __masm-> emit_data64(0x0000f0edfcfcfcfc, relocInfo::none);
5435 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5436 __masm-> emit_data64(0x0000f0edfcfcfcfc, relocInfo::none);
5437
5438 // URL LUT
5439 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5440 __masm-> emit_data64(0x000020effcfcfcfc, relocInfo::none);
5441 __masm-> emit_data64(0xfcfcfcfcfcfc4741, relocInfo::none);
5442 __masm-> emit_data64(0x000020effcfcfcfc, relocInfo::none);
5443 return start;
5444 }
5445
5446 address base64_encoding_table_addr()
5447 {
5448 __masm-> align64();
5449 StubCodeMark mark(this, "StubRoutines", "encoding_table_base64");
5450 address start = __masm-> pc();
 5451 assert(((unsigned long long)start & 0x3f) == 0, "Alignment problem (0x%08llx)", (unsigned long long)start);
5452 __masm-> emit_data64(0x4847464544434241, relocInfo::none);
5453 __masm-> emit_data64(0x504f4e4d4c4b4a49, relocInfo::none);
5454 __masm-> emit_data64(0x5857565554535251, relocInfo::none);
5455 __masm-> emit_data64(0x6665646362615a59, relocInfo::none);
5456 __masm-> emit_data64(0x6e6d6c6b6a696867, relocInfo::none);
5457 __masm-> emit_data64(0x767574737271706f, relocInfo::none);
5458 __masm-> emit_data64(0x333231307a797877, relocInfo::none);
5459 __masm-> emit_data64(0x2f2b393837363534, relocInfo::none);
5460
5461 // URL table
5462 __masm-> emit_data64(0x4847464544434241, relocInfo::none);
5463 __masm-> emit_data64(0x504f4e4d4c4b4a49, relocInfo::none);
5464 __masm-> emit_data64(0x5857565554535251, relocInfo::none);
5465 __masm-> emit_data64(0x6665646362615a59, relocInfo::none);
5466 __masm-> emit_data64(0x6e6d6c6b6a696867, relocInfo::none);
5467 __masm-> emit_data64(0x767574737271706f, relocInfo::none);
5468 __masm-> emit_data64(0x333231307a797877, relocInfo::none);
5469 __masm-> emit_data64(0x5f2d393837363534, relocInfo::none);
5470 return start;
5471 }
5472
5473 // Code for generating Base64 encoding.
5474 // Intrinsic function prototype in Base64.java:
5475 // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp,
5476 // boolean isURL) {
5477 address generate_base64_encodeBlock()
5478 {
5479 __masm-> align(CodeEntryAlignment);
5480 StubCodeMark mark(this, "StubRoutines", "implEncode");
5481 address start = __masm-> pc();
5482 __masm-> enter();
5483
5484 // Save callee-saved registers before using them
5485 __masm-> push(r12);
5486 __masm-> push(r13);
5487 __masm-> push(r14);
5488 __masm-> push(r15);
5489
5490 // arguments
5491 const Register source = c_rarg0; // Source Array
5492 const Register start_offset = c_rarg1; // start offset
5493 const Register end_offset = c_rarg2; // end offset
5494 const Register dest = c_rarg3; // destination array
5495
5496#ifndef _WIN64
5497 const Register dp = c_rarg4; // Position for writing to dest array
5498 const Register isURL = c_rarg5; // Base64 or URL character set
5499#else
 5500 const Address dp_mem(rbp, 6 * wordSize); // dp is on the stack on Win64
5501 const Address isURL_mem(rbp, 7 * wordSize);
5502 const Register isURL = r10; // pick the volatile windows register
5503 const Register dp = r12;
5504 __masm-> movl(dp, dp_mem);
5505 __masm-> movl(isURL, isURL_mem);
5506#endif
5507
5508 const Register length = r14;
5509 const Register encode_table = r13;
5510 Label L_process3, L_exit, L_processdata, L_vbmiLoop, L_not512, L_32byteLoop;
5511
5512 // calculate length from offsets
5513 __masm-> movl(length, end_offset);
5514 __masm-> subl(length, start_offset);
5515 __masm-> cmpl(length, 0);
5516 __masm-> jcc(Assembler::lessEqual, L_exit);
5517
5518 // Code for 512-bit VBMI encoding. Encodes 48 input bytes into 64
5519 // output bytes. We read 64 input bytes and ignore the last 16, so be
5520 // sure not to read past the end of the input buffer.
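 // (48 input bytes are exactly 64 output symbols, since 48 * 8 / 6 = 64;
 //  comparing 'length' against 64 rather than 48 keeps the full 64-byte
 //  vpermb load inside the source buffer.)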
5521 if (VM_Version::supports_avx512_vbmi()) {
5522 __masm-> cmpl(length, 64); // Do not overrun input buffer.
5523 __masm-> jcc(Assembler::below, L_not512);
5524
5525 __masm-> shll(isURL, 6); // index into decode table based on isURL
5526 __masm-> lea(encode_table, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr()));
5527 __masm-> addptr(encode_table, isURL);
5528 __masm-> shrl(isURL, 6); // restore isURL
5529
5530 __masm-> mov64(rax, 0x3036242a1016040aull); // Shifts
5531 __masm-> evmovdquq(xmm3, ExternalAddress(StubRoutines::x86::base64_shuffle_addr()), Assembler::AVX_512bit, r15);
5532 __masm-> evmovdquq(xmm2, Address(encode_table, 0), Assembler::AVX_512bit);
5533 __masm-> evpbroadcastq(xmm1, rax, Assembler::AVX_512bit);
5534
5535 __masm-> align32();
 5536 __masm-> BIND(L_vbmiLoop);
5537
5538 __masm-> vpermb(xmm0, xmm3, Address(source, start_offset), Assembler::AVX_512bit);
5539 __masm-> subl(length, 48);
5540
5541 // Put the input bytes into the proper lanes for writing, then
5542 // encode them.
5543 __masm-> evpmultishiftqb(xmm0, xmm1, xmm0, Assembler::AVX_512bit);
5544 __masm-> vpermb(xmm0, xmm0, xmm2, Assembler::AVX_512bit);
5545
5546 // Write to destination
5547 __masm-> evmovdquq(Address(dest, dp), xmm0, Assembler::AVX_512bit);
5548
5549 __masm-> addptr(dest, 64);
5550 __masm-> addptr(source, 48);
5551 __masm-> cmpl(length, 64);
5552 __masm-> jcc(Assembler::aboveEqual, L_vbmiLoop);
5553
5554 __masm-> vzeroupper();
5555 }
5556
 5557 __masm-> BIND(L_not512);
5558 if (VM_Version::supports_avx2()
5559 && VM_Version::supports_avx512vlbw()) {
5560 /*
 5561 ** This AVX2 encoder is based on the paper at:
5562 ** https://dl.acm.org/doi/10.1145/3132709
5563 **
5564 ** We use AVX2 SIMD instructions to encode 24 bytes into 32
5565 ** output bytes.
5566 **
5567 */
5568 // Lengths under 32 bytes are done with scalar routine
5569 __masm-> cmpl(length, 31);
5570 __masm-> jcc(Assembler::belowEqual, L_process3);
5571
5572 // Set up supporting constant table data
5573 __masm-> vmovdqu(xmm9, ExternalAddress(StubRoutines::x86::base64_avx2_shuffle_addr()), rax);
5574 // 6-bit mask for 2nd and 4th (and multiples) 6-bit values
5575 __masm-> movl(rax, 0x0fc0fc00);
5576 __masm-> vmovdqu(xmm1, ExternalAddress(StubRoutines::x86::base64_avx2_input_mask_addr()), rax);
5577 __masm-> evpbroadcastd(xmm8, rax, Assembler::AVX_256bit);
5578
5579 // Multiplication constant for "shifting" right by 6 and 10
5580 // bits
5581 __masm-> movl(rax, 0x04000040);
5582
5583 __masm-> subl(length, 24);
5584 __masm-> evpbroadcastd(xmm7, rax, Assembler::AVX_256bit);
5585
5586 // For the first load, we mask off reading of the first 4
5587 // bytes into the register. This is so we can get 4 3-byte
5588 // chunks into each lane of the register, avoiding having to
5589 // handle end conditions. We then shuffle these bytes into a
5590 // specific order so that manipulation is easier.
5591 //
5592 // The initial read loads the XMM register like this:
5593 //
5594 // Lower 128-bit lane:
5595 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5596 // | XX | XX | XX | XX | A0 | A1 | A2 | B0 | B1 | B2 | C0 | C1
5597 // | C2 | D0 | D1 | D2 |
5598 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5599 //
5600 // Upper 128-bit lane:
5601 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5602 // | E0 | E1 | E2 | F0 | F1 | F2 | G0 | G1 | G2 | H0 | H1 | H2
5603 // | XX | XX | XX | XX |
5604 // +----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+----+
5605 //
5606 // Where A0 is the first input byte, B0 is the fourth, etc.
5607 // The alphabetical significance denotes the 3 bytes to be
5608 // consumed and encoded into 4 bytes.
5609 //
5610 // We then shuffle the register so each 32-bit word contains
5611 // the sequence:
5612 // A1 A0 A2 A1, B1, B0, B2, B1, etc.
5613 // Each of these byte sequences are then manipulated into 4
5614 // 6-bit values ready for encoding.
5615 //
5616 // If we focus on one set of 3-byte chunks, changing the
5617 // nomenclature such that A0 => a, A1 => b, and A2 => c, we
5618 // shuffle such that each 24-bit chunk contains:
5619 //
5620 // b7 b6 b5 b4 b3 b2 b1 b0 | a7 a6 a5 a4 a3 a2 a1 a0 | c7 c6
5621 // c5 c4 c3 c2 c1 c0 | b7 b6 b5 b4 b3 b2 b1 b0
5622 // Explain this step.
5623 // b3 b2 b1 b0 c5 c4 c3 c2 | c1 c0 d5 d4 d3 d2 d1 d0 | a5 a4
5624 // a3 a2 a1 a0 b5 b4 | b3 b2 b1 b0 c5 c4 c3 c2
5625 //
 5626 // We first AND off all but bits 4-9 and 16-21 (c5..c0 and
5627 // a5..a0) and shift them using a vector multiplication
5628 // operation (vpmulhuw) which effectively shifts c right by 6
5629 // bits and a right by 10 bits. We similarly mask bits 10-15
5630 // (d5..d0) and 22-27 (b5..b0) and shift them left by 8 and 4
 5631 // bits respectively. This is done using vpmullw. We end up
5632 // with 4 6-bit values, thus splitting the 3 input bytes,
5633 // ready for encoding:
5634 // 0 0 d5..d0 0 0 c5..c0 0 0 b5..b0 0 0 a5..a0
5635 //
5636 // For translation, we recognize that there are 5 distinct
5637 // ranges of legal Base64 characters as below:
5638 //
5639 // +-------------+-------------+------------+
5640 // | 6-bit value | ASCII range | offset |
5641 // +-------------+-------------+------------+
5642 // | 0..25 | A..Z | 65 |
5643 // | 26..51 | a..z | 71 |
5644 // | 52..61 | 0..9 | -4 |
5645 // | 62 | + or - | -19 or -17 |
5646 // | 63 | / or _ | -16 or 32 |
5647 // +-------------+-------------+------------+
5648 //
5649 // We note that vpshufb does a parallel lookup in a
5650 // destination register using the lower 4 bits of bytes from a
5651 // source register. If we use a saturated subtraction and
5652 // subtract 51 from each 6-bit value, bytes from [0,51]
5653 // saturate to 0, and [52,63] map to a range of [1,12]. We
5654 // distinguish the [0,25] and [26,51] ranges by assigning a
5655 // value of 13 for all 6-bit values less than 26. We end up
5656 // with:
5657 //
5658 // +-------------+-------------+------------+
5659 // | 6-bit value | Reduced | offset |
5660 // +-------------+-------------+------------+
5661 // | 0..25 | 13 | 65 |
5662 // | 26..51 | 0 | 71 |
5663 // | 52..61 | 0..9 | -4 |
5664 // | 62 | 11 | -19 or -17 |
5665 // | 63 | 12 | -16 or 32 |
5666 // +-------------+-------------+------------+
5667 //
5668 // We then use a final vpshufb to add the appropriate offset,
5669 // translating the bytes.
5670 //
5671 // Load input bytes - only 28 bytes. Mask the first load to
5672 // not load into the full register.
5673 __masm-> vpmaskmovd(xmm1, xmm1, Address(source, start_offset, Address::times_1, -4), Assembler::AVX_256bit);
5674
5675 // Move 3-byte chunks of input (12 bytes) into 16 bytes,
5676 // ordering by:
5677 // 1, 0, 2, 1; 4, 3, 5, 4; etc. This groups 6-bit chunks
5678 // for easy masking
5679 __masm-> vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit);
5680
5681 __masm-> addl(start_offset, 24);
5682
5683 // Load masking register for first and third (and multiples)
5684 // 6-bit values.
5685 __masm-> movl(rax, 0x003f03f0);
5686 __masm-> evpbroadcastd(xmm6, rax, Assembler::AVX_256bit);
5687 // Multiplication constant for "shifting" left by 4 and 8 bits
5688 __masm-> movl(rax, 0x01000010);
5689 __masm-> evpbroadcastd(xmm5, rax, Assembler::AVX_256bit);
5690
5691 // Isolate 6-bit chunks of interest
5692 __masm-> vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit);
5693
5694 // Load constants for encoding
5695 __masm-> movl(rax, 0x19191919);
5696 __masm-> evpbroadcastd(xmm3, rax, Assembler::AVX_256bit);
5697 __masm-> movl(rax, 0x33333333);
5698 __masm-> evpbroadcastd(xmm4, rax, Assembler::AVX_256bit);
5699
5700 // Shift output bytes 0 and 2 into proper lanes
5701 __masm-> vpmulhuw(xmm2, xmm0, xmm7, Assembler::AVX_256bit);
5702
5703 // Mask and shift output bytes 1 and 3 into proper lanes and
5704 // combine
5705 __masm-> vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit);
5706 __masm-> vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit);
5707 __masm-> vpor(xmm0, xmm0, xmm2, Assembler::AVX_256bit);
5708
5709 // Find out which are 0..25. This indicates which input
5710 // values fall in the range of 'A'-'Z', which require an
5711 // additional offset (see comments above)
5712 __masm-> vpcmpgtb(xmm2, xmm0, xmm3, Assembler::AVX_256bit);
5713 __masm-> vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit);
5714 __masm-> vpsubb(xmm1, xmm1, xmm2, Assembler::AVX_256bit);
5715
5716 // Load the proper lookup table
5717 __masm-> lea(r11, ExternalAddress(StubRoutines::x86::base64_avx2_lut_addr()));
5718 __masm-> movl(r15, isURL);
5719 __masm-> shll(r15, 5);
5720 __masm-> vmovdqu(xmm2, Address(r11, r15));
5721
5722 // Shuffle the offsets based on the range calculation done
5723 // above. This allows us to add the correct offset to the
5724 // 6-bit value corresponding to the range documented above.
5725 __masm-> vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit);
5726 __masm-> vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit);
5727
5728 // Store the encoded bytes
5729 __masm-> vmovdqu(Address(dest, dp), xmm0);
5730 __masm-> addl(dp, 32);
5731
5732 __masm-> cmpl(length, 31);
5733 __masm-> jcc(Assembler::belowEqual, L_process3);
5734
5735 __masm-> align32();
 5736 __masm-> BIND(L_32byteLoop);
5737
5738 // Get next 32 bytes
5739 __masm-> vmovdqu(xmm1, Address(source, start_offset, Address::times_1, -4));
5740
5741 __masm-> subl(length, 24);
5742 __masm-> addl(start_offset, 24);
5743
5744 // This logic is identical to the above, with only constant
5745 // register loads removed. Shuffle the input, mask off 6-bit
5746 // chunks, shift them into place, then add the offset to
5747 // encode.
5748 __masm-> vpshufb(xmm1, xmm1, xmm9, Assembler::AVX_256bit);
5749
5750 __masm-> vpand(xmm0, xmm8, xmm1, Assembler::AVX_256bit);
5751 __masm-> vpmulhuw(xmm10, xmm0, xmm7, Assembler::AVX_256bit);
5752 __masm-> vpand(xmm0, xmm6, xmm1, Assembler::AVX_256bit);
5753 __masm-> vpmullw(xmm0, xmm5, xmm0, Assembler::AVX_256bit);
5754 __masm-> vpor(xmm0, xmm0, xmm10, Assembler::AVX_256bit);
5755 __masm-> vpcmpgtb(xmm10, xmm0, xmm3, Assembler::AVX_256bit);
5756 __masm-> vpsubusb(xmm1, xmm0, xmm4, Assembler::AVX_256bit);
5757 __masm-> vpsubb(xmm1, xmm1, xmm10, Assembler::AVX_256bit);
5758 __masm-> vpshufb(xmm1, xmm2, xmm1, Assembler::AVX_256bit);
5759 __masm-> vpaddb(xmm0, xmm1, xmm0, Assembler::AVX_256bit);
5760
5761 // Store the encoded bytes
5762 __masm-> vmovdqu(Address(dest, dp), xmm0);
5763 __masm-> addl(dp, 32);
5764
5765 __masm-> cmpl(length, 31);
5766 __masm-> jcc(Assembler::above, L_32byteLoop);
5767
 5768 __masm-> BIND(L_process3);
5769 __masm-> vzeroupper();
5770 } else {
 5771 __masm-> BIND(L_process3);
5772 }
5773
5774 __masm-> cmpl(length, 3);
5775 __masm-> jcc(Assembler::below, L_exit);
5776
5777 // Load the encoding table based on isURL
5778 __masm-> lea(r11, ExternalAddress(StubRoutines::x86::base64_encoding_table_addr()));
5779 __masm-> movl(r15, isURL);
5780 __masm-> shll(r15, 6);
5781 __masm-> addptr(r11, r15);
5782
 5783 __masm-> BIND(L_processdata);
5784
5785 // Load 3 bytes
5786 __masm-> load_unsigned_byte(r15, Address(source, start_offset));
5787 __masm-> load_unsigned_byte(r10, Address(source, start_offset, Address::times_1, 1));
5788 __masm-> load_unsigned_byte(r13, Address(source, start_offset, Address::times_1, 2));
5789
5790 // Build a 32-bit word with bytes 1, 2, 0, 1
5791 __masm-> movl(rax, r10);
5792 __masm-> shll(r10, 24);
5793 __masm-> orl(rax, r10);
5794
5795 __masm-> subl(length, 3);
5796
5797 __masm-> shll(r15, 8);
5798 __masm-> shll(r13, 16);
5799 __masm-> orl(rax, r15);
5800
5801 __masm-> addl(start_offset, 3);
5802
5803 __masm-> orl(rax, r13);
5804 // At this point, rax contains | byte1 | byte2 | byte0 | byte1
5805 // r13 has byte2 << 16 - need low-order 6 bits to translate.
5806 // This translated byte is the fourth output byte.
5807 __masm-> shrl(r13, 16);
5808 __masm-> andl(r13, 0x3f);
5809
5810 // The high-order 6 bits of r15 (byte0) is translated.
5811 // The translated byte is the first output byte.
5812 __masm-> shrl(r15, 10);
5813
5814 __masm-> load_unsigned_byte(r13, Address(r11, r13));
5815 __masm-> load_unsigned_byte(r15, Address(r11, r15));
5816
5817 __masm-> movb(Address(dest, dp, Address::times_1, 3), r13);
5818
5819 // Extract high-order 4 bits of byte1 and low-order 2 bits of byte0.
5820 // This translated byte is the second output byte.
5821 __masm-> shrl(rax, 4);
5822 __masm-> movl(r10, rax);
5823 __masm-> andl(rax, 0x3f);
5824
5825 __masm-> movb(Address(dest, dp, Address::times_1, 0), r15);
5826
5827 __masm-> load_unsigned_byte(rax, Address(r11, rax));
5828
5829 // Extract low-order 2 bits of byte1 and high-order 4 bits of byte2.
5830 // This translated byte is the third output byte.
5831 __masm-> shrl(r10, 18);
5832 __masm-> andl(r10, 0x3f);
5833
5834 __masm-> load_unsigned_byte(r10, Address(r11, r10));
5835
5836 __masm-> movb(Address(dest, dp, Address::times_1, 1), rax);
5837 __masm-> movb(Address(dest, dp, Address::times_1, 2), r10);
5838
5839 __masm-> addl(dp, 4);
5840 __masm-> cmpl(length, 3);
5841 __masm-> jcc(Assembler::aboveEqual, L_processdata);
5842
 5843 __masm-> BIND(L_exit);
5844 __masm-> pop(r15);
5845 __masm-> pop(r14);
5846 __masm-> pop(r13);
5847 __masm-> pop(r12);
5848 __masm-> leave();
5849 __masm-> ret(0);
5850 return start;
5851 }
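
 // A scalar reference for the 3-bytes-to-4-symbols step that both the vector
 // paths and the L_processdata loop above implement ('encode3' and its
 // tables are illustrative, not part of the stub):
 static void encode3(const unsigned char s[3], unsigned char d[4], bool isURL) {
   static const char std_tbl[] =
       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
   static const char url_tbl[] =
       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
   const char* t = isURL ? url_tbl : std_tbl;
   const unsigned v = ((unsigned)s[0] << 16) | ((unsigned)s[1] << 8) | s[2];
   d[0] = (unsigned char)t[(v >> 18) & 0x3f];
   d[1] = (unsigned char)t[(v >> 12) & 0x3f];
   d[2] = (unsigned char)t[(v >>  6) & 0x3f];
   d[3] = (unsigned char)t[ v        & 0x3f];
 }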
5852
5853 // base64 AVX512vbmi tables
5854 address base64_vbmi_lookup_lo_addr() {
5855 __masm-> align64();
5856 StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64");
5857 address start = __masm-> pc();
 5858 assert(((unsigned long long)start & 0x3f) == 0,
 5859 "Alignment problem (0x%08llx)", (unsigned long long)start);
5860 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5861 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5862 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5863 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5864 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5865 __masm-> emit_data64(0x3f8080803e808080, relocInfo::none);
5866 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
5867 __masm-> emit_data64(0x8080808080803d3c, relocInfo::none);
5868 return start;
5869 }
5870
5871 address base64_vbmi_lookup_hi_addr() {
5872 __masm-> align64();
5873 StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64");
5874 address start = __masm-> pc();
 5875 assert(((unsigned long long)start & 0x3f) == 0,
 5876 "Alignment problem (0x%08llx)", (unsigned long long)start);
5877 __masm-> emit_data64(0x0605040302010080, relocInfo::none);
5878 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
5879 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
5880 __masm-> emit_data64(0x8080808080191817, relocInfo::none);
5881 __masm-> emit_data64(0x201f1e1d1c1b1a80, relocInfo::none);
5882 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
5883 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
5884 __masm-> emit_data64(0x8080808080333231, relocInfo::none);
5885 return start;
5886 }
5887 address base64_vbmi_lookup_lo_url_addr() {
5888 __masm-> align64();
5889 StubCodeMark mark(this, "StubRoutines", "lookup_lo_base64url");
5890 address start = __masm-> pc();
 5891 assert(((unsigned long long)start & 0x3f) == 0,
 5892 "Alignment problem (0x%08llx)", (unsigned long long)start);
5893 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5894 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5895 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5896 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5897 __masm-> emit_data64(0x8080808080808080, relocInfo::none);
5898 __masm-> emit_data64(0x80803e8080808080, relocInfo::none);
5899 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
5900 __masm-> emit_data64(0x8080808080803d3c, relocInfo::none);
5901 return start;
5902 }
5903
5904 address base64_vbmi_lookup_hi_url_addr() {
5905 __masm-> align64();
5906 StubCodeMark mark(this, "StubRoutines", "lookup_hi_base64url");
5907 address start = __masm-> pc();
 5908 assert(((unsigned long long)start & 0x3f) == 0,
 5909 "Alignment problem (0x%08llx)", (unsigned long long)start);
5910 __masm-> emit_data64(0x0605040302010080, relocInfo::none);
5911 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
5912 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
5913 __masm-> emit_data64(0x3f80808080191817, relocInfo::none);
5914 __masm-> emit_data64(0x201f1e1d1c1b1a80, relocInfo::none);
5915 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
5916 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
5917 __masm-> emit_data64(0x8080808080333231, relocInfo::none);
5918 return start;
5919 }
5920
5921 address base64_vbmi_pack_vec_addr() {
5922 __masm-> align64();
5923 StubCodeMark mark(this, "StubRoutines", "pack_vec_base64");
5924 address start = __masm-> pc();
 5925 assert(((unsigned long long)start & 0x3f) == 0,
 5926 "Alignment problem (0x%08llx)", (unsigned long long)start);
5927 __masm-> emit_data64(0x090a040506000102, relocInfo::none);
5928 __masm-> emit_data64(0x161011120c0d0e08, relocInfo::none);
5929 __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none);
5930 __masm-> emit_data64(0x292a242526202122, relocInfo::none);
5931 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5932 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5933 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
5934 __masm-> emit_data64(0x0000000000000000, relocInfo::none);
5935 return start;
5936 }
5937
5938 address base64_vbmi_join_0_1_addr() {
5939 __masm-> align64();
5940 StubCodeMark mark(this, "StubRoutines", "join_0_1_base64");
5941 address start = __masm-> pc();
 5942 assert(((unsigned long long)start & 0x3f) == 0,
 5943 "Alignment problem (0x%08llx)", (unsigned long long)start);
5944 __masm-> emit_data64(0x090a040506000102, relocInfo::none);
5945 __masm-> emit_data64(0x161011120c0d0e08, relocInfo::none);
5946 __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none);
5947 __masm-> emit_data64(0x292a242526202122, relocInfo::none);
5948 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5949 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5950 __masm-> emit_data64(0x494a444546404142, relocInfo::none);
5951 __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none);
5952 return start;
5953 }
5954
5955 address base64_vbmi_join_1_2_addr() {
5956 __masm-> align64();
5957 StubCodeMark mark(this, "StubRoutines", "join_1_2_base64");
5958 address start = __masm-> pc();
 5959 assert(((unsigned long long)start & 0x3f) == 0,
 5960 "Alignment problem (0x%08llx)", (unsigned long long)start);
5961 __masm-> emit_data64(0x1c1d1e18191a1415, relocInfo::none);
5962 __masm-> emit_data64(0x292a242526202122, relocInfo::none);
5963 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5964 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5965 __masm-> emit_data64(0x494a444546404142, relocInfo::none);
5966 __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none);
5967 __masm-> emit_data64(0x5c5d5e58595a5455, relocInfo::none);
5968 __masm-> emit_data64(0x696a646566606162, relocInfo::none);
5969 return start;
5970 }
5971
5972 address base64_vbmi_join_2_3_addr() {
5973 __masm-> align64();
5974 StubCodeMark mark(this, "StubRoutines", "join_2_3_base64");
5975 address start = __masm-> pc();
 5977 assert(((unsigned long long)start & 0x3f) == 0,
 5978 "Alignment problem (0x%08llx)", (unsigned long long)start);
5978 __masm-> emit_data64(0x363031322c2d2e28, relocInfo::none);
5979 __masm-> emit_data64(0x3c3d3e38393a3435, relocInfo::none);
5980 __masm-> emit_data64(0x494a444546404142, relocInfo::none);
5981 __masm-> emit_data64(0x565051524c4d4e48, relocInfo::none);
5982 __masm-> emit_data64(0x5c5d5e58595a5455, relocInfo::none);
5983 __masm-> emit_data64(0x696a646566606162, relocInfo::none);
5984 __masm-> emit_data64(0x767071726c6d6e68, relocInfo::none);
5985 __masm-> emit_data64(0x7c7d7e78797a7475, relocInfo::none);
5986 return start;
5987 }
5988
5989 address base64_decoding_table_addr() {
5990 StubCodeMark mark(this, "StubRoutines", "decoding_table_base64");
5991 address start = __masm-> pc();
5992 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5993 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5994 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5995 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5996 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
5997 __masm-> emit_data64(0x3fffffff3effffff, relocInfo::none);
5998 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
5999 __masm-> emit_data64(0xffffffffffff3d3c, relocInfo::none);
6000 __masm-> emit_data64(0x06050403020100ff, relocInfo::none);
6001 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
6002 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
6003 __masm-> emit_data64(0xffffffffff191817, relocInfo::none);
6004 __masm-> emit_data64(0x201f1e1d1c1b1aff, relocInfo::none);
6005 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
6006 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
6007 __masm-> emit_data64(0xffffffffff333231, relocInfo::none);
6008 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6009 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6010 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6011 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6012 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6013 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6014 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6015 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6016 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6017 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6018 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6019 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6020 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6021 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6022 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6023 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6024
6025 // URL table
6026 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6027 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6028 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6029 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6030 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6031 __masm-> emit_data64(0xffff3effffffffff, relocInfo::none);
6032 __masm-> emit_data64(0x3b3a393837363534, relocInfo::none);
6033 __masm-> emit_data64(0xffffffffffff3d3c, relocInfo::none);
6034 __masm-> emit_data64(0x06050403020100ff, relocInfo::none);
6035 __masm-> emit_data64(0x0e0d0c0b0a090807, relocInfo::none);
6036 __masm-> emit_data64(0x161514131211100f, relocInfo::none);
6037 __masm-> emit_data64(0x3fffffffff191817, relocInfo::none);
6038 __masm-> emit_data64(0x201f1e1d1c1b1aff, relocInfo::none);
6039 __masm-> emit_data64(0x2827262524232221, relocInfo::none);
6040 __masm-> emit_data64(0x302f2e2d2c2b2a29, relocInfo::none);
6041 __masm-> emit_data64(0xffffffffff333231, relocInfo::none);
6042 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6043 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6044 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6045 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6046 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6047 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6048 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6049 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6050 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6051 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6052 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6053 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6054 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6055 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6056 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6057 __masm-> emit_data64(0xffffffffffffffff, relocInfo::none);
6058 return start;
6059 }
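Note on the tables just emitted: each character set occupies 256 bytes (32 emit_data64 values of 8 bytes), with 0xff marking bytes that are not valid base64 input; loaded with load_signed_byte these read back as negative values. The URL-safe table immediately follows the standard one, which is why the scalar fallback later offsets the table base by isURL << 8. A minimal sketch of the equivalent lookup, with a hypothetical decode_one helper (editor's illustration, not part of the stub):

    inline int decode_one(const int8_t* table, bool isURL, uint8_t ch) {
      return table[(isURL ? 256 : 0) + ch];  // negative result => invalid character
    }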
6060
6061
6062// Code for generating Base64 decoding.
6063//
6064// Based on the article (and associated code) from https://arxiv.org/abs/1910.05109.
6065//
6066// Intrinsic function prototype in Base64.java:
6067// private void decodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL, boolean isMIME) {
6068 address generate_base64_decodeBlock() {
6069 __masm-> align(CodeEntryAlignment);
6070 StubCodeMark mark(this, "StubRoutines", "implDecode");
6071 address start = __masm-> pc();
6072 __masm-> enter();
6073
6074 // Save callee-saved registers before using them
6075 __masm-> push(r12);
6076 __masm-> push(r13);
6077 __masm-> push(r14);
6078 __masm-> push(r15);
6079 __masm-> push(rbx);
6080
6081 // arguments
6082 const Register source = c_rarg0; // Source Array
6083 const Register start_offset = c_rarg1; // start offset
6084 const Register end_offset = c_rarg2; // end offset
6085 const Register dest = c_rarg3; // destination array
6086 const Register isMIME = rbx;
6087
6088#ifndef _WIN64
6089 const Register dp = c_rarg4; // Position for writing to dest array
6090 const Register isURL = c_rarg5;// Base64 or URL character set
6091 __masm-> movl(isMIME, Address(rbp, 2 * wordSize));
6092#else
6093 const Address dp_mem(rbp, 6 * wordSize); // length is on stack on Win64
6094 const Address isURL_mem(rbp, 7 * wordSize);
6095 const Register isURL = r10; // pick the volatile windows register
6096 const Register dp = r12;
6097 __masm-> movl(dp, dp_mem);
6098 __masm-> movl(isURL, isURL_mem);
6099 __masm-> movl(isMIME, Address(rbp, 8 * wordSize));
6100#endif
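These offsets follow from the calling conventions: after enter(), [rbp] holds the saved rbp and [rbp + wordSize] the return address. On Win64 the caller's 32-byte shadow space sits above that, so the fifth argument (dp) lands at rbp + 6 * wordSize, isURL at rbp + 7 * wordSize, and isMIME at rbp + 8 * wordSize; on System V only the seventh argument (isMIME) spills to the stack, at rbp + 2 * wordSize.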
6101
6102 const XMMRegister lookup_lo = xmm5;
6103 const XMMRegister lookup_hi = xmm6;
6104 const XMMRegister errorvec = xmm7;
6105 const XMMRegister pack16_op = xmm9;
6106 const XMMRegister pack32_op = xmm8;
6107 const XMMRegister input0 = xmm3;
6108 const XMMRegister input1 = xmm20;
6109 const XMMRegister input2 = xmm21;
6110 const XMMRegister input3 = xmm19;
6111 const XMMRegister join01 = xmm12;
6112 const XMMRegister join12 = xmm11;
6113 const XMMRegister join23 = xmm10;
6114 const XMMRegister translated0 = xmm2;
6115 const XMMRegister translated1 = xmm1;
6116 const XMMRegister translated2 = xmm0;
1. 'translated2' initialized to a null pointer value
6117 const XMMRegister translated3 = xmm4;
6118
6119 const XMMRegister merged0 = xmm2;
6120 const XMMRegister merged1 = xmm1;
6121 const XMMRegister merged2 = xmm0;
6122 const XMMRegister merged3 = xmm4;
6123 const XMMRegister merge_ab_bc0 = xmm2;
6124 const XMMRegister merge_ab_bc1 = xmm1;
6125 const XMMRegister merge_ab_bc2 = xmm0;
6126 const XMMRegister merge_ab_bc3 = xmm4;
6127
6128 const XMMRegister pack24bits = xmm4;
6129
6130 const Register length = r14;
6131 const Register output_size = r13;
6132 const Register output_mask = r15;
6133 const KRegister input_mask = k1;
6134
6135 const XMMRegister input_initial_valid_b64 = xmm0;
6136 const XMMRegister tmp = xmm10;
6137 const XMMRegister mask = xmm0;
6138 const XMMRegister invalid_b64 = xmm1;
6139
6140 Label L_process256, L_process64, L_process64Loop, L_exit, L_processdata, L_loadURL;
6141 Label L_continue, L_finalBit, L_padding, L_donePadding, L_bruteForce;
6142 Label L_forceLoop, L_bottomLoop, L_checkMIME, L_exit_no_vzero;
6143
6144 // calculate length from offsets
6145 __masm-> movl(length, end_offset);
6146 __masm-> subl(length, start_offset);
6147 __masm-> push(dest); // Save for return value calc
6148
6149     // If AVX512 VBMI is not supported, just compile the non-AVX code
6150     if (VM_Version::supports_avx512_vbmi() &&
2. Calling 'VM_Version::supports_avx512_vbmi'
5. Returning from 'VM_Version::supports_avx512_vbmi'
10. Taking true branch
6151         VM_Version::supports_avx512bw()) {
6. Calling 'VM_Version::supports_avx512bw'
9. Returning from 'VM_Version::supports_avx512bw'
6152 __masm-> cmpl(length, 128); // 128-bytes is break-even for AVX-512
6153 __masm-> jcc(Assembler::lessEqual, L_bruteForce);
6154
6155 __masm-> cmpl(isMIME, 0);
6156 __masm-> jcc(Assembler::notEqual, L_bruteForce);
6157
6158 // Load lookup tables based on isURL
6159 __masm-> cmpl(isURL, 0);
6160 __masm-> jcc(Assembler::notZero, L_loadURL);
6161
6162 __masm-> evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_addr()), Assembler::AVX_512bit, r13);
6163 __masm-> evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_addr()), Assembler::AVX_512bit, r13);
6164
6165     __masm-> BIND(L_continue);
6166
6167 __masm-> movl(r15, 0x01400140);
6168 __masm-> evpbroadcastd(pack16_op, r15, Assembler::AVX_512bit);
6169
6170 __masm-> movl(r15, 0x00011000);
6171 __masm-> evpbroadcastd(pack32_op, r15, Assembler::AVX_512bit);
6172
6173 __masm-> cmpl(length, 0xff);
6174 __masm-> jcc(Assembler::lessEqual, L_process64);
6175
6176 // load masks required for decoding data
6177     __masm-> BIND(L_processdata);
6178 __masm-> evmovdquq(join01, ExternalAddress(StubRoutines::x86::base64_vbmi_join_0_1_addr()), Assembler::AVX_512bit,r13);
6179 __masm-> evmovdquq(join12, ExternalAddress(StubRoutines::x86::base64_vbmi_join_1_2_addr()), Assembler::AVX_512bit, r13);
6180 __masm-> evmovdquq(join23, ExternalAddress(StubRoutines::x86::base64_vbmi_join_2_3_addr()), Assembler::AVX_512bit, r13);
6181
6182 __masm-> align32();
6183     __masm-> BIND(L_process256);
6184 // Grab input data
6185 __masm-> evmovdquq(input0, Address(source, start_offset, Address::times_1, 0x00), Assembler::AVX_512bit);
6186 __masm-> evmovdquq(input1, Address(source, start_offset, Address::times_1, 0x40), Assembler::AVX_512bit);
6187 __masm-> evmovdquq(input2, Address(source, start_offset, Address::times_1, 0x80), Assembler::AVX_512bit);
6188 __masm-> evmovdquq(input3, Address(source, start_offset, Address::times_1, 0xc0), Assembler::AVX_512bit);
6189
6190 // Copy the low part of the lookup table into the destination of the permutation
6191 __masm-> evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit);
6192 __masm-> evmovdquq(translated1, lookup_lo, Assembler::AVX_512bit);
6193 __masm-> evmovdquq(translated2, lookup_lo, Assembler::AVX_512bit);
11. Passing null pointer value via 1st parameter 'dst'
12. Calling 'MacroAssembler::evmovdquq'
6194 __masm-> evmovdquq(translated3, lookup_lo, Assembler::AVX_512bit);
6195
6196 // Translate the base64 input into "decoded" bytes
6197 __masm-> evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit);
6198 __masm-> evpermt2b(translated1, input1, lookup_hi, Assembler::AVX_512bit);
6199 __masm-> evpermt2b(translated2, input2, lookup_hi, Assembler::AVX_512bit);
6200 __masm-> evpermt2b(translated3, input3, lookup_hi, Assembler::AVX_512bit);
6201
6202 // OR all of the translations together to check for errors (high-order bit of byte set)
6203 __masm-> vpternlogd(input0, 0xfe, input1, input2, Assembler::AVX_512bit);
6204
6205 __masm-> vpternlogd(input3, 0xfe, translated0, translated1, Assembler::AVX_512bit);
6206 __masm-> vpternlogd(input0, 0xfe, translated2, translated3, Assembler::AVX_512bit);
6207 __masm-> vpor(errorvec, input3, input0, Assembler::AVX_512bit);
6208
6209 // Check if there was an error - if so, try 64-byte chunks
6210 __masm-> evpmovb2m(k3, errorvec, Assembler::AVX_512bit);
6211 __masm-> kortestql(k3, k3);
6212 __masm-> jcc(Assembler::notZero, L_process64);
6213
6214     // The merging and shuffling happens here.
6215     // Each dword holds the byte quad [00dddddd | 00cccccc | 00bbbbbb | 00aaaaaa].
6216     // Multiply [00cccccc] by 2^6 and add [00dddddd] to get [0000cccc | ccdddddd];
6217     // pack16_op is a vector of 0x01400140, so D is multiplied by 1 and C by 0x40.
6218 __masm-> vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit);
6219 __masm-> vpmaddubsw(merge_ab_bc1, translated1, pack16_op, Assembler::AVX_512bit);
6220 __masm-> vpmaddubsw(merge_ab_bc2, translated2, pack16_op, Assembler::AVX_512bit);
6221 __masm-> vpmaddubsw(merge_ab_bc3, translated3, pack16_op, Assembler::AVX_512bit);
6222
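For illustration, a scalar model (editor's sketch, not part of the stub) of what vpmaddubsw with pack16_op computes per pair of 6-bit values:

    uint16_t pack16(uint8_t hi6, uint8_t lo6) {   // hi6, lo6 in [0, 63]
      return (uint16_t)(hi6 * 0x40 + lo6);        // == (hi6 << 6) | lo6
    }
    // e.g. pack16(0x2A, 0x15) == 0x0A95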
6223 // Now do the same with packed 16-bit values.
6224 // We start with [0000cccc | ccdddddd | 0000aaaa | aabbbbbb]
6225 // pack32_op is 0x00011000 (2^12, 1), so this multiplies [0000aaaa | aabbbbbb] by 2^12
6226 // and adds [0000cccc | ccdddddd] to yield [00000000 | aaaaaabb | bbbbcccc | ccdddddd]
6227 __masm-> vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit);
6228 __masm-> vpmaddwd(merged1, merge_ab_bc1, pack32_op, Assembler::AVX_512bit);
6229 __masm-> vpmaddwd(merged2, merge_ab_bc2, pack32_op, Assembler::AVX_512bit);
6230 __masm-> vpmaddwd(merged3, merge_ab_bc3, pack32_op, Assembler::AVX_512bit);
6231
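Continuing the scalar model (editor's sketch), vpmaddwd with pack32_op combines the two 16-bit halves into one 24-bit group:

    uint32_t pack32(uint16_t ab, uint16_t cd) {   // ab = (a << 6) | b, cd = (c << 6) | d
      return (uint32_t)ab * 0x1000 + cd;          // == a<<18 | b<<12 | c<<6 | d
    }
    // e.g. with a = b = c = d = 0x3f: pack32(0x0fff, 0x0fff) == 0x00ffffff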
6232 // The join vectors specify which byte from which vector goes into the outputs
6233 // One of every 4 bytes in the extended vector is zero, so we pack them into their
6234 // final positions in the register for storing (256 bytes in, 192 bytes out)
6235 __masm-> evpermt2b(merged0, join01, merged1, Assembler::AVX_512bit);
6236 __masm-> evpermt2b(merged1, join12, merged2, Assembler::AVX_512bit);
6237 __masm-> evpermt2b(merged2, join23, merged3, Assembler::AVX_512bit);
6238
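At this point each 512-bit register holds 16 dwords whose top byte is zero, i.e. 48 payload bytes, so the four registers carry 4 * 48 = 192 output bytes; the three permutes above compact them into three full 64-byte registers for the stores below.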
6239 // Store result
6240 __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x00), merged0, Assembler::AVX_512bit);
6241 __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x40), merged1, Assembler::AVX_512bit);
6242 __masm-> evmovdquq(Address(dest, dp, Address::times_1, 0x80), merged2, Assembler::AVX_512bit);
6243
6244 __masm-> addptr(source, 0x100);
6245 __masm-> addptr(dest, 0xc0);
6246 __masm-> subl(length, 0x100);
6247 __masm-> cmpl(length, 64 * 4);
6248 __masm-> jcc(Assembler::greaterEqual, L_process256);
6249
6250 // At this point, we've decoded 64 * 4 * n bytes.
6251 // The remaining length will be <= 64 * 4 - 1.
6252 // UNLESS there was an error decoding the first 256-byte chunk. In this
6253 // case, the length will be arbitrarily long.
6254 //
6255 // Note that this will be the path for MIME-encoded strings.
6256
6257     __masm-> BIND(L_process64);
6258
6259 __masm-> evmovdquq(pack24bits, ExternalAddress(StubRoutines::x86::base64_vbmi_pack_vec_addr()), Assembler::AVX_512bit, r13);
6260
6261 __masm-> cmpl(length, 63);
6262 __masm-> jcc(Assembler::lessEqual, L_finalBit);
6263
6264 __masm-> mov64(rax, 0x0000ffffffffffff);
6265 __masm-> kmovql(k2, rax);
6266
6267 __masm-> align32();
6268     __masm-> BIND(L_process64Loop);
6269
6270 // Handle first 64-byte block
6271
6272 __masm-> evmovdquq(input0, Address(source, start_offset), Assembler::AVX_512bit);
6273 __masm-> evmovdquq(translated0, lookup_lo, Assembler::AVX_512bit);
6274 __masm-> evpermt2b(translated0, input0, lookup_hi, Assembler::AVX_512bit);
6275
6276 __masm-> vpor(errorvec, translated0, input0, Assembler::AVX_512bit);
6277
6278 // Check for error and bomb out before updating dest
6279 __masm-> evpmovb2m(k3, errorvec, Assembler::AVX_512bit);
6280 __masm-> kortestql(k3, k3);
6281 __masm-> jcc(Assembler::notZero, L_exit);
6282
6283 // Pack output register, selecting correct byte ordering
6284 __masm-> vpmaddubsw(merge_ab_bc0, translated0, pack16_op, Assembler::AVX_512bit);
6285 __masm-> vpmaddwd(merged0, merge_ab_bc0, pack32_op, Assembler::AVX_512bit);
6286 __masm-> vpermb(merged0, pack24bits, merged0, Assembler::AVX_512bit);
6287
6288 __masm-> evmovdqub(Address(dest, dp), k2, merged0, true, Assembler::AVX_512bit);
6289
6290 __masm-> subl(length, 64);
6291 __masm-> addptr(source, 64);
6292 __masm-> addptr(dest, 48);
6293
6294 __masm-> cmpl(length, 64);
6295 __masm-> jcc(Assembler::greaterEqual, L_process64Loop);
6296
6297 __masm-> cmpl(length, 0);
6298 __masm-> jcc(Assembler::lessEqual, L_exit);
6299
6300     __masm-> BIND(L_finalBit);
6301 // Now have 1 to 63 bytes left to decode
6302
6303     // I was going to let Java take care of the final fragment;
6304     // however, it will repeatedly call this routine for every 4 bytes
6305     // of input data, so handle the rest here instead.
6306 __masm-> movq(rax, -1);
6307 __masm-> bzhiq(rax, rax, length); // Input mask in rax
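For illustration (editor's sketch): BZHI zeroes all bits at positions >= length, producing one mask bit per remaining input byte:

    // rax = (length < 64) ? (1ull << length) - 1 : ~0ull
    // e.g. length = 10  ->  rax = 0x3ff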
6308
6309 __masm-> movl(output_size, length);
6310 __masm-> shrl(output_size, 2); // Find (len / 4) * 3 (output length)
6311 __masm-> lea(output_size, Address(output_size, output_size, Address::times_2, 0));
6312 // output_size in r13
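The shift plus lea compute output_size = (length / 4) * 3 without a multiply: lea forms t + t * 2 = 3t. For example, length = 52 gives t = 13 and output_size = 39.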
6313
6314 // Strip pad characters, if any, and adjust length and mask
6315 __masm-> cmpb(Address(source, length, Address::times_1, -1), '=');
6316 __masm-> jcc(Assembler::equal, L_padding);
6317
6318     __masm-> BIND(L_donePadding);
6319
6320     // Output mask keeps the low output_size bits set (all 1s >> (64 - output_size)).
6321 __masm-> kmovql(input_mask, rax);
6322 __masm-> movq(output_mask, -1);
6323 __masm-> bzhiq(output_mask, output_mask, output_size);
6324
6325 // Load initial input with all valid base64 characters. Will be used
6326 // in merging source bytes to avoid masking when determining if an error occurred.
6327 __masm-> movl(rax, 0x61616161);
6328 __masm-> evpbroadcastd(input_initial_valid_b64, rax, Assembler::AVX_512bit);
6329
6330 // A register containing all invalid base64 decoded values
6331 __masm-> movl(rax, 0x80808080);
6332 __masm-> evpbroadcastd(invalid_b64, rax, Assembler::AVX_512bit);
6333
6334 // input_mask is in k1
6335 // output_size is in r13
6336 // output_mask is in r15
6337 // zmm0 - free
6338 // zmm1 - 0x00011000
6339 // zmm2 - 0x01400140
6340 // zmm3 - errorvec
6341 // zmm4 - pack vector
6342 // zmm5 - lookup_lo
6343 // zmm6 - lookup_hi
6344 // zmm7 - errorvec
6345 // zmm8 - 0x61616161
6346 // zmm9 - 0x80808080
6347
6348 // Load only the bytes from source, merging into our "fully-valid" register
6349 __masm-> evmovdqub(input_initial_valid_b64, input_mask, Address(source, start_offset, Address::times_1, 0x0), true, Assembler::AVX_512bit);
6350
6351 // Decode all bytes within our merged input
6352 __masm-> evmovdquq(tmp, lookup_lo, Assembler::AVX_512bit);
6353 __masm-> evpermt2b(tmp, input_initial_valid_b64, lookup_hi, Assembler::AVX_512bit);
6354 __masm-> vporq(mask, tmp, input_initial_valid_b64, Assembler::AVX_512bit);
6355
6356 // Check for error. Compare (decoded | initial) to all invalid.
6357 // If any bytes have their high-order bit set, then we have an error.
6358 __masm-> evptestmb(k2, mask, invalid_b64, Assembler::AVX_512bit);
6359 __masm-> kortestql(k2, k2);
6360
6361 // If we have an error, use the brute force loop to decode what we can (4-byte chunks).
6362 __masm-> jcc(Assembler::notZero, L_bruteForce);
6363
6364 // Shuffle output bytes
6365 __masm-> vpmaddubsw(tmp, tmp, pack16_op, Assembler::AVX_512bit);
6366 __masm-> vpmaddwd(tmp, tmp, pack32_op, Assembler::AVX_512bit);
6367
6368 __masm-> vpermb(tmp, pack24bits, tmp, Assembler::AVX_512bit);
6369 __masm-> kmovql(k1, output_mask);
6370 __masm-> evmovdqub(Address(dest, dp), k1, tmp, true, Assembler::AVX_512bit);
6371
6372 __masm-> addptr(dest, output_size);
6373
6374     __masm-> BIND(L_exit);
6375 __masm-> vzeroupper();
6376 __masm-> pop(rax); // Get original dest value
6377 __masm-> subptr(dest, rax); // Number of bytes converted
6378 __masm-> movptr(rax, dest);
6379 __masm-> pop(rbx);
6380 __masm-> pop(r15);
6381 __masm-> pop(r14);
6382 __masm-> pop(r13);
6383 __masm-> pop(r12);
6384 __masm-> leave();
6385 __masm-> ret(0);
6386
6387     __masm-> BIND(L_loadURL);
6388 __masm-> evmovdquq(lookup_lo, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_lo_url_addr()), Assembler::AVX_512bit, r13);
6389 __masm-> evmovdquq(lookup_hi, ExternalAddress(StubRoutines::x86::base64_vbmi_lookup_hi_url_addr()), Assembler::AVX_512bit, r13);
6390 __masm-> jmp(L_continue);
6391
6392     __masm-> BIND(L_padding);
6393 __masm-> decrementq(output_size, 1);
6394 __masm-> shrq(rax, 1);
6395
6396 __masm-> cmpb(Address(source, length, Address::times_1, -2), '=');
6397 __masm-> jcc(Assembler::notEqual, L_donePadding);
6398
6399 __masm-> decrementq(output_size, 1);
6400 __masm-> shrq(rax, 1);
6401 __masm-> jmp(L_donePadding);
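So each '=' pad character removes one byte from the expected output and one bit from the input mask; for illustration, a block ending "xY==" takes both decrements (output_size -= 2, rax >>= 2), while "xyZ=" takes only the first.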
6402
6403 __masm-> align32();
6404     __masm-> BIND(L_bruteForce);
6405 } // End of if(avx512_vbmi)
6406
6407 // Use non-AVX code to decode 4-byte chunks into 3 bytes of output
6408
6409 // Register state (Linux):
6410 // r12-15 - saved on stack
6411 // rdi - src
6412 // rsi - sp
6413 // rdx - sl
6414 // rcx - dst
6415 // r8 - dp
6416 // r9 - isURL
6417
6418 // Register state (Windows):
6419 // r12-15 - saved on stack
6420 // rcx - src
6421 // rdx - sp
6422 // r8 - sl
6423 // r9 - dst
6424 // r12 - dp
6425 // r10 - isURL
6426
6427 // Registers (common):
6428 // length (r14) - bytes in src
6429
6430 const Register decode_table = r11;
6431 const Register out_byte_count = rbx;
6432 const Register byte1 = r13;
6433 const Register byte2 = r15;
6434     const Register byte3 = WINDOWS_ONLY(r8) NOT_WINDOWS(rdx);
6435     const Register byte4 = WINDOWS_ONLY(r10) NOT_WINDOWS(r9);
6436
6437 __masm-> shrl(length, 2); // Multiple of 4 bytes only - length is # 4-byte chunks
6438 __masm-> cmpl(length, 0);
6439 __masm-> jcc(Assembler::lessEqual, L_exit_no_vzero);
6440
6441 __masm-> shll(isURL, 8); // index into decode table based on isURL
6442 __masm-> lea(decode_table, ExternalAddress(StubRoutines::x86::base64_decoding_table_addr()));
6443 __masm-> addptr(decode_table, isURL);
6444
6445 __masm-> jmp(L_bottomLoop);
6446
6447 __masm-> align32();
6448     __masm-> BIND(L_forceLoop);
6449 __masm-> shll(byte1, 18);
6450 __masm-> shll(byte2, 12);
6451 __masm-> shll(byte3, 6);
6452 __masm-> orl(byte1, byte2);
6453 __masm-> orl(byte1, byte3);
6454 __masm-> orl(byte1, byte4);
6455
6456 __masm-> addptr(source, 4);
6457
6458 __masm-> movb(Address(dest, dp, Address::times_1, 2), byte1);
6459 __masm-> shrl(byte1, 8);
6460 __masm-> movb(Address(dest, dp, Address::times_1, 1), byte1);
6461 __masm-> shrl(byte1, 8);
6462 __masm-> movb(Address(dest, dp, Address::times_1, 0), byte1);
6463
6464 __masm-> addptr(dest, 3);
6465 __masm-> decrementl(length, 1);
6466 __masm-> jcc(Assembler::zero, L_exit_no_vzero);
6467
6468     __masm-> BIND(L_bottomLoop);
6469 __masm-> load_unsigned_byte(byte1, Address(source, start_offset, Address::times_1, 0x00));
6470 __masm-> load_unsigned_byte(byte2, Address(source, start_offset, Address::times_1, 0x01));
6471 __masm-> load_signed_byte(byte1, Address(decode_table, byte1));
6472 __masm-> load_signed_byte(byte2, Address(decode_table, byte2));
6473 __masm-> load_unsigned_byte(byte3, Address(source, start_offset, Address::times_1, 0x02));
6474 __masm-> load_unsigned_byte(byte4, Address(source, start_offset, Address::times_1, 0x03));
6475 __masm-> load_signed_byte(byte3, Address(decode_table, byte3));
6476 __masm-> load_signed_byte(byte4, Address(decode_table, byte4));
6477
6478 __masm-> mov(rax, byte1);
6479 __masm-> orl(rax, byte2);
6480 __masm-> orl(rax, byte3);
6481 __masm-> orl(rax, byte4);
6482 __masm-> jcc(Assembler::positive, L_forceLoop);
6483
6484     __masm-> BIND(L_exit_no_vzero);
6485 __masm-> pop(rax); // Get original dest value
6486 __masm-> subptr(dest, rax); // Number of bytes converted
6487 __masm-> movptr(rax, dest);
6488 __masm-> pop(rbx);
6489 __masm-> pop(r15);
6490 __masm-> pop(r14);
6491 __masm-> pop(r13);
6492 __masm-> pop(r12);
6493 __masm-> leave();
6494 __masm-> ret(0);
6495
6496 return start;
6497 }
6498
6499
6500 /**
6501 * Arguments:
6502 *
6503 * Inputs:
6504 * c_rarg0 - int crc
6505 * c_rarg1 - byte* buf
6506 * c_rarg2 - int length
6507 *
6508    * Output:
6509 * rax - int crc result
6510 */
6511 address generate_updateBytesCRC32() {
6512     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
6513
6514 __masm-> align(CodeEntryAlignment);
6515 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
6516
6517 address start = __masm-> pc();
6518 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6519 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6520 // rscratch1: r10
6521 const Register crc = c_rarg0; // crc
6522 const Register buf = c_rarg1; // source java byte array address
6523 const Register len = c_rarg2; // length
6524 const Register table = c_rarg3; // crc_table address (reuse register)
6525 const Register tmp1 = r11;
6526 const Register tmp2 = r10;
6527 assert_different_registers(crc, buf, len, table, tmp1, tmp2, rax);
6528
6529     BLOCK_COMMENT("Entry:");
6530 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6531
6532 if (VM_Version::supports_sse4_1() && VM_Version::supports_avx512_vpclmulqdq() &&
6533 VM_Version::supports_avx512bw() &&
6534 VM_Version::supports_avx512vl()) {
6535       // The constants used in the CRC32 algorithm require the 1's complement of the initial crc value.
6536 // However, the constant table for CRC32-C assumes the original crc value. Account for this
6537 // difference before calling and after returning.
6538 __masm-> lea(table, ExternalAddress(StubRoutines::x86::crc_table_avx512_addr()));
6539 __masm-> notl(crc);
6540 __masm-> kernel_crc32_avx512(crc, buf, len, table, tmp1, tmp2);
6541 __masm-> notl(crc);
6542 } else {
6543 __masm-> kernel_crc32(crc, buf, len, table, tmp1);
6544 }
6545
6546 __masm-> movl(rax, crc);
6547 __masm-> vzeroupper();
6548 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6549 __masm-> ret(0);
6550
6551 return start;
6552 }
6553
6554 /**
6555 * Arguments:
6556 *
6557 * Inputs:
6558 * c_rarg0 - int crc
6559 * c_rarg1 - byte* buf
6560 * c_rarg2 - long length
6561 * c_rarg3 - table_start - optional (present only when doing a library_call,
6562 * not used by x86 algorithm)
6563 *
6564    * Output:
6565 * rax - int crc result
6566 */
6567 address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
6568     assert(UseCRC32CIntrinsics, "need SSE4_2");
6569 __masm-> align(CodeEntryAlignment);
6570 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
6571 address start = __masm-> pc();
6572     //reg.arg   int#0  int#1  int#2  int#3  int#4  int#5  float regs
6573     //Windows   RCX    RDX    R8     R9     none   none   XMM0..XMM3
6574     //Lin/Sol   RDI    RSI    RDX    RCX    R8     R9     XMM0..XMM7
6575 const Register crc = c_rarg0; // crc
6576 const Register buf = c_rarg1; // source java byte array address
6577 const Register len = c_rarg2; // length
6578 const Register a = rax;
6579 const Register j = r9;
6580 const Register k = r10;
6581 const Register l = r11;
6582#ifdef _WIN64
6583 const Register y = rdi;
6584 const Register z = rsi;
6585#else
6586 const Register y = rcx;
6587 const Register z = r8;
6588#endif
6589 assert_different_registers(crc, buf, len, a, j, k, l, y, z);
6590
6591     BLOCK_COMMENT("Entry:");
6592 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6593 if (VM_Version::supports_sse4_1() && VM_Version::supports_avx512_vpclmulqdq() &&
6594 VM_Version::supports_avx512bw() &&
6595 VM_Version::supports_avx512vl()) {
6596 __masm-> lea(j, ExternalAddress(StubRoutines::x86::crc32c_table_avx512_addr()));
6597 __masm-> kernel_crc32_avx512(crc, buf, len, j, l, k);
6598 } else {
6599#ifdef _WIN64
6600 __masm-> push(y);
6601 __masm-> push(z);
6602#endif
6603 __masm-> crc32c_ipl_alg2_alt2(crc, buf, len,
6604 a, j, k,
6605 l, y, z,
6606 c_farg0, c_farg1, c_farg2,
6607 is_pclmulqdq_supported);
6608#ifdef _WIN64
6609 __masm-> pop(z);
6610 __masm-> pop(y);
6611#endif
6612 }
6613 __masm-> movl(rax, crc);
6614 __masm-> vzeroupper();
6615 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6616 __masm-> ret(0);
6617
6618 return start;
6619 }
6620
6621
6622 /***
6623 * Arguments:
6624 *
6625 * Inputs:
6626 * c_rarg0 - int adler
6627 * c_rarg1 - byte* buff
6628 * c_rarg2 - int len
6629 *
6630 * Output:
6631 * rax - int adler result
6632 */
6633
6634 address generate_updateBytesAdler32() {
6635     assert(UseAdler32Intrinsics, "need AVX2");
6636
6637 __masm-> align(CodeEntryAlignment);
6638 StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
6639
6640 address start = __masm-> pc();
6641
6642 const Register data = r9;
6643 const Register size = r10;
6644
6645 const XMMRegister yshuf0 = xmm6;
6646 const XMMRegister yshuf1 = xmm7;
6647 assert_different_registers(c_rarg0, c_rarg1, c_rarg2, data, size);
6648
6649     BLOCK_COMMENT("Entry:");
6650 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6651
6652 __masm-> vmovdqu(yshuf0, ExternalAddress((address) StubRoutines::x86::_adler32_shuf0_table), r9);
6653 __masm-> vmovdqu(yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_shuf1_table), r9);
6654 __masm-> movptr(data, c_rarg1); //data
6655 __masm-> movl(size, c_rarg2); //length
6656 __masm-> updateBytesAdler32(c_rarg0, data, size, yshuf0, yshuf1, ExternalAddress((address) StubRoutines::x86::_adler32_ascale_table));
6657 __masm-> leave();
6658 __masm-> ret(0);
6659 return start;
6660 }
6661
6662 /**
6663 * Arguments:
6664 *
6665 * Input:
6666 * c_rarg0 - x address
6667 * c_rarg1 - x length
6668 * c_rarg2 - y address
6669 * c_rarg3 - y length
6670 * not Win64
6671 * c_rarg4 - z address
6672 * c_rarg5 - z length
6673 * Win64
6674 * rsp+40 - z address
6675 * rsp+48 - z length
6676 */
6677 address generate_multiplyToLen() {
6678 __masm-> align(CodeEntryAlignment);
6679 StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
6680
6681 address start = __masm-> pc();
6682 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6683 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6684 const Register x = rdi;
6685 const Register xlen = rax;
6686 const Register y = rsi;
6687 const Register ylen = rcx;
6688 const Register z = r8;
6689 const Register zlen = r11;
6690
6691 // Next registers will be saved on stack in multiply_to_len().
6692 const Register tmp1 = r12;
6693 const Register tmp2 = r13;
6694 const Register tmp3 = r14;
6695 const Register tmp4 = r15;
6696 const Register tmp5 = rbx;
6697
6698     BLOCK_COMMENT("Entry:");
6699 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6700
6701#ifndef _WIN64
6702 __masm-> movptr(zlen, r9); // Save r9 in r11 - zlen
6703#endif
6704 setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx
6705 // ylen => rcx, z => r8, zlen => r11
6706 // r9 and r10 may be used to save non-volatile registers
6707#ifdef _WIN64
6708 // last 2 arguments (#4, #5) are on stack on Win64
6709 __masm-> movptr(z, Address(rsp, 6 * wordSize));
6710 __masm-> movptr(zlen, Address(rsp, 7 * wordSize));
6711#endif
6712
6713 __masm-> movptr(xlen, rsi);
6714 __masm-> movptr(y, rdx);
6715 __masm-> multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5);
6716
6717 restore_arg_regs();
6718
6719 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6720 __masm-> ret(0);
6721
6722 return start;
6723 }
6724
6725 /**
6726 * Arguments:
6727 *
6728 * Input:
6729   *    c_rarg0 - obja    address
6730   *    c_rarg1 - objb    address
6731   *    c_rarg2 - length  length
6732   *    c_rarg3 - scale   log2_array_indxscale
6733   *
6734   *  Output:
6735   *    rax - int: >= 0 mismatched index, < 0 bitwise complement of tail
6736 */
6737 address generate_vectorizedMismatch() {
6738 __masm-> align(CodeEntryAlignment);
6739 StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
6740 address start = __masm-> pc();
6741
6742     BLOCK_COMMENT("Entry:");
6743 __masm-> enter();
6744
6745#ifdef _WIN64 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6746 const Register scale = c_rarg0; //rcx, will exchange with r9
6747 const Register objb = c_rarg1; //rdx
6748 const Register length = c_rarg2; //r8
6749 const Register obja = c_rarg3; //r9
6750     __masm-> xchgq(obja, scale); // now obja and scale contain the correct contents
6751
6752 const Register tmp1 = r10;
6753 const Register tmp2 = r11;
6754#endif
6755#ifndef _WIN64 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6756 const Register obja = c_rarg0; //U:rdi
6757 const Register objb = c_rarg1; //U:rsi
6758 const Register length = c_rarg2; //U:rdx
6759 const Register scale = c_rarg3; //U:rcx
6760 const Register tmp1 = r8;
6761 const Register tmp2 = r9;
6762#endif
6763 const Register result = rax; //return value
6764 const XMMRegister vec0 = xmm0;
6765 const XMMRegister vec1 = xmm1;
6766 const XMMRegister vec2 = xmm2;
6767
6768 __masm-> vectorized_mismatch(obja, objb, length, scale, result, tmp1, tmp2, vec0, vec1, vec2);
6769
6770 __masm-> vzeroupper();
6771 __masm-> leave();
6772 __masm-> ret(0);
6773
6774 return start;
6775 }
6776
6777/**
6778 * Arguments:
6779 *
6780 * Input:
6781 *   c_rarg0 - x address
6782 *   c_rarg1 - x length
6783 *   c_rarg2 - z address
6784 *   c_rarg3 - z length
6785 *
6786 */
6787 address generate_squareToLen() {
6788
6789 __masm-> align(CodeEntryAlignment);
6790 StubCodeMark mark(this, "StubRoutines", "squareToLen");
6791
6792 address start = __masm-> pc();
6793 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6794 // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
6795 const Register x = rdi;
6796 const Register len = rsi;
6797 const Register z = r8;
6798 const Register zlen = rcx;
6799
6800 const Register tmp1 = r12;
6801 const Register tmp2 = r13;
6802 const Register tmp3 = r14;
6803 const Register tmp4 = r15;
6804 const Register tmp5 = rbx;
6805
6806     BLOCK_COMMENT("Entry:");
6807 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6808
6809 setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
6810 // zlen => rcx
6811 // r9 and r10 may be used to save non-volatile registers
6812 __masm-> movptr(r8, rdx);
6813 __masm-> square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
6814
6815 restore_arg_regs();
6816
6817 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6818 __masm-> ret(0);
6819
6820 return start;
6821 }
6822
6823 address generate_method_entry_barrier() {
6824 __masm-> align(CodeEntryAlignment);
6825 StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier");
6826
6827 Label deoptimize_label;
6828
6829 address start = __masm-> pc();
6830
6831 __masm-> push(-1); // cookie, this is used for writing the new rsp when deoptimizing
6832
6833     BLOCK_COMMENT("Entry:");
6834 __masm-> enter(); // save rbp
6835
6836 // save c_rarg0, because we want to use that value.
6837 // We could do without it but then we depend on the number of slots used by pusha
6838 __masm-> push(c_rarg0);
6839
6840 __masm-> lea(c_rarg0, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for c_rarg0 - this should be the return address
6841
6842 __masm-> pusha();
6843
6844 // The method may have floats as arguments, and we must spill them before calling
6845 // the VM runtime.
6846     assert(Argument::n_float_register_parameters_j == 8, "Assumption");
6847 const int xmm_size = wordSize * 2;
6848 const int xmm_spill_size = xmm_size * Argument::n_float_register_parameters_j;
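Here xmm_size is 2 * wordSize = 16 bytes (one 128-bit register), so xmm_spill_size is 16 * 8 = 128 bytes, exactly enough for the eight Java float argument registers xmm0..xmm7 spilled below.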
6849 __masm-> subptr(rsp, xmm_spill_size);
6850 __masm-> movdqu(Address(rsp, xmm_size * 7), xmm7);
6851 __masm-> movdqu(Address(rsp, xmm_size * 6), xmm6);
6852 __masm-> movdqu(Address(rsp, xmm_size * 5), xmm5);
6853 __masm-> movdqu(Address(rsp, xmm_size * 4), xmm4);
6854 __masm-> movdqu(Address(rsp, xmm_size * 3), xmm3);
6855 __masm-> movdqu(Address(rsp, xmm_size * 2), xmm2);
6856 __masm-> movdqu(Address(rsp, xmm_size * 1), xmm1);
6857 __masm-> movdqu(Address(rsp, xmm_size * 0), xmm0);
6858
6859     __masm-> call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast<int (*)(address*)>(BarrierSetNMethod::nmethod_stub_entry_barrier)), 1);
6860
6861 __masm-> movdqu(xmm0, Address(rsp, xmm_size * 0));
6862 __masm-> movdqu(xmm1, Address(rsp, xmm_size * 1));
6863 __masm-> movdqu(xmm2, Address(rsp, xmm_size * 2));
6864 __masm-> movdqu(xmm3, Address(rsp, xmm_size * 3));
6865 __masm-> movdqu(xmm4, Address(rsp, xmm_size * 4));
6866 __masm-> movdqu(xmm5, Address(rsp, xmm_size * 5));
6867 __masm-> movdqu(xmm6, Address(rsp, xmm_size * 6));
6868 __masm-> movdqu(xmm7, Address(rsp, xmm_size * 7));
6869 __masm-> addptr(rsp, xmm_spill_size);
6870
6871 __masm-> cmpl(rax, 1); // 1 means deoptimize
6872 __masm-> jcc(Assembler::equal, deoptimize_label);
6873
6874 __masm-> popa();
6875 __masm-> pop(c_rarg0);
6876
6877 __masm-> leave();
6878
6879 __masm-> addptr(rsp, 1 * wordSize); // cookie
6880 __masm-> ret(0);
6881
6882
6883     __masm-> BIND(deoptimize_label);
6884
6885 __masm-> popa();
6886 __masm-> pop(c_rarg0);
6887
6888 __masm-> leave();
6889
6890     // This can be taken out, but is good for verification purposes. Getting a SIGSEGV
6891     // here while still having a correct stack is valuable.
6892 __masm-> testptr(rsp, Address(rsp, 0));
6893
6894 __masm-> movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier
6895     __masm-> jmp(Address(rsp, -1 * wordSize)); // jmp target should be caller's verified_entry_point
6896
6897 return start;
6898 }
6899
6900 /**
6901 * Arguments:
6902 *
6903 * Input:
6904 * c_rarg0 - out address
6905 * c_rarg1 - in address
6906 * c_rarg2 - offset
6907 * c_rarg3 - len
6908 * not Win64
6909 * c_rarg4 - k
6910 * Win64
6911 * rsp+40 - k
6912 */
6913 address generate_mulAdd() {
6914 __masm-> align(CodeEntryAlignment);
6915 StubCodeMark mark(this, "StubRoutines", "mulAdd");
6916
6917 address start = __masm-> pc();
6918 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
6919 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
6920 const Register out = rdi;
6921 const Register in = rsi;
6922 const Register offset = r11;
6923 const Register len = rcx;
6924 const Register k = r8;
6925
6926 // Next registers will be saved on stack in mul_add().
6927 const Register tmp1 = r12;
6928 const Register tmp2 = r13;
6929 const Register tmp3 = r14;
6930 const Register tmp4 = r15;
6931 const Register tmp5 = rbx;
6932
6933     BLOCK_COMMENT("Entry:");
6934 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6935
6936 setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
6937 // len => rcx, k => r8
6938 // r9 and r10 may be used to save non-volatile registers
6939#ifdef _WIN64
6940 // last argument is on stack on Win64
6941 __masm-> movl(k, Address(rsp, 6 * wordSize));
6942#endif
6943 __masm-> movptr(r11, rdx); // move offset in rdx to offset(r11)
6944 __masm-> mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
6945
6946 restore_arg_regs();
6947
6948 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
6949 __masm-> ret(0);
6950
6951 return start;
6952 }
6953
6954 address generate_bigIntegerRightShift() {
6955 __masm-> align(CodeEntryAlignment);
6956 StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
6957
6958 address start = __masm-> pc();
6959 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
6960 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
6961 const Register newArr = rdi;
6962 const Register oldArr = rsi;
6963 const Register newIdx = rdx;
6964 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
6965 const Register totalNumIter = r8;
6966
6967 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
6968 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
6969 const Register tmp1 = r11; // Caller save.
6970 const Register tmp2 = rax; // Caller save.
6971     const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);  // Windows: Callee save. Linux: Caller save.
6972     const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save.
6973 const Register tmp5 = r14; // Callee save.
6974 const Register tmp6 = r15;
6975
6976 const XMMRegister x0 = xmm0;
6977 const XMMRegister x1 = xmm1;
6978 const XMMRegister x2 = xmm2;
6979
6980     BLOCK_COMMENT("Entry:");
6981 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
6982
6983#ifdef _WINDOWS
6984 setup_arg_regs(4);
6985 // For windows, since last argument is on stack, we need to move it to the appropriate register.
6986 __masm-> movl(totalNumIter, Address(rsp, 6 * wordSize));
6987 // Save callee save registers.
6988 __masm-> push(tmp3);
6989 __masm-> push(tmp4);
6990#endif
6991 __masm-> push(tmp5);
6992
6993 // Rename temps used throughout the code.
6994 const Register idx = tmp1;
6995 const Register nIdx = tmp2;
6996
6997 __masm-> xorl(idx, idx);
6998
6999 // Start right shift from end of the array.
7000 // For example, if #iteration = 4 and newIdx = 1
7001     //   then dest[4] = src[4] >> shiftCount | src[3] << (32 - shiftCount)
7002     //   if #iteration = 4 and newIdx = 0
7003     //   then dest[3] = src[4] >> shiftCount | src[3] << (32 - shiftCount)
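SHRD shifts the 64-bit concatenation (hi:lo) right and keeps the low 32 bits, which is exactly the dest[] expression above. Worked example (editor's): with shiftCount = 8, src[4] = 0x55667788 and src[3] = 0x11223344,

    (0x1122334455667788 >> 8) & 0xffffffff == 0x44556677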
7004 __masm-> movl(idx, totalNumIter);
7005 __masm-> movl(nIdx, idx);
7006 __masm-> addl(nIdx, newIdx);
7007
7008 // If vectorization is enabled, check if the number of iterations is at least 64
7009     // If not, then go to ShiftTwo, processing 2 iterations at a time
7010 if (VM_Version::supports_avx512_vbmi2()) {
7011 __masm-> cmpptr(totalNumIter, (AVX3Threshold/64));
7012 __masm-> jcc(Assembler::less, ShiftTwo);
7013
7014 if (AVX3Threshold < 16 * 64) {
7015 __masm-> cmpl(totalNumIter, 16);
7016 __masm-> jcc(Assembler::less, ShiftTwo);
7017 }
7018 __masm-> evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
7019 __masm-> subl(idx, 16);
7020 __masm-> subl(nIdx, 16);
7021     __masm-> BIND(Shift512Loop);
7022 __masm-> evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit);
7023 __masm-> evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
7024 __masm-> vpshrdvd(x2, x1, x0, Assembler::AVX_512bit);
7025 __masm-> evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit);
7026 __masm-> subl(nIdx, 16);
7027 __masm-> subl(idx, 16);
7028 __masm-> jcc(Assembler::greaterEqual, Shift512Loop);
7029 __masm-> addl(idx, 16);
7030 __masm-> addl(nIdx, 16);
7031 }
7032     __masm-> BIND(ShiftTwo);
7033 __masm-> cmpl(idx, 2);
7034 __masm-> jcc(Assembler::less, ShiftOne);
7035 __masm-> subl(idx, 2);
7036 __masm-> subl(nIdx, 2);
7037     __masm-> BIND(ShiftTwoLoop);
7038 __masm-> movl(tmp5, Address(oldArr, idx, Address::times_4, 8));
7039 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
7040 __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4));
7041 __masm-> shrdl(tmp5, tmp4);
7042 __masm-> shrdl(tmp4, tmp3);
7043 __masm-> movl(Address(newArr, nIdx, Address::times_4, 4), tmp5);
7044 __masm-> movl(Address(newArr, nIdx, Address::times_4), tmp4);
7045 __masm-> subl(nIdx, 2);
7046 __masm-> subl(idx, 2);
7047 __masm-> jcc(Assembler::greaterEqual, ShiftTwoLoop);
7048 __masm-> addl(idx, 2);
7049 __masm-> addl(nIdx, 2);
7050
7051 // Do the last iteration
7052     __masm-> BIND(ShiftOne);
7053 __masm-> cmpl(idx, 1);
7054 __masm-> jcc(Assembler::less, Exit);
7055 __masm-> subl(idx, 1);
7056 __masm-> subl(nIdx, 1);
7057 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
7058 __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4));
7059 __masm-> shrdl(tmp4, tmp3);
7060 __masm-> movl(Address(newArr, nIdx, Address::times_4), tmp4);
7061     __masm-> BIND(Exit);
7062 // Restore callee save registers.
7063 __masm-> pop(tmp5);
7064#ifdef _WINDOWS
7065 __masm-> pop(tmp4);
7066 __masm-> pop(tmp3);
7067 restore_arg_regs();
7068#endif
7069 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7070 __masm-> ret(0);
7071 return start;
7072 }
7073
7074 /**
7075 * Arguments:
7076 *
7077 * Input:
7078 * c_rarg0 - newArr address
7079 * c_rarg1 - oldArr address
7080 * c_rarg2 - newIdx
7081 * c_rarg3 - shiftCount
7082 * not Win64
7083 * c_rarg4 - numIter
7084 * Win64
7086  *   rsp+40 - numIter
7086 */
7087 address generate_bigIntegerLeftShift() {
7088 __masm-> align(CodeEntryAlignment);
7089 StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
7090 address start = __masm-> pc();
7091 Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
7092 // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
7093 const Register newArr = rdi;
7094 const Register oldArr = rsi;
7095 const Register newIdx = rdx;
7096 const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
7097 const Register totalNumIter = r8;
7098 // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
7099 // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
7100 const Register tmp1 = r11; // Caller save.
7101 const Register tmp2 = rax; // Caller save.
7102     const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);  // Windows: Callee save. Linux: Caller save.
7103     const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save.
7104 const Register tmp5 = r14; // Callee save.
7105
7106 const XMMRegister x0 = xmm0;
7107 const XMMRegister x1 = xmm1;
7108 const XMMRegister x2 = xmm2;
7109     BLOCK_COMMENT("Entry:");
7110 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7111
7112#ifdef _WINDOWS
7113 setup_arg_regs(4);
7114 // For windows, since last argument is on stack, we need to move it to the appropriate register.
7115 __masm-> movl(totalNumIter, Address(rsp, 6 * wordSize));
7116 // Save callee save registers.
7117 __masm-> push(tmp3);
7118 __masm-> push(tmp4);
7119#endif
7120 __masm-> push(tmp5);
7121
7122 // Rename temps used throughout the code
7123 const Register idx = tmp1;
7124 const Register numIterTmp = tmp2;
7125
7126 // Start idx from zero.
7127 __masm-> xorl(idx, idx);
7128 // Compute interior pointer for new array. We do this so that we can use same index for both old and new arrays.
7129 __masm-> lea(newArr, Address(newArr, newIdx, Address::times_4));
7130 __masm-> movl(numIterTmp, totalNumIter);
7131
7132 // If vectorization is enabled, check if the number of iterations is at least 64
7133 // If not, then go to ShiftTwo shifting two numbers at a time
7134 if (VM_Version::supports_avx512_vbmi2()) {
7135 __masm-> cmpl(totalNumIter, (AVX3Threshold/64));
7136 __masm-> jcc(Assembler::less, ShiftTwo);
7137
7138 if (AVX3Threshold < 16 * 64) {
7139 __masm-> cmpl(totalNumIter, 16);
7140 __masm-> jcc(Assembler::less, ShiftTwo);
7141 }
7142 __masm-> evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
7143 __masm-> subl(numIterTmp, 16);
7144     __masm-> BIND(Shift512Loop);
7145 __masm-> evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
7146 __masm-> evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);
7147 __masm-> vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
7148 __masm-> evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
7149 __masm-> addl(idx, 16);
7150 __masm-> subl(numIterTmp, 16);
7151 __masm-> jcc(Assembler::greaterEqual, Shift512Loop);
7152 __masm-> addl(numIterTmp, 16);
7153 }
7154     __masm-> BIND(ShiftTwo);
7155 __masm-> cmpl(totalNumIter, 1);
7156 __masm-> jcc(Assembler::less, Exit);
7157 __masm-> movl(tmp3, Address(oldArr, idx, Address::times_4));
7158 __masm-> subl(numIterTmp, 2);
7159 __masm-> jcc(Assembler::less, ShiftOne);
7160
7161     __masm-> BIND(ShiftTwoLoop);
7162 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
7163 __masm-> movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
7164 __masm-> shldl(tmp3, tmp4);
7165 __masm-> shldl(tmp4, tmp5);
7166 __masm-> movl(Address(newArr, idx, Address::times_4), tmp3);
7167 __masm-> movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
7168 __masm-> movl(tmp3, tmp5);
7169 __masm-> addl(idx, 2);
7170 __masm-> subl(numIterTmp, 2);
7171 __masm-> jcc(Assembler::greaterEqual, ShiftTwoLoop);
7172
7173 // Do the last iteration
7174     __masm-> BIND(ShiftOne);
7175 __masm-> addl(numIterTmp, 2);
7176 __masm-> cmpl(numIterTmp, 1);
7177 __masm-> jcc(Assembler::less, Exit);
7178 __masm-> movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
7179 __masm-> shldl(tmp3, tmp4);
7180 __masm-> movl(Address(newArr, idx, Address::times_4), tmp3);
7181
7182     __masm-> BIND(Exit);
7183 // Restore callee save registers.
7184 __masm-> pop(tmp5);
7185#ifdef _WINDOWS
7186 __masm-> pop(tmp4);
7187 __masm-> pop(tmp3);
7188 restore_arg_regs();
7189#endif
7190 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7191 __masm-> ret(0);
7192 return start;
7193 }
7194
7195 address generate_libmExp() {
7196 StubCodeMark mark(this, "StubRoutines", "libmExp");
7197
7198 address start = __masm-> pc();
7199
7200 const XMMRegister x0 = xmm0;
7201 const XMMRegister x1 = xmm1;
7202 const XMMRegister x2 = xmm2;
7203 const XMMRegister x3 = xmm3;
7204
7205 const XMMRegister x4 = xmm4;
7206 const XMMRegister x5 = xmm5;
7207 const XMMRegister x6 = xmm6;
7208 const XMMRegister x7 = xmm7;
7209
7210 const Register tmp = r11;
7211
7212     BLOCK_COMMENT("Entry:");
7213 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7214
7215 __masm-> fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
7216
7217 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7218 __masm-> ret(0);
7219
7220 return start;
7221
7222 }
7223
7224 address generate_libmLog() {
7225 StubCodeMark mark(this, "StubRoutines", "libmLog");
7226
7227 address start = __masm-> pc();
7228
7229 const XMMRegister x0 = xmm0;
7230 const XMMRegister x1 = xmm1;
7231 const XMMRegister x2 = xmm2;
7232 const XMMRegister x3 = xmm3;
7233
7234 const XMMRegister x4 = xmm4;
7235 const XMMRegister x5 = xmm5;
7236 const XMMRegister x6 = xmm6;
7237 const XMMRegister x7 = xmm7;
7238
7239 const Register tmp1 = r11;
7240 const Register tmp2 = r8;
7241
7242     BLOCK_COMMENT("Entry:");
7243 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7244
7245 __masm-> fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
7246
7247 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7248 __masm-> ret(0);
7249
7250 return start;
7251
7252 }
7253
7254 address generate_libmLog10() {
7255 StubCodeMark mark(this, "StubRoutines", "libmLog10");
7256
7257 address start = __masm-> pc();
7258
7259 const XMMRegister x0 = xmm0;
7260 const XMMRegister x1 = xmm1;
7261 const XMMRegister x2 = xmm2;
7262 const XMMRegister x3 = xmm3;
7263
7264 const XMMRegister x4 = xmm4;
7265 const XMMRegister x5 = xmm5;
7266 const XMMRegister x6 = xmm6;
7267 const XMMRegister x7 = xmm7;
7268
7269 const Register tmp = r11;
7270
7271     BLOCK_COMMENT("Entry:");
7272 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7273
7274 __masm-> fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
7275
7276 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7277 __masm-> ret(0);
7278
7279 return start;
7280
7281 }
7282
7283 address generate_libmPow() {
7284 StubCodeMark mark(this, "StubRoutines", "libmPow");
7285
7286 address start = __masm-> pc();
7287
7288 const XMMRegister x0 = xmm0;
7289 const XMMRegister x1 = xmm1;
7290 const XMMRegister x2 = xmm2;
7291 const XMMRegister x3 = xmm3;
7292
7293 const XMMRegister x4 = xmm4;
7294 const XMMRegister x5 = xmm5;
7295 const XMMRegister x6 = xmm6;
7296 const XMMRegister x7 = xmm7;
7297
7298 const Register tmp1 = r8;
7299 const Register tmp2 = r9;
7300 const Register tmp3 = r10;
7301 const Register tmp4 = r11;
7302
7303     BLOCK_COMMENT("Entry:");
7304 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7305
7306 __masm-> fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7307
7308 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7309 __masm-> ret(0);
7310
7311 return start;
7312
7313 }
7314
7315 address generate_libmSin() {
7316 StubCodeMark mark(this, "StubRoutines", "libmSin");
7317
7318 address start = __masm-> pc();
7319
7320 const XMMRegister x0 = xmm0;
7321 const XMMRegister x1 = xmm1;
7322 const XMMRegister x2 = xmm2;
7323 const XMMRegister x3 = xmm3;
7324
7325 const XMMRegister x4 = xmm4;
7326 const XMMRegister x5 = xmm5;
7327 const XMMRegister x6 = xmm6;
7328 const XMMRegister x7 = xmm7;
7329
7330 const Register tmp1 = r8;
7331 const Register tmp2 = r9;
7332 const Register tmp3 = r10;
7333 const Register tmp4 = r11;
7334
7335     BLOCK_COMMENT("Entry:");
7336 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7337
7338#ifdef _WIN64
7339 __masm-> push(rsi);
7340 __masm-> push(rdi);
7341#endif
7342 __masm-> fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7343
7344#ifdef _WIN64
7345 __masm-> pop(rdi);
7346 __masm-> pop(rsi);
7347#endif
7348
7349 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7350 __masm-> ret(0);
7351
7352 return start;
7353
7354 }
7355
7356 address generate_libmCos() {
7357 StubCodeMark mark(this, "StubRoutines", "libmCos");
7358
7359 address start = __masm-> pc();
7360
7361 const XMMRegister x0 = xmm0;
7362 const XMMRegister x1 = xmm1;
7363 const XMMRegister x2 = xmm2;
7364 const XMMRegister x3 = xmm3;
7365
7366 const XMMRegister x4 = xmm4;
7367 const XMMRegister x5 = xmm5;
7368 const XMMRegister x6 = xmm6;
7369 const XMMRegister x7 = xmm7;
7370
7371 const Register tmp1 = r8;
7372 const Register tmp2 = r9;
7373 const Register tmp3 = r10;
7374 const Register tmp4 = r11;
7375
7376     BLOCK_COMMENT("Entry:");
7377 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7378
7379#ifdef _WIN64
7380 __masm-> push(rsi);
7381 __masm-> push(rdi);
7382#endif
7383 __masm-> fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7384
7385#ifdef _WIN64
7386 __masm-> pop(rdi);
7387 __masm-> pop(rsi);
7388#endif
7389
7390 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7391 __masm-> ret(0);
7392
7393 return start;
7394
7395 }
7396
7397 address generate_libmTan() {
7398 StubCodeMark mark(this, "StubRoutines", "libmTan");
7399
7400 address start = __masm-> pc();
7401
7402 const XMMRegister x0 = xmm0;
7403 const XMMRegister x1 = xmm1;
7404 const XMMRegister x2 = xmm2;
7405 const XMMRegister x3 = xmm3;
7406
7407 const XMMRegister x4 = xmm4;
7408 const XMMRegister x5 = xmm5;
7409 const XMMRegister x6 = xmm6;
7410 const XMMRegister x7 = xmm7;
7411
7412 const Register tmp1 = r8;
7413 const Register tmp2 = r9;
7414 const Register tmp3 = r10;
7415 const Register tmp4 = r11;
7416
7417     BLOCK_COMMENT("Entry:");
7418 __masm-> enter(); // required for proper stackwalking of RuntimeStub frame
7419
7420#ifdef _WIN64
7421 __masm-> push(rsi);
7422 __masm-> push(rdi);
7423#endif
7424 __masm-> fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
7425
7426#ifdef _WIN64
7427 __masm-> pop(rdi);
7428 __masm-> pop(rsi);
7429#endif
7430
7431 __masm-> leave(); // required for proper stackwalking of RuntimeStub frame
7432 __masm-> ret(0);
7433
7434 return start;
7435
7436 }
7437
7438#undef __
7439#define __ masm->
7440
7441 // Continuation point for throwing of implicit exceptions that are
7442 // not handled in the current activation. Fabricates an exception
7443 // oop and initiates normal exception dispatching in this
7444 // frame. Since we need to preserve callee-saved values (currently
7445 // only for C2, but done for C1 as well) we need a callee-saved oop
7446 // map and therefore have to make these stubs into RuntimeStubs
7447 // rather than BufferBlobs. If the compiler needs all registers to
7448 // be preserved between the fault point and the exception handler
7449 // then it must assume responsibility for that in
7450 // AbstractCompiler::continuation_for_implicit_null_exception or
7451 // continuation_for_implicit_division_by_zero_exception. All other
7452 // implicit exceptions (e.g., NullPointerException or
7453 // AbstractMethodError on entry) are either at call sites or
7454 // otherwise assume that stack unwinding will be initiated, so
7455 // caller saved registers were assumed volatile in the compiler.
7456 address generate_throw_exception(const char* name,
7457 address runtime_entry,
7458 Register arg1 = noreg,
7459 Register arg2 = noreg) {
7460 // Information about frame layout at time of blocking runtime call.
7461 // Note that we only have to preserve callee-saved registers since
7462 // the compilers are responsible for supplying a continuation point
7463 // if they expect all registers to be preserved.
7464 enum layout {
7465 rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
7466 rbp_off2,
7467 return_off,
7468 return_off2,
7469 framesize // inclusive of return address
7470 };
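      // Note: the layout slots above are 32-bit (BytesPerInt) units, so
      // framesize/2 is the frame size in 64-bit words; the alignment assert
      // below requires that to be even, i.e. the frame is a multiple of
      // 16 bytes.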
7471
7472 int insts_size = 512;
7473 int locs_size = 64;
7474
7475 CodeBuffer code(name, insts_size, locs_size);
7476 OopMapSet* oop_maps = new OopMapSet();
7477 MacroAssembler* masm = new MacroAssembler(&code);
7478
7479    address start = __ pc();
7480
7481 // This is an inlined and slightly modified version of call_VM
7482 // which has the ability to fetch the return PC out of
7483 // thread-local storage and also sets up last_Java_sp slightly
7484 // differently than the real call_VM
7485
7486    __ enter(); // required for proper stackwalking of RuntimeStub frame
7487
7488    assert(is_even(framesize/2), "sp not 16-byte aligned");
7489
7490    // return address and rbp are already in place
7491    __ subptr(rsp, (framesize-4) << LogBytesPerInt); // prolog
7492
7493    int frame_complete = __ pc() - start;
7494
7495    // Set up last_Java_sp and last_Java_fp
7496    address the_pc = __ pc();
7497    __ set_last_Java_frame(rsp, rbp, the_pc);
7498    __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack
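    // The prolog reserves framesize-4 slots because four 32-bit slots --
    // saved rbp and the return address, two slots each (rbp_off/rbp_off2,
    // return_off/return_off2 in the layout enum) -- are already on the
    // stack once enter() completes.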
7499
7500 // Call runtime
7501    if (arg1 != noreg) {
7502      assert(arg2 != c_rarg1, "clobbered");
7503      __ movptr(c_rarg1, arg1);
7504    }
7505    if (arg2 != noreg) {
7506      __ movptr(c_rarg2, arg2);
7507    }
7508    __ movptr(c_rarg0, r15_thread);
7509    BLOCK_COMMENT("call runtime_entry");
7510    __ call(RuntimeAddress(runtime_entry));
7511
7512 // Generate oop map
7513 OopMap* map = new OopMap(framesize, 0);
7514
7515 oop_maps->add_gc_map(the_pc - start, map);
7516
7517    __ reset_last_Java_frame(true);
7518
7519    __ leave(); // required for proper stackwalking of RuntimeStub frame
7520
7521    // check for pending exceptions
7522#ifdef ASSERT
7523    Label L;
7524    __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()),
7525              (int32_t) NULL_WORD);
7526    __ jcc(Assembler::notEqual, L);
7527    __ should_not_reach_here();
7528    __ bind(L);
7529#endif // ASSERT
7530    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
7531
7532
7533 // codeBlob framesize is in words (not VMRegImpl::slot_size)
7534 RuntimeStub* stub =
7535 RuntimeStub::new_runtime_stub(name,
7536 &code,
7537 frame_complete,
7538 (framesize >> (LogBytesPerWord - LogBytesPerInt)),
7539 oop_maps, false);
7540 return stub->entry_point();
7541 }
7542
7543 void create_control_words() {
7544 // Round to nearest, 64-bit mode, exceptions masked
7545 StubRoutines::x86::_mxcsr_std = 0x1F80;
7546 }
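  // 0x1F80 is the power-on default MXCSR: the six SIMD exception mask
  // bits (7..12) are set, rounding control (bits 13..14) is 00 for
  // round-to-nearest, and FZ/DAZ are clear.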
7547
7548 // Initialization
7549 void generate_initial() {
7550 // Generates all stubs and initializes the entry points
7551
7552    // These platform-specific settings are needed by generate_call_stub()
7553 create_control_words();
7554
7555    // entry points that exist in all platforms. Note: This is code
7556 // that could be shared among different platforms - however the
7557 // benefit seems to be smaller than the disadvantage of having a
7558 // much more complicated generator structure. See also comment in
7559 // stubRoutines.hpp.
7560
7561 StubRoutines::_forward_exception_entry = generate_forward_exception();
7562
7563 StubRoutines::_call_stub_entry =
7564 generate_call_stub(StubRoutines::_call_stub_return_address);
7565
7566 // is referenced by megamorphic call
7567 StubRoutines::_catch_exception_entry = generate_catch_exception();
7568
7569 // atomic calls
7570 StubRoutines::_fence_entry = generate_orderaccess_fence();
7571
7572 // platform dependent
7573 StubRoutines::x86::_get_previous_sp_entry = generate_get_previous_sp();
7574
7575 StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr();
7576
7577 StubRoutines::x86::_f2i_fixup = generate_f2i_fixup();
7578 StubRoutines::x86::_f2l_fixup = generate_f2l_fixup();
7579 StubRoutines::x86::_d2i_fixup = generate_d2i_fixup();
7580 StubRoutines::x86::_d2l_fixup = generate_d2l_fixup();
7581
7582 StubRoutines::x86::_float_sign_mask = generate_fp_mask("float_sign_mask", 0x7FFFFFFF7FFFFFFF);
7583 StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
7584 StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
7585 StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
7586
7587 // Build this early so it's available for the interpreter.
7588 StubRoutines::_throw_StackOverflowError_entry =
7589 generate_throw_exception("StackOverflowError throw_exception",
7590                               CAST_FROM_FN_PTR(address,
7591                                                SharedRuntime::
7592                                                throw_StackOverflowError));
7593 StubRoutines::_throw_delayed_StackOverflowError_entry =
7594 generate_throw_exception("delayed StackOverflowError throw_exception",
7595                               CAST_FROM_FN_PTR(address,
7596                                                SharedRuntime::
7597                                                throw_delayed_StackOverflowError));
7598 if (UseCRC32Intrinsics) {
7599      // set table address before generating the stubs which use it
7600 StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
7601 StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
7602 }
7603
7604 if (UseCRC32CIntrinsics) {
7605 bool supports_clmul = VM_Version::supports_clmul();
7606 StubRoutines::x86::generate_CRC32C_table(supports_clmul);
7607 StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
7608 StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
7609 }
7610
7611 if (UseAdler32Intrinsics) {
7612 StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
7613 }
7614
7615 if (UseLibmIntrinsic && InlineIntrinsics) {
7616 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) ||
7617 vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) ||
7618 vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
7619 StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
7620 StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
7621 StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
7622 StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
7623 StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
7624 StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
7625 StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
7626 StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
7627 StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
7628 StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
7629 StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
7630 StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
7631 StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
7632 StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
7633 }
7634 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) {
7635 StubRoutines::_dexp = generate_libmExp();
7636 }
7637 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
7638 StubRoutines::_dlog = generate_libmLog();
7639 }
7640 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) {
7641 StubRoutines::_dlog10 = generate_libmLog10();
7642 }
7643 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) {
7644 StubRoutines::_dpow = generate_libmPow();
7645 }
7646 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
7647 StubRoutines::_dsin = generate_libmSin();
7648 }
7649 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
7650 StubRoutines::_dcos = generate_libmCos();
7651 }
7652 if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) {
7653 StubRoutines::_dtan = generate_libmTan();
7654 }
7655 }
7656
7657 // Safefetch stubs.
7658 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
7659 &StubRoutines::_safefetch32_fault_pc,
7660 &StubRoutines::_safefetch32_continuation_pc);
7661 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
7662 &StubRoutines::_safefetchN_fault_pc,
7663 &StubRoutines::_safefetchN_continuation_pc);
7664 }
7665
7666 void generate_all() {
7667 // Generates all stubs and initializes the entry points
7668
7669 // These entry points require SharedInfo::stack0 to be set up in
7670 // non-core builds and need to be relocatable, so they each
7671 // fabricate a RuntimeStub internally.
7672 StubRoutines::_throw_AbstractMethodError_entry =
7673 generate_throw_exception("AbstractMethodError throw_exception",
7674                               CAST_FROM_FN_PTR(address,
7675                                                SharedRuntime::
7676                                                throw_AbstractMethodError));
7677
7678 StubRoutines::_throw_IncompatibleClassChangeError_entry =
7679 generate_throw_exception("IncompatibleClassChangeError throw_exception",
7680                               CAST_FROM_FN_PTR(address,
7681                                                SharedRuntime::
7682                                                throw_IncompatibleClassChangeError));
7683
7684 StubRoutines::_throw_NullPointerException_at_call_entry =
7685 generate_throw_exception("NullPointerException at call throw_exception",
7686                               CAST_FROM_FN_PTR(address,
7687                                                SharedRuntime::
7688                                                throw_NullPointerException_at_call));
7689
7690 // entry points that are platform specific
7691 StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF7FFFFFFF);
7692 StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
7693 StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
7694 StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
7695 StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF);
7696 StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x0000000100000001);
7697 StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
7698 StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
7699 StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff);
7700 StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff0000ffff);
7701 StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
7702 0xFFFFFFFF, 0, 0, 0);
7703 StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
7704 0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
7705 StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x0302010003020100);
7706 StubRoutines::x86::_vector_byte_shuffle_mask = generate_vector_byte_shuffle_mask("vector_byte_shuffle_mask");
7707 StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x0100010001000100);
7708 StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
7709 StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
7710 StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
7711
7712 // support for verify_oop (must happen after universe_init)
7713 if (VerifyOops) {
7714 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
7715 }
7716
7717 // data cache line writeback
7718 StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
7719 StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();
7720
7721 // arraycopy stubs used by compilers
7722 generate_arraycopy_stubs();
7723
7724 // don't bother generating these AES intrinsic stubs unless global flag is set
7725 if (UseAESIntrinsics) {
7726 StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
7727 StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
7728 StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
7729 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
7730 if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512vl() && VM_Version::supports_avx512dq() ) {
7731 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
7732 StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
7733 StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
7734 StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
7735 StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr();
7736 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
7737 StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
7738 } else {
7739 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
7740 }
7741 }
7742
7743 if (UseAESCTRIntrinsics) {
7744 if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
7745        if (StubRoutines::x86::_counter_mask_addr == NULL) {
7746 StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
7747 }
7748 StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
7749 } else {
7750 StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
7751 StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
7752 }
7753 }
7754
7755 if (UseMD5Intrinsics) {
7756 StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
7757 StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
7758 }
7759 if (UseSHA1Intrinsics) {
7760 StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
7761 StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
7762 StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
7763 StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
7764 }
7765 if (UseSHA256Intrinsics) {
7766 StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
7767 char* dst = (char*)StubRoutines::x86::_k256_W;
7768 char* src = (char*)StubRoutines::x86::_k256;
7769 for (int ii = 0; ii < 16; ++ii) {
7770 memcpy(dst + 32 * ii, src + 16 * ii, 16);
7771 memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
7772 }
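      // The loop above widens the 16 rows of the 128-bit k256 table into a
      // 32-byte stride, storing each 16-byte row twice, so that both 128-bit
      // lanes of a 256-bit load see the same round constants; this builds the
      // k256_W table consumed by the AVX2 SHA-256 stub.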
7773 StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
7774 StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
7775 StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
7776 StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
7777 }
7778 if (UseSHA512Intrinsics) {
7779 StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W;
7780 StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512();
7781 StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
7782 StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
7783 }
7784
7785 // Generate GHASH intrinsics code
7786 if (UseGHASHIntrinsics) {
7787      if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
7788 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
7789 }
7790 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
7791 if (VM_Version::supports_avx()) {
7792 StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr();
7793 StubRoutines::x86::_ghash_poly_addr = ghash_polynomial_addr();
7794 StubRoutines::_ghash_processBlocks = generate_avx_ghash_processBlocks();
7795 } else {
7796 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
7797 }
7798 }
7799
7800
7801 if (UseBASE64Intrinsics) {
7802      if (VM_Version::supports_avx2() &&
7803 VM_Version::supports_avx512bw() &&
7804 VM_Version::supports_avx512vl()) {
7805 StubRoutines::x86::_avx2_shuffle_base64 = base64_avx2_shuffle_addr();
7806 StubRoutines::x86::_avx2_input_mask_base64 = base64_avx2_input_mask_addr();
7807 StubRoutines::x86::_avx2_lut_base64 = base64_avx2_lut_addr();
7808 }
7809 StubRoutines::x86::_encoding_table_base64 = base64_encoding_table_addr();
7810 if (VM_Version::supports_avx512_vbmi()) {
7811 StubRoutines::x86::_shuffle_base64 = base64_shuffle_addr();
7812 StubRoutines::x86::_lookup_lo_base64 = base64_vbmi_lookup_lo_addr();
7813 StubRoutines::x86::_lookup_hi_base64 = base64_vbmi_lookup_hi_addr();
7814 StubRoutines::x86::_lookup_lo_base64url = base64_vbmi_lookup_lo_url_addr();
7815 StubRoutines::x86::_lookup_hi_base64url = base64_vbmi_lookup_hi_url_addr();
7816 StubRoutines::x86::_pack_vec_base64 = base64_vbmi_pack_vec_addr();
7817 StubRoutines::x86::_join_0_1_base64 = base64_vbmi_join_0_1_addr();
7818 StubRoutines::x86::_join_1_2_base64 = base64_vbmi_join_1_2_addr();
7819 StubRoutines::x86::_join_2_3_base64 = base64_vbmi_join_2_3_addr();
7820 }
7821 StubRoutines::x86::_decoding_table_base64 = base64_decoding_table_addr();
7822 StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
7823 StubRoutines::_base64_decodeBlock = generate_base64_decodeBlock();
7824 }
7825
7826 BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
7827    if (bs_nm != NULL) {
7828 StubRoutines::x86::_method_entry_barrier = generate_method_entry_barrier();
7829 }
7830#ifdef COMPILER2
7831 if (UseMultiplyToLenIntrinsic) {
7832 StubRoutines::_multiplyToLen = generate_multiplyToLen();
7833 }
7834 if (UseSquareToLenIntrinsic) {
7835 StubRoutines::_squareToLen = generate_squareToLen();
7836 }
7837 if (UseMulAddIntrinsic) {
7838 StubRoutines::_mulAdd = generate_mulAdd();
7839 }
7840 if (VM_Version::supports_avx512_vbmi2()) {
7841 StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
7842 StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
7843 }
7844 if (UseMontgomeryMultiplyIntrinsic) {
7845 StubRoutines::_montgomeryMultiply
7846        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
7847 }
7848 if (UseMontgomerySquareIntrinsic) {
7849 StubRoutines::_montgomerySquare
7850        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
7851 }
7852
7853 // Get svml stub routine addresses
7854    void *libjsvml = NULL;
7855 char ebuf[1024];
7856    char dll_name[JVM_MAXPATHLEN + 1];
7857 if (os::dll_locate_lib(dll_name, sizeof(dll_name), Arguments::get_dll_dir(), "jsvml")) {
7858 libjsvml = os::dll_load(dll_name, ebuf, sizeof ebuf);
7859 }
7860    if (libjsvml != NULL) {
7861 // SVML method naming convention
7862 // All the methods are named as __jsvml_op<T><N>_ha_<VV>
7863 // Where:
7864 // ha stands for high accuracy
7865 // <T> is optional to indicate float/double
7866 // Set to f for vector float operation
7867 // Omitted for vector double operation
7868 // <N> is the number of elements in the vector
7869 // 1, 2, 4, 8, 16
7870 // e.g. 128 bit float vector has 4 float elements
7871 // <VV> indicates the avx/sse level:
7872 // z0 is AVX512, l9 is AVX2, e9 is AVX1 and ex is for SSE2
7873 // e.g. __jsvml_expf16_ha_z0 is the method for computing 16 element vector float exp using AVX 512 insns
7874 // __jsvml_exp8_ha_z0 is the method for computing 8 element vector double exp using AVX 512 insns
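      //   For example, on an AVX2 machine the loops below assemble names such
      //   as "__jsvml_expf8_ha_l9" (assuming VectorSupport::svmlname[op] is
      //   "exp"): 8-element vector float exp, built via snprintf from the
      //   svmlname table plus the avx_sse_str suffix.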
7875
7876      log_info(library)("Loaded library %s, handle " INTPTR_FORMAT, JNI_LIB_PREFIX "jsvml" JNI_LIB_SUFFIX, p2i(libjsvml));
7877 if (UseAVX > 2) {
7878 for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
7879 int vop = VectorSupport::VECTOR_OP_SVML_START + op;
7880 if ((!VM_Version::supports_avx512dq()) &&
7881 (vop == VectorSupport::VECTOR_OP_LOG || vop == VectorSupport::VECTOR_OP_LOG10 || vop == VectorSupport::VECTOR_OP_POW)) {
7882 continue;
7883 }
7884 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf16_ha_z0", VectorSupport::svmlname[op]);
7885 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
7886
7887 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s8_ha_z0", VectorSupport::svmlname[op]);
7888 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_512][op] = (address)os::dll_lookup(libjsvml, ebuf);
7889 }
7890 }
7891 const char* avx_sse_str = (UseAVX >= 2) ? "l9" : ((UseAVX == 1) ? "e9" : "ex");
7892 for (int op = 0; op < VectorSupport::NUM_SVML_OP; op++) {
7893 int vop = VectorSupport::VECTOR_OP_SVML_START + op;
7894 if (vop == VectorSupport::VECTOR_OP_POW) {
7895 continue;
7896 }
7897 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7898 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
7899
7900 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7901 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
7902
7903 snprintf(ebuf, sizeof(ebuf), "__jsvml_%sf8_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7904 StubRoutines::_vector_f_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
7905
7906 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s1_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7907 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_64][op] = (address)os::dll_lookup(libjsvml, ebuf);
7908
7909 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s2_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7910 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_128][op] = (address)os::dll_lookup(libjsvml, ebuf);
7911
7912 snprintf(ebuf, sizeof(ebuf), "__jsvml_%s4_ha_%s", VectorSupport::svmlname[op], avx_sse_str);
7913 StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf);
7914 }
7915 }
7916#endif // COMPILER2
7917
7918 if (UseVectorizedMismatchIntrinsic) {
7919 StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
7920 }
7921 }
7922
7923 public:
7924 StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
7925 if (all) {
7926 generate_all();
7927 } else {
7928 generate_initial();
7929 }
7930 }
7931}; // end class declaration
7932
7933#define UCM_TABLE_MAX_ENTRIES 16
7934void StubGenerator_generate(CodeBuffer* code, bool all) {
7935  if (UnsafeCopyMemory::_table == NULL) {
7936    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
7937 }
7938 StubGenerator g(code, all);
7939}

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_VM_VERSION_X86_HPP
26#define CPU_X86_VM_VERSION_X86_HPP
27
28#include "runtime/abstract_vm_version.hpp"
29#include "utilities/macros.hpp"
30#include "utilities/sizes.hpp"
31
32class VM_Version : public Abstract_VM_Version {
33 friend class VMStructs;
34 friend class JVMCIVMStructs;
35
36 public:
37 // cpuid result register layouts. These are all unions of a uint32_t
38 // (in case anyone wants access to the register as a whole) and a bitfield.
39
40 union StdCpuid1Eax {
41 uint32_t value;
42 struct {
43 uint32_t stepping : 4,
44 model : 4,
45 family : 4,
46 proc_type : 2,
47 : 2,
48 ext_model : 4,
49 ext_family : 8,
50 : 4;
51 } bits;
52 };
53
54 union StdCpuid1Ebx { // example, unused
55 uint32_t value;
56 struct {
57 uint32_t brand_id : 8,
58 clflush_size : 8,
59 threads_per_cpu : 8,
60 apic_id : 8;
61 } bits;
62 };
63
64 union StdCpuid1Ecx {
65 uint32_t value;
66 struct {
67 uint32_t sse3 : 1,
68 clmul : 1,
69 : 1,
70 monitor : 1,
71 : 1,
72 vmx : 1,
73 : 1,
74 est : 1,
75 : 1,
76 ssse3 : 1,
77 cid : 1,
78 : 1,
79 fma : 1,
80 cmpxchg16: 1,
81 : 4,
82 dca : 1,
83 sse4_1 : 1,
84 sse4_2 : 1,
85 : 2,
86 popcnt : 1,
87 : 1,
88 aes : 1,
89 : 1,
90 osxsave : 1,
91 avx : 1,
92 : 2,
93 hv : 1;
94 } bits;
95 };
96
97 union StdCpuid1Edx {
98 uint32_t value;
99 struct {
100 uint32_t : 4,
101 tsc : 1,
102 : 3,
103 cmpxchg8 : 1,
104 : 6,
105 cmov : 1,
106 : 3,
107 clflush : 1,
108 : 3,
109 mmx : 1,
110 fxsr : 1,
111 sse : 1,
112 sse2 : 1,
113 : 1,
114 ht : 1,
115 : 3;
116 } bits;
117 };
118
119 union DcpCpuid4Eax {
120 uint32_t value;
121 struct {
122 uint32_t cache_type : 5,
123 : 21,
124 cores_per_cpu : 6;
125 } bits;
126 };
127
128 union DcpCpuid4Ebx {
129 uint32_t value;
130 struct {
131 uint32_t L1_line_size : 12,
132 partitions : 10,
133 associativity : 10;
134 } bits;
135 };
136
137 union TplCpuidBEbx {
138 uint32_t value;
139 struct {
140 uint32_t logical_cpus : 16,
141 : 16;
142 } bits;
143 };
144
145 union ExtCpuid1Ecx {
146 uint32_t value;
147 struct {
148 uint32_t LahfSahf : 1,
149 CmpLegacy : 1,
150 : 3,
151 lzcnt_intel : 1,
152 lzcnt : 1,
153 sse4a : 1,
154 misalignsse : 1,
155 prefetchw : 1,
156 : 22;
157 } bits;
158 };
159
160 union ExtCpuid1Edx {
161 uint32_t value;
162 struct {
163 uint32_t : 22,
164 mmx_amd : 1,
165 mmx : 1,
166 fxsr : 1,
167 : 4,
168 long_mode : 1,
169 tdnow2 : 1,
170 tdnow : 1;
171 } bits;
172 };
173
174 union ExtCpuid5Ex {
175 uint32_t value;
176 struct {
177 uint32_t L1_line_size : 8,
178 L1_tag_lines : 8,
179 L1_assoc : 8,
180 L1_size : 8;
181 } bits;
182 };
183
184 union ExtCpuid7Edx {
185 uint32_t value;
186 struct {
187 uint32_t : 8,
188 tsc_invariance : 1,
189 : 23;
190 } bits;
191 };
192
193 union ExtCpuid8Ecx {
194 uint32_t value;
195 struct {
196 uint32_t cores_per_cpu : 8,
197 : 24;
198 } bits;
199 };
200
201 union SefCpuid7Eax {
202 uint32_t value;
203 };
204
205 union SefCpuid7Ebx {
206 uint32_t value;
207 struct {
208 uint32_t fsgsbase : 1,
209 : 2,
210 bmi1 : 1,
211 : 1,
212 avx2 : 1,
213 : 2,
214 bmi2 : 1,
215 erms : 1,
216 : 1,
217 rtm : 1,
218 : 4,
219 avx512f : 1,
220 avx512dq : 1,
221 : 1,
222 adx : 1,
223 : 3,
224 clflushopt : 1,
225 clwb : 1,
226 : 1,
227 avx512pf : 1,
228 avx512er : 1,
229 avx512cd : 1,
230 sha : 1,
231 avx512bw : 1,
232 avx512vl : 1;
233 } bits;
234 };
235
236 union SefCpuid7Ecx {
237 uint32_t value;
238 struct {
239 uint32_t prefetchwt1 : 1,
240 avx512_vbmi : 1,
241 umip : 1,
242 pku : 1,
243 ospke : 1,
244 : 1,
245 avx512_vbmi2 : 1,
246 : 1,
247 gfni : 1,
248 vaes : 1,
249 avx512_vpclmulqdq : 1,
250 avx512_vnni : 1,
251 avx512_bitalg : 1,
252 : 1,
253 avx512_vpopcntdq : 1,
254 : 17;
255 } bits;
256 };
257
258 union SefCpuid7Edx {
259 uint32_t value;
260 struct {
261 uint32_t : 2,
262 avx512_4vnniw : 1,
263 avx512_4fmaps : 1,
264 : 10,
265 serialize : 1,
266 : 17;
267 } bits;
268 };
269
270 union ExtCpuid1EEbx {
271 uint32_t value;
272 struct {
273 uint32_t : 8,
274 threads_per_core : 8,
275 : 16;
276 } bits;
277 };
278
279 union XemXcr0Eax {
280 uint32_t value;
281 struct {
282 uint32_t x87 : 1,
283 sse : 1,
284 ymm : 1,
285 bndregs : 1,
286 bndcsr : 1,
287 opmask : 1,
288 zmm512 : 1,
289 zmm32 : 1,
290 : 24;
291 } bits;
292 };
293
294protected:
295 static int _cpu;
296 static int _model;
297 static int _stepping;
298
299 static bool _has_intel_jcc_erratum;
300
301 static address _cpuinfo_segv_addr; // address of instruction which causes SEGV
302 static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
303
304 enum Feature_Flag : uint64_t {
305#define CPU_FEATURE_FLAGS(decl) \
306 decl(CX8, "cx8", 0) /* next bits are from cpuid 1 (EDX) */ \
307 decl(CMOV, "cmov", 1) \
308 decl(FXSR, "fxsr", 2) \
309 decl(HT, "ht", 3) \
310 \
311 decl(MMX, "mmx", 4) \
312 decl(3DNOW_PREFETCH, "3dnowpref", 5) /* Processor supports 3dnow prefetch and prefetchw instructions */ \
313 /* may not necessarily support other 3dnow instructions */ \
314 decl(SSE, "sse", 6) \
315 decl(SSE2, "sse2", 7) \
316 \
317 decl(SSE3, "sse3", 8 ) /* SSE3 comes from cpuid 1 (ECX) */ \
318 decl(SSSE3, "ssse3", 9 ) \
319 decl(SSE4A, "sse4a", 10) \
320 decl(SSE4_1, "sse4.1", 11) \
321 \
322 decl(SSE4_2, "sse4.2", 12) \
323 decl(POPCNT, "popcnt", 13) \
324 decl(LZCNT, "lzcnt", 14) \
325 decl(TSC, "tsc", 15) \
326 \
327 decl(TSCINV_BIT, "tscinvbit", 16) \
328 decl(TSCINV, "tscinv", 17) \
329 decl(AVX, "avx", 18) \
330 decl(AVX2, "avx2", 19) \
331 \
332 decl(AES, "aes", 20) \
333 decl(ERMS, "erms", 21) /* enhanced 'rep movsb/stosb' instructions */ \
334 decl(CLMUL, "clmul", 22) /* carryless multiply for CRC */ \
335 decl(BMI1, "bmi1", 23) \
336 \
337 decl(BMI2, "bmi2", 24) \
338 decl(RTM, "rtm", 25) /* Restricted Transactional Memory instructions */ \
339 decl(ADX, "adx", 26) \
340 decl(AVX512F, "avx512f", 27) /* AVX 512bit foundation instructions */ \
341 \
342 decl(AVX512DQ, "avx512dq", 28) \
343 decl(AVX512PF, "avx512pf", 29) \
344 decl(AVX512ER, "avx512er", 30) \
345 decl(AVX512CD, "avx512cd", 31) \
346 \
347 decl(AVX512BW, "avx512bw", 32) /* Byte and word vector instructions */ \
348 decl(AVX512VL, "avx512vl", 33) /* EVEX instructions with smaller vector length */ \
349 decl(SHA, "sha", 34) /* SHA instructions */ \
350 decl(FMA, "fma", 35) /* FMA instructions */ \
351 \
352 decl(VZEROUPPER, "vzeroupper", 36) /* Vzeroupper instruction */ \
353 decl(AVX512_VPOPCNTDQ, "avx512_vpopcntdq", 37) /* Vector popcount */ \
354 decl(AVX512_VPCLMULQDQ, "avx512_vpclmulqdq", 38) /* Vector carryless multiplication */ \
355 decl(AVX512_VAES, "avx512_vaes", 39) /* Vector AES instruction */ \
356 \
357 decl(AVX512_VNNI, "avx512_vnni", 40) /* Vector Neural Network Instructions */ \
358 decl(FLUSH, "clflush", 41) /* flush instruction */ \
359    decl(FLUSHOPT,          "clflushopt",         42) /* clflushopt instruction */ \
360 decl(CLWB, "clwb", 43) /* clwb instruction */ \
361 \
362 decl(AVX512_VBMI2, "avx512_vbmi2", 44) /* VBMI2 shift left double instructions */ \
363 decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
364 decl(HV, "hv", 46) /* Hypervisor instructions */ \
365 decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */
366
367#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
368    CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
369#undef DECLARE_CPU_FEATURE_FLAG
370 };
371
372 static const char* _features_names[];
373
374 enum Extended_Family {
375 // AMD
376 CPU_FAMILY_AMD_11H = 0x11,
377 // ZX
378 CPU_FAMILY_ZX_CORE_F6 = 6,
379 CPU_FAMILY_ZX_CORE_F7 = 7,
380 // Intel
381 CPU_FAMILY_INTEL_CORE = 6,
382 CPU_MODEL_NEHALEM = 0x1e,
383 CPU_MODEL_NEHALEM_EP = 0x1a,
384 CPU_MODEL_NEHALEM_EX = 0x2e,
385 CPU_MODEL_WESTMERE = 0x25,
386 CPU_MODEL_WESTMERE_EP = 0x2c,
387 CPU_MODEL_WESTMERE_EX = 0x2f,
388 CPU_MODEL_SANDYBRIDGE = 0x2a,
389 CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
390 CPU_MODEL_IVYBRIDGE_EP = 0x3a,
391 CPU_MODEL_HASWELL_E3 = 0x3c,
392 CPU_MODEL_HASWELL_E7 = 0x3f,
393 CPU_MODEL_BROADWELL = 0x3d,
394 CPU_MODEL_SKYLAKE = 0x55
395 };
396
397 // cpuid information block. All info derived from executing cpuid with
398 // various function numbers is stored here. Intel and AMD info is
399 // merged in this block: accessor methods disentangle it.
400 //
401 // The info block is laid out in subblocks of 4 dwords corresponding to
402 // eax, ebx, ecx and edx, whether or not they contain anything useful.
403 struct CpuidInfo {
404 // cpuid function 0
405 uint32_t std_max_function;
406 uint32_t std_vendor_name_0;
407 uint32_t std_vendor_name_1;
408 uint32_t std_vendor_name_2;
409
410 // cpuid function 1
411 StdCpuid1Eax std_cpuid1_eax;
412 StdCpuid1Ebx std_cpuid1_ebx;
413 StdCpuid1Ecx std_cpuid1_ecx;
414 StdCpuid1Edx std_cpuid1_edx;
415
416 // cpuid function 4 (deterministic cache parameters)
417 DcpCpuid4Eax dcp_cpuid4_eax;
418 DcpCpuid4Ebx dcp_cpuid4_ebx;
419 uint32_t dcp_cpuid4_ecx; // unused currently
420 uint32_t dcp_cpuid4_edx; // unused currently
421
422 // cpuid function 7 (structured extended features)
423 SefCpuid7Eax sef_cpuid7_eax;
424 SefCpuid7Ebx sef_cpuid7_ebx;
425 SefCpuid7Ecx sef_cpuid7_ecx;
426 SefCpuid7Edx sef_cpuid7_edx;
427
428 // cpuid function 0xB (processor topology)
429 // ecx = 0
430 uint32_t tpl_cpuidB0_eax;
431 TplCpuidBEbx tpl_cpuidB0_ebx;
432 uint32_t tpl_cpuidB0_ecx; // unused currently
433 uint32_t tpl_cpuidB0_edx; // unused currently
434
435 // ecx = 1
436 uint32_t tpl_cpuidB1_eax;
437 TplCpuidBEbx tpl_cpuidB1_ebx;
438 uint32_t tpl_cpuidB1_ecx; // unused currently
439 uint32_t tpl_cpuidB1_edx; // unused currently
440
441 // ecx = 2
442 uint32_t tpl_cpuidB2_eax;
443 TplCpuidBEbx tpl_cpuidB2_ebx;
444 uint32_t tpl_cpuidB2_ecx; // unused currently
445 uint32_t tpl_cpuidB2_edx; // unused currently
446
447 // cpuid function 0x80000000 // example, unused
448 uint32_t ext_max_function;
449 uint32_t ext_vendor_name_0;
450 uint32_t ext_vendor_name_1;
451 uint32_t ext_vendor_name_2;
452
453 // cpuid function 0x80000001
454 uint32_t ext_cpuid1_eax; // reserved
455 uint32_t ext_cpuid1_ebx; // reserved
456 ExtCpuid1Ecx ext_cpuid1_ecx;
457 ExtCpuid1Edx ext_cpuid1_edx;
458
459 // cpuid functions 0x80000002 thru 0x80000004: example, unused
460 uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
461 uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
462 uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
463
464 // cpuid function 0x80000005 // AMD L1, Intel reserved
465 uint32_t ext_cpuid5_eax; // unused currently
466 uint32_t ext_cpuid5_ebx; // reserved
467 ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD)
468 ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD)
469
470 // cpuid function 0x80000007
471 uint32_t ext_cpuid7_eax; // reserved
472 uint32_t ext_cpuid7_ebx; // reserved
473 uint32_t ext_cpuid7_ecx; // reserved
474 ExtCpuid7Edx ext_cpuid7_edx; // tscinv
475
476 // cpuid function 0x80000008
477 uint32_t ext_cpuid8_eax; // unused currently
478 uint32_t ext_cpuid8_ebx; // reserved
479 ExtCpuid8Ecx ext_cpuid8_ecx;
480 uint32_t ext_cpuid8_edx; // reserved
481
482 // cpuid function 0x8000001E // AMD 17h
483 uint32_t ext_cpuid1E_eax;
484 ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
485 uint32_t ext_cpuid1E_ecx;
486 uint32_t ext_cpuid1E_edx; // unused currently
487
488 // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
489 XemXcr0Eax xem_xcr0_eax;
490 uint32_t xem_xcr0_edx; // reserved
491
492    // Space to save ymm registers after signal handling
493 int ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15
494
495    // Space to save zmm registers after signal handling
496 int zmm_save[16*4]; // Save zmm0, zmm7, zmm8, zmm31
497 };
498
499 // The actual cpuid info block
500 static CpuidInfo _cpuid_info;
501
502 // Extractors and predicates
503 static uint32_t extended_cpu_family() {
504 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
505 result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
506 return result;
507 }
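  // Per the cpuid convention the extended family field is non-zero only when
  // the base family field saturates at 0xF, so the unconditional add is safe;
  // e.g. AMD family 11h reports 0xF + 0x02 = 0x11 (CPU_FAMILY_AMD_11H below).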
508
509 static uint32_t extended_cpu_model() {
510 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
511 result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
512 return result;
513 }
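  // E.g. a CPU reporting model 0x5 with ext_model 0x5 yields
  // 0x5 | (0x5 << 4) = 0x55, matching CPU_MODEL_SKYLAKE below.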
514
515 static uint32_t cpu_stepping() {
516 uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
517 return result;
518 }
519
520 static uint logical_processor_count() {
521 uint result = threads_per_core();
522 return result;
523 }
524
525 static bool compute_has_intel_jcc_erratum();
526
527 static uint64_t feature_flags() {
528 uint64_t result = 0;
529 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
530 result |= CPU_CX8;
531 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
532 result |= CPU_CMOV;
533 if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
534 result |= CPU_FLUSH;
535#ifdef _LP64
536 // clflush should always be available on x86_64
537 // if not we are in real trouble because we rely on it
538 // to flush the code cache.
539    assert ((result & CPU_FLUSH) != 0, "clflush should be available");
540#endif
541 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
542 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
543 result |= CPU_FXSR;
544 // HT flag is set for multi-core processors also.
545 if (threads_per_core() > 1)
546 result |= CPU_HT;
547 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
548 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
549 result |= CPU_MMX;
550 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
551 result |= CPU_SSE;
552 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
553 result |= CPU_SSE2;
554 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
555 result |= CPU_SSE3;
556 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
557 result |= CPU_SSSE3;
558 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
559 result |= CPU_SSE4_1;
560 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
561 result |= CPU_SSE4_2;
562 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
563 result |= CPU_POPCNT;
564 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
565 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
566 _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
567 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
568 result |= CPU_AVX;
569 result |= CPU_VZEROUPPER;
570 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
571 result |= CPU_AVX2;
572 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
573 _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
574 _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
575 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
576 result |= CPU_AVX512F;
577 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
578 result |= CPU_AVX512CD;
579 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
580 result |= CPU_AVX512DQ;
581 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
582 result |= CPU_AVX512PF;
583 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
584 result |= CPU_AVX512ER;
585 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
586 result |= CPU_AVX512BW;
587 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
588 result |= CPU_AVX512VL;
589 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
590 result |= CPU_AVX512_VPOPCNTDQ;
591 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
592 result |= CPU_AVX512_VPCLMULQDQ;
593 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
594 result |= CPU_AVX512_VAES;
595 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
596 result |= CPU_AVX512_VNNI;
597 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
598 result |= CPU_AVX512_VBMI;
599 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
600 result |= CPU_AVX512_VBMI2;
601 }
602 }
603 if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
604 result |= CPU_HV;
605 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
606 result |= CPU_BMI1;
607 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
608 result |= CPU_TSC;
609 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
610 result |= CPU_TSCINV_BIT;
611 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
612 result |= CPU_AES;
613 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
614 result |= CPU_ERMS;
615 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
616 result |= CPU_CLMUL;
617 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
618 result |= CPU_RTM;
619 if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
620 result |= CPU_ADX;
621 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
622 result |= CPU_BMI2;
623 if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
624 result |= CPU_SHA;
625 if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
626 result |= CPU_FMA;
627 if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
628 result |= CPU_FLUSHOPT;
629
630 // AMD|Hygon features.
631 if (is_amd_family()) {
632 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
633 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
634 result |= CPU_3DNOW_PREFETCH;
635 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
636 result |= CPU_LZCNT;
637 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
638 result |= CPU_SSE4A;
639 }
640
641 // Intel features.
642 if (is_intel()) {
643 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
644 result |= CPU_LZCNT;
645 // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
646 if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
647 result |= CPU_3DNOW_PREFETCH;
648 }
649 if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
650 result |= CPU_CLWB;
651 }
652 if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
653 result |= CPU_SERIALIZE;
654 }
655
656 // ZX features.
657 if (is_zx()) {
658 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
659 result |= CPU_LZCNT;
660 // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
661 if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
662 result |= CPU_3DNOW_PREFETCH;
663 }
664 }
665
666 // Composite features.
667 if (supports_tscinv_bit() &&
668 ((is_amd_family() && !is_amd_Barcelona()) ||
669 is_intel_tsc_synched_at_init())) {
670 result |= CPU_TSCINV;
671 }
672
673 return result;
674 }
675
676 static bool os_supports_avx_vectors() {
677 bool retVal = false;
678    int nreg = 2 LP64_ONLY(+2);
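    // nreg counts the registers sampled by the cpuid stub: presumably ymm0
    // and ymm7 on 32-bit, plus ymm8 and ymm15 on LP64, matching the
    // ymm_save/zmm_save comments in CpuidInfo above.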
679 if (supports_evex()) {
680 // Verify that OS save/restore all bits of EVEX registers
681 // during signal processing.
682 retVal = true;
683 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
684 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
685 retVal = false;
686 break;
687 }
688 }
689 } else if (supports_avx()) {
690 // Verify that OS save/restore all bits of AVX registers
691 // during signal processing.
692 retVal = true;
693 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
694 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
695 retVal = false;
696 break;
697 }
698 }
699 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
700 if (retVal == false) {
701 // Verify that OS save/restore all bits of EVEX registers
702 // during signal processing.
703 retVal = true;
704 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
705 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
706 retVal = false;
707 break;
708 }
709 }
710 }
711 }
712 return retVal;
713 }
714
715 static void get_processor_features();
716
717public:
718 // Offsets for cpuid asm stub
719  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
720  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
721  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
722  static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
723  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
724  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
725  static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
726  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
727  static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
728  static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
729  static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
730  static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
731  static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
732  static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
733  static ByteSize zmm_save_offset() { return byte_offset_of(CpuidInfo, zmm_save); }
734
735  // The value used to check ymm registers after signal handling
736 static int ymm_test_value() { return 0xCAFEBABE; }
737
738 static void get_cpu_info_wrapper();
739 static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
740 static bool is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }
741 static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
742 static address cpuinfo_cont_addr() { return _cpuinfo_cont_addr; }
743
744 static void clean_cpuFeatures() { _features = 0; }
745 static void set_avx_cpuFeatures() { _features = (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); }
746 static void set_evex_cpuFeatures() { _features = (CPU_AVX512F | CPU_SSE | CPU_SSE2 | CPU_VZEROUPPER ); }
747
748
749 // Initialization
750 static void initialize();
751
752 // Override Abstract_VM_Version implementation
753 static void print_platform_virtualization_info(outputStream*);
754
755 // Asserts
756 static void assert_is_initialized() {
757    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
758 }
759
760 //
761 // Processor family:
762 // 3 - 386
763 // 4 - 486
764 // 5 - Pentium
765 // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
766 // Pentium M, Core Solo, Core Duo, Core2 Duo
767 // family 6 model: 9, 13, 14, 15
768 // 0x0f - Pentium 4, Opteron
769 //
770 // Note: The cpu family should be used to select between
771 // instruction sequences which are valid on all Intel
772 // processors. Use the feature test functions below to
773 // determine whether a particular instruction is supported.
774 //
775 static int cpu_family() { return _cpu;}
776 static bool is_P6() { return cpu_family() >= 6; }
777 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
778 static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
779 static bool is_amd_family() { return is_amd() || is_hygon(); }
780 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
781 static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS '
782  static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } // Silvermont and Centerton
783 static bool is_knights_family() { return UseKNLSetting || ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
784
785 static bool supports_processor_topology() {
786 return (_cpuid_info.std_max_function >= 0xB) &&
787 // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level.
788 // Some cpus have max cpuid >= 0xB but do not support processor topology.
789 (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
790 }
791
792 static uint cores_per_cpu() {
793 uint result = 1;
794 if (is_intel()) {
795 bool supports_topology = supports_processor_topology();
796 if (supports_topology) {
797 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
798 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
799 }
800 if (!supports_topology || result == 0) {
801 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
802 }
803 } else if (is_amd_family()) {
804 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
805 } else if (is_zx()) {
806 bool supports_topology = supports_processor_topology();
807 if (supports_topology) {
808 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
809 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
810 }
811 if (!supports_topology || result == 0) {
812 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
813 }
814 }
815 return result;
816 }
817
818 static uint threads_per_core() {
819 uint result = 1;
820 if (is_intel() && supports_processor_topology()) {
821 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
822 } else if (is_zx() && supports_processor_topology()) {
823 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
824 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
825 if (cpu_family() >= 0x17) {
826 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
827 } else {
828 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
829 cores_per_cpu();
830 }
831 }
832 return (result == 0 ? 1 : result);
833 }
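To make the leaf-0xB arithmetic above concrete, a simplified model with invented raw values (the real values come from CpuidInfo):

#include <cstdio>

// Simplified model of the CPUID leaf-0xB topology computation: level 0
// (SMT) reports logical processors per core, level 1 reports logical
// processors per package; their quotient is cores_per_cpu(). The numbers
// below are made up for illustration.
int main() {
  unsigned smt_logical = 2;  // stands in for tpl_cpuidB0_ebx.bits.logical_cpus
  unsigned pkg_logical = 16; // stands in for tpl_cpuidB1_ebx.bits.logical_cpus
  unsigned cores   = pkg_logical / smt_logical; // cores_per_cpu() -> 8
  unsigned threads = smt_logical;               // threads_per_core() -> 2
  std::printf("%u cores x %u threads\n", cores, threads);
  return 0;
}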
834
835 static intx L1_line_size() {
836 intx result = 0;
837 if (is_intel()) {
838 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
839 } else if (is_amd_family()) {
840 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
841 } else if (is_zx()) {
842 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
843 }
844 if (result < 32) // not defined ?
845 result = 32; // 32 bytes by default on x86 and other x64
846 return result;
847 }
848
849 static intx prefetch_data_size() {
850 return L1_line_size();
851 }
852
853 //
854 // Feature identification
855 //
856 static bool supports_cpuid() { return _features != 0; }
857 static bool supports_cmpxchg8() { return (_features & CPU_CX8) != 0; }
858 static bool supports_cmov() { return (_features & CPU_CMOV) != 0; }
859 static bool supports_fxsr() { return (_features & CPU_FXSR) != 0; }
860 static bool supports_ht() { return (_features & CPU_HT) != 0; }
861 static bool supports_mmx() { return (_features & CPU_MMX) != 0; }
862 static bool supports_sse() { return (_features & CPU_SSE) != 0; }
863 static bool supports_sse2() { return (_features & CPU_SSE2) != 0; }
864 static bool supports_sse3() { return (_features & CPU_SSE3) != 0; }
865 static bool supports_ssse3() { return (_features & CPU_SSSE3)!= 0; }
866 static bool supports_sse4_1() { return (_features & CPU_SSE4_1) != 0; }
867 static bool supports_sse4_2() { return (_features & CPU_SSE4_2) != 0; }
868 static bool supports_popcnt() { return (_features & CPU_POPCNT) != 0; }
869 static bool supports_avx() { return (_features & CPU_AVX) != 0; }
870 static bool supports_avx2() { return (_features & CPU_AVX2) != 0; }
871 static bool supports_tsc() { return (_features & CPU_TSC) != 0; }
872 static bool supports_aes() { return (_features & CPU_AES) != 0; }
873 static bool supports_erms() { return (_features & CPU_ERMS) != 0; }
874 static bool supports_clmul() { return (_features & CPU_CLMUL) != 0; }
875 static bool supports_rtm() { return (_features & CPU_RTM) != 0; }
876 static bool supports_bmi1() { return (_features & CPU_BMI1) != 0; }
877 static bool supports_bmi2() { return (_features & CPU_BMI2) != 0; }
878 static bool supports_adx() { return (_features & CPU_ADX) != 0; }
879 static bool supports_evex() { return (_features & CPU_AVX512F) != 0; }
880 static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
881 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
882 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
883 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
884 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
7. Assuming the condition is true
8. Returning the value 1, which participates in a condition later
885 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
886 static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
887 static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); }
888 static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
889 static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
890 supports_avx512bw() && supports_avx512dq()); }
891 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
892 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
893 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
894 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
895 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
896 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
897 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
898 static bool supports_avx512_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
899 static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
900 static bool supports_avx512_vaes() { return (_features & CPU_AVX512_VAES) != 0; }
901 static bool supports_avx512_vnni() { return (_features & CPU_AVX512_VNNI) != 0; }
902 static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
3. Assuming the condition is true
4. Returning the value 1, which participates in a condition later
903 static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
904 static bool supports_hv() { return (_features & CPU_HV) != 0; }
905 static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
906
907 // Intel features
908 static bool is_intel_family_core() { return is_intel() &&
909 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
910
911 static bool is_intel_skylake() { return is_intel_family_core() &&
912 extended_cpu_model() == CPU_MODEL_SKYLAKE; }
913
914 static int avx3_threshold();
915
916 static bool is_intel_tsc_synched_at_init() {
917 if (is_intel_family_core()) {
918 uint32_t ext_model = extended_cpu_model();
919 if (ext_model == CPU_MODEL_NEHALEM_EP ||
920 ext_model == CPU_MODEL_WESTMERE_EP ||
921 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
922 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
923 // <= 2-socket invariant tsc support. EX versions are usually used
924 // in > 2-socket systems and likely don't synchronize tscs at
925 // initialization.
926 // Code that uses tsc values must be prepared for them to arbitrarily
927 // jump forward or backward.
928 return true;
929 }
930 }
931 return false;
932 }
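A hedged sketch of the defensive pattern the comment above calls for when TSC values may jump (the helper name is illustrative; HotSpot's actual handling lives in its time code):

#include <cstdint>
#include <algorithm>
#include <x86intrin.h>

// If TSCs are not synchronized at init, raw readings can jump arbitrarily;
// callers that only need monotonicity can clamp to a running maximum so
// elapsed-time math never goes negative. Sketch only.
uint64_t monotonic_tsc(uint64_t& last_seen) {
  uint64_t now = __rdtsc();
  last_seen = std::max(last_seen, now);
  return last_seen;
}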
933
934 // This checks if the JVM is potentially affected by an erratum on Intel CPUs (SKX102)
935 // that causes unpredictable behaviour when jcc crosses 64 byte boundaries. Its microcode
936 // mitigation causes regressions when jumps or fused conditional branches cross or end at
937 // 32 byte boundaries.
938 static bool has_intel_jcc_erratum() { return _has_intel_jcc_erratum; }
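A sketch of the boundary predicate behind the erratum (the function name is illustrative, not HotSpot's): a branch is affected when its bytes cross a 32-byte boundary or end exactly on one.

#include <cstdint>
#include <cstddef>

// True when the instruction's bytes cross a 32-byte boundary or end exactly
// at one; an assembler can use this to decide whether to pad with NOPs first.
bool intersects_32byte_boundary(uintptr_t inst_start, size_t inst_len) {
  uintptr_t first_block = inst_start >> 5;              // 32-byte block of first byte
  uintptr_t end_block   = (inst_start + inst_len) >> 5; // block one past the last byte
  return first_block != end_block; // crosses, or ends at, a boundary
}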
939
940 // AMD features
941 static bool supports_3dnow_prefetch() { return (_features & CPU_3DNOW_PREFETCH) != 0; }
942 static bool supports_lzcnt() { return (_features & CPU_LZCNT) != 0; }
943 static bool supports_sse4a() { return (_features & CPU_SSE4A) != 0; }
944
945 static bool is_amd_Barcelona() { return is_amd() &&
946 extended_cpu_family() == CPU_FAMILY_AMD_11H; }
947
948 // Intel and AMD newer cores support fast timestamps well
949 static bool supports_tscinv_bit() {
950 return (_features & CPU_TSCINV_BIT) != 0;
951 }
952 static bool supports_tscinv() {
953 return (_features & CPU_TSCINV) != 0;
954 }
955
956 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom).
957 static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 &&
958 supports_sse3() && _model != 0x1C; }
959
960 static bool supports_compare_and_exchange() { return true; }
961
962 static intx allocate_prefetch_distance(bool use_watermark_prefetch) {
963 // Hardware prefetching (distance/size in bytes):
964 // Pentium 3 - 64 / 32
965 // Pentium 4 - 256 / 128
966 // Athlon - 64 / 32 ????
967 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
968 // Core - 128 / 64
969 //
970 // Software prefetching (distance in bytes / instruction with best score):
971 // Pentium 3 - 128 / prefetchnta
972 // Pentium 4 - 512 / prefetchnta
973 // Athlon - 128 / prefetchnta
974 // Opteron - 256 / prefetchnta
975 // Core - 256 / prefetchnta
976 // It will be used only when AllocatePrefetchStyle > 0
977
978 if (is_amd_family()) { // AMD | Hygon
979 if (supports_sse2()) {
980 return 256; // Opteron
981 } else {
982 return 128; // Athlon
983 }
984 } else { // Intel
985 if (supports_sse3() && cpu_family() == 6) {
986 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
987 return 192;
988 } else if (use_watermark_prefetch) { // watermark prefetching on Core
989#ifdef _LP64
990 return 384;
991#else
992 return 320;
993#endif
994 }
995 }
996 if (supports_sse2()) {
997 if (cpu_family() == 6) {
998 return 256; // Pentium M, Core, Core2
999 } else {
1000 return 512; // Pentium 4
1001 }
1002 } else {
1003 return 128; // Pentium 3 (and all other old CPUs)
1004 }
1005 }
1006 }
1007
1008 // SSE2 and later processors implement a 'pause' instruction
1009 // that can be used for efficient implementation of
1010 // the intrinsic for java.lang.Thread.onSpinWait()
1011 static bool supports_on_spin_wait() { return supports_sse2(); }
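In spirit, the intrinsic lowers to a 'pause' inside the spin loop; a minimal user-space sketch (the flag and loop are illustrative):

#include <atomic>
#include <immintrin.h>

// Issuing 'pause' (_mm_pause) in a spin loop reduces power use and the
// pipeline flush penalty on loop exit, on SSE2 and later CPUs. Sketch only.
void spin_until_ready(const std::atomic<bool>& ready) {
  while (!ready.load(std::memory_order_acquire)) {
    _mm_pause(); // the 'pause' instruction referenced above
  }
}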
1012
1013 // x86_64 supports fast class initialization checks for static methods.
1014 static bool supports_fast_class_init_checks() {
1015 return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32
1016 }
1017
1018 constexpr static bool supports_stack_watermark_barrier() {
1019 return true;
1020 }
1021
1022 // there are several insns to force cache line sync to memory which
1023 // we can use to ensure mapped non-volatile memory is up to date with
1024 // pending in-cache changes.
1025 //
1026 // 64 bit cpus always support clflush which writes back and evicts
1027 // on 32 bit cpus support is recorded via a feature flag
1028 //
1029 // clflushopt is optional and acts like clflush except it does
1030 // not synchronize with other memory ops. it needs a preceding
1031 // and trailing StoreStore fence
1032 //
1033 // clwb is an optional intel-specific instruction which
1034 // writes back without evicting the line. it also does not
1035 // synchronize with other memory ops. so, it needs preceding
1036 // and trailing StoreStore fences.
1037
1038#ifdef _LP64
1039
1040 static bool supports_clflush(); // Can't inline due to header file conflict
1041#else
1042 static bool supports_clflush() { return ((_features & CPU_FLUSH) != 0); }
1043#endif // _LP64
1044 // Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit
1045 static bool supports_clflushopt() { return ((_features & CPU_FLUSHOPT) != 0); }
1046 static bool supports_clwb() { return ((_features & CPU_CLWB) != 0); }
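A user-space sketch of the fencing discipline the comment block above describes, assuming a CPU with CLFLUSHOPT and compiler support for the intrinsic (HotSpot emits the equivalent in generated code rather than via intrinsics):

#include <immintrin.h>

// Persist one cache line of mapped non-volatile memory. CLFLUSHOPT is not
// ordered against other stores, so it needs StoreStore fences on both
// sides, exactly as the comment above says. Sketch only; requires
// -mclflushopt (or equivalent) to compile.
void persist_cache_line(void* line) {
  _mm_sfence();         // preceding StoreStore fence
  _mm_clflushopt(line); // write back + evict, unordered w.r.t. other stores
  _mm_sfence();         // trailing StoreStore fence
}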
1047
1048#ifdef __APPLE__
1049 // Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64))?
1050 static bool is_cpu_emulated();
1051#endif
1052
1053 // support functions for virtualization detection
1054 private:
1055 static void check_virtualizations();
1056
1057 static const char* cpu_family_description(void);
1058 static const char* cpu_model_description(void);
1059 static const char* cpu_brand(void);
1060 static const char* cpu_brand_string(void);
1061
1062 static int cpu_type_description(char* const buf, size_t buf_len);
1063 static int cpu_detailed_description(char* const buf, size_t buf_len);
1064 static int cpu_extended_brand_string(char* const buf, size_t buf_len);
1065
1066 static bool cpu_is_em64t(void);
1067 static bool is_netburst(void);
1068
1069 // Returns bytes written, excluding the terminating null byte.
1070 static size_t cpu_write_support_string(char* const buf, size_t buf_len);
1071 static void resolve_cpu_information_details(void);
1072 static int64_t max_qualified_cpu_freq_from_brand_string(void);
1073
1074 public:
1075 // Offsets for cpuid asm stub brand string
1076 static ByteSize proc_name_0_offset() { return byte_offset_of(CpuidInfo, proc_name_0); }
1077 static ByteSize proc_name_1_offset() { return byte_offset_of(CpuidInfo, proc_name_1); }
1078 static ByteSize proc_name_2_offset() { return byte_offset_of(CpuidInfo, proc_name_2); }
1079 static ByteSize proc_name_3_offset() { return byte_offset_of(CpuidInfo, proc_name_3); }
1080 static ByteSize proc_name_4_offset() { return byte_offset_of(CpuidInfo, proc_name_4); }
1081 static ByteSize proc_name_5_offset() { return byte_offset_of(CpuidInfo, proc_name_5); }
1082 static ByteSize proc_name_6_offset() { return byte_offset_of(CpuidInfo, proc_name_6); }
1083 static ByteSize proc_name_7_offset() { return byte_offset_of(CpuidInfo, proc_name_7); }
1084 static ByteSize proc_name_8_offset() { return byte_offset_of(CpuidInfo, proc_name_8); }
1085 static ByteSize proc_name_9_offset() { return byte_offset_of(CpuidInfo, proc_name_9); }
1086 static ByteSize proc_name_10_offset() { return byte_offset_of(CpuidInfo, proc_name_10); }
1087 static ByteSize proc_name_11_offset() { return byte_offset_of(CpuidInfo, proc_name_11); }
1088
1089 static int64_t maximum_qualified_cpu_frequency(void);
1090
1091 static bool supports_tscinv_ext(void);
1092
1093 static void initialize_tsc();
1094 static void initialize_cpu_information(void);
1095};
1096
1097#endif // CPU_X86_VM_VERSION_X86_HPP

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_MACROASSEMBLER_X86_HPP
26#define CPU_X86_MACROASSEMBLER_X86_HPP
27
28#include "asm/assembler.hpp"
29#include "code/vmreg.inline.hpp"
30#include "compiler/oopMap.hpp"
31#include "utilities/macros.hpp"
32#include "runtime/rtmLocking.hpp"
33#include "runtime/vm_version.hpp"
34
35// MacroAssembler extends Assembler by frequently used macros.
36//
37// Instructions for which a 'better' code sequence exists depending
38// on arguments should also go in here.
39
40class MacroAssembler: public Assembler {
41 friend class LIR_Assembler;
42 friend class Runtime1; // as_Address()
43
44 public:
45 // Support for VM calls
46 //
47 // This is the base routine called by the different versions of call_VM_leaf. The interpreter
48 // may customize this version by overriding it for its purposes (e.g., to save/restore
49 // additional registers when doing a VM call).
50
51 virtual void call_VM_leaf_base(
52 address entry_point, // the entry point
53 int number_of_arguments // the number of arguments to pop after the call
54 );
55
56 protected:
57 // This is the base routine called by the different versions of call_VM. The interpreter
58 // may customize this version by overriding it for its purposes (e.g., to save/restore
59 // additional registers when doing a VM call).
60 //
61 // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
62 // returns the register which contains the thread upon return. If a thread register has been
63 // specified, the return value will correspond to that register. If no last_java_sp is specified
64 // (noreg) then rsp will be used instead.
65 virtual void call_VM_base( // returns the register containing the thread upon return
66 Register oop_result, // where an oop-result ends up if any; use noreg otherwise
67 Register java_thread, // the thread if computed before ; use noreg otherwise
68 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
69 address entry_point, // the entry point
70 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
71 bool check_exceptions // whether to check for pending exceptions after return
72 );
73
74 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
75
76 // helpers for FPU flag access
77 // tmp is a temporary register, if none is available use noreg
78 void save_rax (Register tmp);
79 void restore_rax(Register tmp);
80
81 public:
82 MacroAssembler(CodeBuffer* code) : Assembler(code) {}
83
84 // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
85 // The implementation is only non-empty for the InterpreterMacroAssembler,
86 // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
87 virtual void check_and_handle_popframe(Register java_thread);
88 virtual void check_and_handle_earlyret(Register java_thread);
89
90 Address as_Address(AddressLiteral adr);
91 Address as_Address(ArrayAddress adr);
92
93 // Support for NULL-checks
94 //
95 // Generates code that causes a NULL OS exception if the content of reg is NULL.
96 // If the accessed location is M[reg + offset] and the offset is known, provide the
97 // offset. No explicit code generation is needed if the offset is within a certain
98 // range (0 <= offset <= page_size).
99
100 void null_check(Register reg, int offset = -1);
101 static bool needs_explicit_null_check(intptr_t offset);
102 static bool uses_implicit_null_check(void* address);
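A sketch of the offset rule stated above (the helper name and page size are illustrative):

#include <cstddef>

// An access at M[reg + offset] is covered by the hardware trap on the
// protected first page while 0 <= offset <= page_size; anything outside
// that range needs explicit generated code. Sketch only.
bool offset_needs_explicit_check(long offset, std::size_t page_size = 4096) {
  return offset < 0 || static_cast<std::size_t>(offset) > page_size;
}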
103
104 // Required platform-specific helpers for Label::patch_instructions.
105 // They _shadow_ the declarations in AbstractAssembler, which are undefined.
106 void pd_patch_instruction(address branch, address target, const char* file, int line) {
107 unsigned char op = branch[0];
108 assert(op == 0xE8 /* call */ ||
109 op == 0xE9 /* jmp */ ||
110 op == 0xEB /* short jmp */ ||
111 (op & 0xF0) == 0x70 /* short jcc */ ||
112 op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||
113 op == 0xC7 && branch[1] == 0xF8 /* xbegin */,
114 "Invalid opcode at patch point");
115
116 if (op == 0xEB || (op & 0xF0) == 0x70) {
117 // short offset operators (jmp and jcc)
118 char* disp = (char*) &branch[1];
119 int imm8 = target - (address) &disp[1];
120 guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d",
121 file == NULL ? "<NULL>" : file, line);
122 *disp = imm8;
123 } else {
124 int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
125 int imm32 = target - (address) &disp[1];
126 *disp = imm32;
127 }
128 }
129
130 // The following 4 methods return the offset of the appropriate move instruction
131
132 // Support for fast byte/short loading with zero extension (depending on particular CPU)
133 int load_unsigned_byte(Register dst, Address src);
134 int load_unsigned_short(Register dst, Address src);
135
136 // Support for fast byte/short loading with sign extension (depending on particular CPU)
137 int load_signed_byte(Register dst, Address src);
138 int load_signed_short(Register dst, Address src);
139
140 // Support for sign-extension (hi:lo = extend_sign(lo))
141 void extend_sign(Register hi, Register lo);
142
143 // Load and store values by size and signed-ness
144 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
145 void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
146
147 // Support for inc/dec with optimal instruction selection depending on value
148
149 void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; }
150 void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; }
151
152 void decrementl(Address dst, int value = 1);
153 void decrementl(Register reg, int value = 1);
154
155 void decrementq(Register reg, int value = 1);
156 void decrementq(Address dst, int value = 1);
157
158 void incrementl(Address dst, int value = 1);
159 void incrementl(Register reg, int value = 1);
160
161 void incrementq(Register reg, int value = 1);
162 void incrementq(Address dst, int value = 1);
163
164 // Support optimal SSE move instructions.
165 void movflt(XMMRegister dst, XMMRegister src) {
166 if (dst->encoding() == src->encoding()) return;
167 if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
168 else { movss (dst, src); return; }
169 }
170 void movflt(XMMRegister dst, Address src) { movss(dst, src); }
171 void movflt(XMMRegister dst, AddressLiteral src);
172 void movflt(Address dst, XMMRegister src) { movss(dst, src); }
173
174 // Move with zero extension
175 void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); }
176
177 void movdbl(XMMRegister dst, XMMRegister src) {
178 if (dst->encoding() == src->encoding()) return;
179 if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
180 else { movsd (dst, src); return; }
181 }
182
183 void movdbl(XMMRegister dst, AddressLiteral src);
184
185 void movdbl(XMMRegister dst, Address src) {
186 if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
187 else { movlpd(dst, src); return; }
188 }
189 void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
190
191 void incrementl(AddressLiteral dst);
192 void incrementl(ArrayAddress dst);
193
194 void incrementq(AddressLiteral dst);
195
196 // Alignment
197 void align32();
198 void align64();
199 void align(int modulus);
200 void align(int modulus, int target);
201
202 // A 5 byte nop that is safe for patching (see patch_verified_entry)
203 void fat_nop();
204
205 // Stack frame creation/removal
206 void enter();
207 void leave();
208
209 // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
210 // The pointer will be loaded into the thread register.
211 void get_thread(Register thread);
212
213#ifdef _LP64
214 // Support for argument shuffling
215
216 void move32_64(VMRegPair src, VMRegPair dst);
217 void long_move(VMRegPair src, VMRegPair dst);
218 void float_move(VMRegPair src, VMRegPair dst);
219 void double_move(VMRegPair src, VMRegPair dst);
220 void move_ptr(VMRegPair src, VMRegPair dst);
221 void object_move(OopMap* map,
222 int oop_handle_offset,
223 int framesize_in_slots,
224 VMRegPair src,
225 VMRegPair dst,
226 bool is_receiver,
227 int* receiver_offset);
228#endif // _LP64
229
230 // Support for VM calls
231 //
232 // It is imperative that all calls into the VM are handled via the call_VM macros.
233 // They make sure that the stack linkage is setup correctly. call_VM's correspond
234 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
235
236
237 void call_VM(Register oop_result,
238 address entry_point,
239 bool check_exceptions = true);
240 void call_VM(Register oop_result,
241 address entry_point,
242 Register arg_1,
243 bool check_exceptions = true);
244 void call_VM(Register oop_result,
245 address entry_point,
246 Register arg_1, Register arg_2,
247 bool check_exceptions = true);
248 void call_VM(Register oop_result,
249 address entry_point,
250 Register arg_1, Register arg_2, Register arg_3,
251 bool check_exceptions = true);
252
253 // Overloadings with last_Java_sp
254 void call_VM(Register oop_result,
255 Register last_java_sp,
256 address entry_point,
257 int number_of_arguments = 0,
258 bool check_exceptions = true);
259 void call_VM(Register oop_result,
260 Register last_java_sp,
261 address entry_point,
262 Register arg_1,
263 bool check_exceptions = true);
264 void call_VM(Register oop_result,
265 Register last_java_sp,
266 address entry_point,
267 Register arg_1, Register arg_2,
268 bool check_exceptions = true);
269 void call_VM(Register oop_result,
270 Register last_java_sp,
271 address entry_point,
272 Register arg_1, Register arg_2, Register arg_3,
273 bool check_exceptions = true);
274
275 void get_vm_result (Register oop_result, Register thread);
276 void get_vm_result_2(Register metadata_result, Register thread);
277
278 // These always tightly bind to MacroAssembler::call_VM_base
279 // bypassing the virtual implementation
280 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
281 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
282 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
283 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
284 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
285
286 void call_VM_leaf0(address entry_point);
287 void call_VM_leaf(address entry_point,
288 int number_of_arguments = 0);
289 void call_VM_leaf(address entry_point,
290 Register arg_1);
291 void call_VM_leaf(address entry_point,
292 Register arg_1, Register arg_2);
293 void call_VM_leaf(address entry_point,
294 Register arg_1, Register arg_2, Register arg_3);
295
296 // These always tightly bind to MacroAssembler::call_VM_leaf_base
297 // bypassing the virtual implementation
298 void super_call_VM_leaf(address entry_point);
299 void super_call_VM_leaf(address entry_point, Register arg_1);
300 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
301 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
302 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
303
304 // last Java Frame (fills frame anchor)
305 void set_last_Java_frame(Register thread,
306 Register last_java_sp,
307 Register last_java_fp,
308 address last_java_pc);
309
310 // thread in the default location (r15_thread on 64bit)
311 void set_last_Java_frame(Register last_java_sp,
312 Register last_java_fp,
313 address last_java_pc);
314
315 void reset_last_Java_frame(Register thread, bool clear_fp);
316
317 // thread in the default location (r15_thread on 64bit)
318 void reset_last_Java_frame(bool clear_fp);
319
320 // jobjects
321 void clear_jweak_tag(Register possibly_jweak);
322 void resolve_jobject(Register value, Register thread, Register tmp);
323
324 // C 'boolean' to Java boolean: x == 0 ? 0 : 1
325 void c2bool(Register x);
326
327 // C++ bool manipulation
328
329 void movbool(Register dst, Address src);
330 void movbool(Address dst, bool boolconst);
331 void movbool(Address dst, Register src);
332 void testbool(Register dst);
333
334 void resolve_oop_handle(Register result, Register tmp = rscratch2);
335 void resolve_weak_handle(Register result, Register tmp);
336 void load_mirror(Register mirror, Register method, Register tmp = rscratch2);
337 void load_method_holder_cld(Register rresult, Register rmethod);
338
339 void load_method_holder(Register holder, Register method);
340
341 // oop manipulations
342 void load_klass(Register dst, Register src, Register tmp);
343 void store_klass(Register dst, Register src, Register tmp);
344
345 void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
346 Register tmp1, Register thread_tmp);
347 void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
348 Register tmp1, Register tmp2);
349
350 void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
351 Register thread_tmp = noreg, DecoratorSet decorators = 0);
352 void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
353 Register thread_tmp = noreg, DecoratorSet decorators = 0);
354 void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
355 Register tmp2 = noreg, DecoratorSet decorators = 0);
356
357 // Used for storing NULL. All other oop constants should be
358 // stored using routines that take a jobject.
359 void store_heap_oop_null(Address dst);
360
361#ifdef _LP64
362 void store_klass_gap(Register dst, Register src);
363
364 // This dummy is to prevent a call to store_heap_oop from
365 // converting a zero (like NULL) into a Register by giving
366 // the compiler two choices it can't resolve
367
368 void store_heap_oop(Address dst, void* dummy);
369
370 void encode_heap_oop(Register r);
371 void decode_heap_oop(Register r);
372 void encode_heap_oop_not_null(Register r);
373 void decode_heap_oop_not_null(Register r);
374 void encode_heap_oop_not_null(Register dst, Register src);
375 void decode_heap_oop_not_null(Register dst, Register src);
376
377 void set_narrow_oop(Register dst, jobject obj);
378 void set_narrow_oop(Address dst, jobject obj);
379 void cmp_narrow_oop(Register dst, jobject obj);
380 void cmp_narrow_oop(Address dst, jobject obj);
381
382 void encode_klass_not_null(Register r, Register tmp);
383 void decode_klass_not_null(Register r, Register tmp);
384 void encode_and_move_klass_not_null(Register dst, Register src);
385 void decode_and_move_klass_not_null(Register dst, Register src);
386 void set_narrow_klass(Register dst, Klass* k);
387 void set_narrow_klass(Address dst, Klass* k);
388 void cmp_narrow_klass(Register dst, Klass* k);
389 void cmp_narrow_klass(Address dst, Klass* k);
390
391 // if heap base register is used - reinit it with the correct value
392 void reinit_heapbase();
393
394 DEBUG_ONLY(void verify_heapbase(const char* msg);)
395
396#endif // _LP64
397
398 // Int division/remainder for Java
399 // (as idivl, but checks for special case as described in JVM spec.)
400 // returns idivl instruction offset for implicit exception handling
401 int corrected_idivl(Register reg);
402
403 // Long division/remainder for Java
404 // (as idivq, but checks for special case as described in JVM spec.)
405 // returns idivq instruction offset for implicit exception handling
406 int corrected_idivq(Register reg);
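The JVM-spec special case both helpers guard is INT_MIN / -1 (and the long analogue for idivq), which the hardware divide would trap on; a sketch in plain C++:

#include <climits>

// x86 idiv raises #DE on the INT_MIN / -1 overflow, while Java defines the
// quotient as INT_MIN and the remainder as 0. A sketch of the check the
// corrected_idiv* helpers emit around the division:
int java_idiv(int dividend, int divisor) {
  if (dividend == INT_MIN && divisor == -1) {
    return INT_MIN; // Java result; avoids the hardware trap
  }
  return dividend / divisor; // ordinary idivl path
}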
407
408 void int3();
409
410 // Long operation macros for a 32bit cpu
411 // Long negation for Java
412 void lneg(Register hi, Register lo);
413
414 // Long multiplication for Java
415 // (destroys contents of eax, ebx, ecx and edx)
416 void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
417
418 // Long shifts for Java
419 // (semantics as described in JVM spec.)
420 void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f)
421 void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f)
422
423 // Long compare for Java
424 // (semantics as described in JVM spec.)
425 void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
426
427
428 // misc
429
430 // Sign extension
431 void sign_extend_short(Register reg);
432 void sign_extend_byte(Register reg);
433
434 // Division by power of 2, rounding towards 0
435 void division_with_shift(Register reg, int shift_value);
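Why an adjustment is needed: an arithmetic right shift rounds toward negative infinity, while Java division rounds toward zero. A plain C++ sketch of the bias (a sketch, not the emitted code):

// Negative dividends are biased by (2^k - 1) before the shift so the result
// truncates toward zero, matching Java division semantics.
int div_pow2_toward_zero(int x, int k) {
  if (x < 0) {
    x += (1 << k) - 1; // bias so the shift rounds toward zero
  }
  return x >> k; // arithmetic shift on all mainstream compilers
}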
436
437#ifndef _LP64
438 // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
439 //
440 // CF (corresponds to C0) if x < y
441 // PF (corresponds to C2) if unordered
442 // ZF (corresponds to C3) if x = y
443 //
444 // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
445 // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
446 void fcmp(Register tmp);
447 // Variant of the above which allows y to be further down the stack
448 // and which only pops x and y if specified. If pop_right is
449 // specified then pop_left must also be specified.
450 void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
451
452 // Floating-point comparison for Java
453 // Compares the top-most stack entries on the FPU stack and stores the result in dst.
454 // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
455 // (semantics as described in JVM spec.)
456 void fcmp2int(Register dst, bool unordered_is_less);
457 // Variant of the above which allows y to be further down the stack
458 // and which only pops x and y if specified. If pop_right is
459 // specified then pop_left must also be specified.
460 void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
461
462 // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
463 // tmp is a temporary register, if none is available use noreg
464 void fremr(Register tmp);
465
466 // only if +VerifyFPU
467 void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
468#endif // !LP64
469
470 // dst = c = a * b + c
471 void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
472 void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
473
474 void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
475 void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
476 void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
477 void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
478
479
480 // same as fcmp2int, but using SSE2
481 void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
482 void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
483
484 // branch to L if FPU flag C2 is set/not set
485 // tmp is a temporary register, if none is available use noreg
486 void jC2 (Register tmp, Label& L);
487 void jnC2(Register tmp, Label& L);
488
489 // Load float value from 'address'. If UseSSE >= 1, the value is loaded into
490 // register xmm0. Otherwise, the value is loaded onto the FPU stack.
491 void load_float(Address src);
492
493 // Store float value to 'address'. If UseSSE >= 1, the value is stored
494 // from register xmm0. Otherwise, the value is stored from the FPU stack.
495 void store_float(Address dst);
496
497 // Load double value from 'address'. If UseSSE >= 2, the value is loaded into
498 // register xmm0. Otherwise, the value is loaded onto the FPU stack.
499 void load_double(Address src);
500
501 // Store double value to 'address'. If UseSSE >= 2, the value is stored
502 // from register xmm0. Otherwise, the value is stored from the FPU stack.
503 void store_double(Address dst);
504
505#ifndef _LP64
506 // Pop ST (ffree & fincstp combined)
507 void fpop();
508
509 void empty_FPU_stack();
510#endif // !_LP64
511
512 void push_IU_state();
513 void pop_IU_state();
514
515 void push_FPU_state();
516 void pop_FPU_state();
517
518 void push_CPU_state();
519 void pop_CPU_state();
520
521 // Round reg up to a multiple of modulus (which must be a power of two)
522 void round_to(Register reg, int modulus);
523
524 // Callee saved registers handling
525 void push_callee_saved_registers();
526 void pop_callee_saved_registers();
527
528 // allocation
529 void eden_allocate(
530 Register thread, // Current thread
531 Register obj, // result: pointer to object after successful allocation
532 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
533 int con_size_in_bytes, // object size in bytes if known at compile time
534 Register t1, // temp register
535 Label& slow_case // continuation point if fast allocation fails
536 );
537 void tlab_allocate(
538 Register thread, // Current thread
539 Register obj, // result: pointer to object after successful allocation
540 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
541 int con_size_in_bytes, // object size in bytes if known at compile time
542 Register t1, // temp register
543 Register t2, // temp register
544 Label& slow_case // continuation point if fast allocation fails
545 );
546 void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
547
548 // interface method calling
549 void lookup_interface_method(Register recv_klass,
550 Register intf_klass,
551 RegisterOrConstant itable_index,
552 Register method_result,
553 Register scan_temp,
554 Label& no_such_interface,
555 bool return_method = true);
556
557 // virtual method calling
558 void lookup_virtual_method(Register recv_klass,
559 RegisterOrConstant vtable_index,
560 Register method_result);
561
562 // Test sub_klass against super_klass, with fast and slow paths.
563
564 // The fast path produces a tri-state answer: yes / no / maybe-slow.
565 // One of the three labels can be NULL, meaning take the fall-through.
566 // If super_check_offset is -1, the value is loaded up from super_klass.
567 // No registers are killed, except temp_reg.
568 void check_klass_subtype_fast_path(Register sub_klass,
569 Register super_klass,
570 Register temp_reg,
571 Label* L_success,
572 Label* L_failure,
573 Label* L_slow_path,
574 RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
575
576 // The rest of the type check; must be wired to a corresponding fast path.
577 // It does not repeat the fast path logic, so don't use it standalone.
578 // The temp_reg and temp2_reg can be noreg, if no temps are available.
579 // Updates the sub's secondary super cache as necessary.
580 // If set_cond_codes, condition codes will be Z on success, NZ on failure.
581 void check_klass_subtype_slow_path(Register sub_klass,
582 Register super_klass,
583 Register temp_reg,
584 Register temp2_reg,
585 Label* L_success,
586 Label* L_failure,
587 bool set_cond_codes = false);
588
589 // Simplified, combined version, good for typical uses.
590 // Falls through on failure.
591 void check_klass_subtype(Register sub_klass,
592 Register super_klass,
593 Register temp_reg,
594 Label& L_success);
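A hedged sketch of how the fast and slow paths are typically wired together; the wrapper function, registers, and labels are illustrative, not HotSpot's actual call sites (real ones live in the stub generators and the C1/C2 back ends):

// Requires HotSpot's MacroAssembler context; sketch only. The fast path
// answers yes / no / maybe, falling through to L_slow for "maybe", which the
// slow path then resolves via the secondary super cache.
static void emit_subtype_check(MacroAssembler* masm,
                               Register sub, Register super,
                               Register tmp, Register tmp2,
                               Label& L_ok, Label& L_fail) {
  Label L_slow;
  masm->check_klass_subtype_fast_path(sub, super, tmp,
                                      &L_ok, &L_fail, &L_slow);
  masm->bind(L_slow);
  masm->check_klass_subtype_slow_path(sub, super, tmp, tmp2,
                                      &L_ok, &L_fail);
}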
595
596 void clinit_barrier(Register klass,
597 Register thread,
598 Label* L_fast_path = NULL,
599 Label* L_slow_path = NULL);
600
601 // method handles (JSR 292)
602 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
603
604 // Debugging
605
606 // only if +VerifyOops
607 void _verify_oop(Register reg, const char* s, const char* file, int line);
608 void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
609
610 void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
611 if (VerifyOops) {
612 _verify_oop(reg, s, file, line);
613 }
614 }
615 void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
616 if (VerifyOops) {
617 _verify_oop_addr(reg, s, file, line);
618 }
619 }
620
621 // TODO: verify method and klass metadata (compare against vptr?)
622 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
623 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
624
625#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
626#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
627#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
628#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
629#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
630
631 // Verify or restore cpu control state after JNI call
632 void restore_cpu_control_state_after_jni();
633
634 // prints msg, dumps registers and stops execution
635 void stop(const char* msg);
636
637 // prints msg and continues
638 void warn(const char* msg);
639
640 // dumps registers and other state
641 void print_state();
642
643 static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
644 static void debug64(char* msg, int64_t pc, int64_t regs[]);
645 static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
646 static void print_state64(int64_t pc, int64_t regs[]);
647
648 void os_breakpoint();
649
650 void untested() { stop("untested"); }
651
652 void unimplemented(const char* what = "");
653
654 void should_not_reach_here() { stop("should not reach here"); }
655
656 void print_CPU_state();
657
658 // Stack overflow checking
659 void bang_stack_with_offset(int offset) {
660 // stack grows down, caller passes positive offset
661 assert(offset > 0, "must bang with negative offset");
662 movl(Address(rsp, (-offset)), rax);
663 }
664
665 // Writes to stack successive pages until offset reached to check for
666 // stack overflow + shadow pages. Also, clobbers tmp
667 void bang_stack_size(Register size, Register tmp);
668
669 // Check for reserved stack access in method being exited (for JIT)
670 void reserved_stack_check();
671
672 void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);
673
674 void verify_tlab();
675
676 Condition negate_condition(Condition cond);
677
678 // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
679 // operands. In general the names are modified to avoid hiding the instruction in Assembler
680 // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
681 // here in MacroAssembler. The major exception to this rule is call
682
683 // Arithmetics
684
685
686 void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; }
687 void addptr(Address dst, Register src);
688
689 void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); }
690 void addptr(Register dst, int32_t src);
691 void addptr(Register dst, Register src);
692 void addptr(Register dst, RegisterOrConstant src) {
693 if (src.is_constant()) addptr(dst, (int) src.as_constant());
694 else addptr(dst, src.as_register());
695 }
696
697 void andptr(Register dst, int32_t src);
698 void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
699
700 void cmp8(AddressLiteral src1, int imm);
701
702 // renamed to drag out the casting of address to int32_t/intptr_t
703 void cmp32(Register src1, int32_t imm);
704
705 void cmp32(AddressLiteral src1, int32_t imm);
706 // compare reg - mem, or reg - &mem
707 void cmp32(Register src1, AddressLiteral src2);
708
709 void cmp32(Register src1, Address src2);
710
711#ifndef _LP64
712 void cmpklass(Address dst, Metadata* obj);
713 void cmpklass(Register dst, Metadata* obj);
714 void cmpoop(Address dst, jobject obj);
715#endif // _LP64
716
717 void cmpoop(Register src1, Register src2);
718 void cmpoop(Register src1, Address src2);
719 void cmpoop(Register dst, jobject obj);
720
721 // NOTE src2 must be the lval. This is NOT an mem-mem compare
722 void cmpptr(Address src1, AddressLiteral src2);
723
724 void cmpptr(Register src1, AddressLiteral src2);
725
726 void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
727 void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
728 // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
729
730 void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
731 void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
732
733 // cmp64 to avoid hiding cmpq
734 void cmp64(Register src1, AddressLiteral src);
735
736 void cmpxchgptr(Register reg, Address adr);
737
738 void locked_cmpxchgptr(Register reg, AddressLiteral adr);
739
740
741 void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); }
742 void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); }
743
744
745 void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); }
746
747 void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); }
748
749 void shlptr(Register dst, int32_t shift);
750 void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); }
751
752 void shrptr(Register dst, int32_t shift);
753 void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); }
754
755 void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); }
756 void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); }
757
758 void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
759
760 void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); }
761 void subptr(Register dst, int32_t src);
762 // Force generation of a 4 byte immediate value even if it fits into 8bit
763 void subptr_imm32(Register dst, int32_t src);
764 void subptr(Register dst, Register src);
765 void subptr(Register dst, RegisterOrConstant src) {
766 if (src.is_constant()) subptr(dst, (int) src.as_constant());
767 else subptr(dst, src.as_register());
768 }
769
770 void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
771 void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); }
772
773 void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
774 void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; }
775
776 void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; }
777
778
779
780 // Helper functions for statistics gathering.
781 // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
782 void cond_inc32(Condition cond, AddressLiteral counter_addr);
783 // Unconditional atomic increment.
784 void atomic_incl(Address counter_addr);
785 void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
786#ifdef _LP64
787 void atomic_incq(Address counter_addr);
788 void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
789#endif
790 void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr)) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
791 void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; }
792
793 void lea(Register dst, AddressLiteral adr);
794 void lea(Address dst, AddressLiteral adr);
795 void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
796
797 void leal32(Register dst, Address src) { leal(dst, src); }
798
799 // Import other testl() methods from the parent class or else
800 // they will be hidden by the following overriding declaration.
801 using Assembler::testl;
802 void testl(Register dst, AddressLiteral src);
803
804 void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
805 void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
806 void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); }
807 void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); }
808
809 void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); }
810 void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); }
811 void testptr(Register src1, Register src2);
812
813 void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
814 void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); }
815
816 // Calls
817
818 void call(Label& L, relocInfo::relocType rtype);
819 void call(Register entry);
820 void call(Address addr) { Assembler::call(addr); }
821
822 // NOTE: this call transfers to the effective address of entry, NOT
823 // the address contained by entry, because that is more natural
824 // for jumps/calls.
825 void call(AddressLiteral entry);
826
827 // Emit the CompiledIC call idiom
828 void ic_call(address entry, jint method_index = 0);
829
830 // Jumps
831
832 // NOTE: these jumps transfer to the effective address of dst, NOT
833 // the address contained by dst, because that is more natural
834 // for jumps/calls.
835 void jump(AddressLiteral dst);
836 void jump_cc(Condition cc, AddressLiteral dst);
837
838 // 32bit can do a case table jump in one instruction but we no longer allow the base
839 // to be installed in the Address class. This jump transfers to the address
840 // contained in the location described by entry (not the address of entry)
841 void jump(ArrayAddress entry);
842
843 // Floating
844
845 void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
846 void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
847 void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }
848
849 void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
850 void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
851 void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
852
853 void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
854 void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
855 void comiss(XMMRegister dst, AddressLiteral src);
856
857 void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
858 void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
859 void comisd(XMMRegister dst, AddressLiteral src);
860
861#ifndef _LP64
862 void fadd_s(Address src) { Assembler::fadd_s(src); }
863 void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
864
865 void fldcw(Address src) { Assembler::fldcw(src); }
866 void fldcw(AddressLiteral src);
867
868 void fld_s(int index) { Assembler::fld_s(index); }
869 void fld_s(Address src) { Assembler::fld_s(src); }
870 void fld_s(AddressLiteral src);
871
872 void fld_d(Address src) { Assembler::fld_d(src); }
873 void fld_d(AddressLiteral src);
874
875 void fmul_s(Address src) { Assembler::fmul_s(src); }
876 void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
877#endif // _LP64
878
879 void fld_x(Address src) { Assembler::fld_x(src); }
880 void fld_x(AddressLiteral src);
881
882 void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
883 void ldmxcsr(AddressLiteral src);
884
885#ifdef _LP64
886 private:
887 void sha256_AVX2_one_round_compute(
888 Register reg_old_h,
889 Register reg_a,
890 Register reg_b,
891 Register reg_c,
892 Register reg_d,
893 Register reg_e,
894 Register reg_f,
895 Register reg_g,
896 Register reg_h,
897 int iter);
898 void sha256_AVX2_four_rounds_compute_first(int start);
899 void sha256_AVX2_four_rounds_compute_last(int start);
900 void sha256_AVX2_one_round_and_sched(
901 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
902 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */
903 XMMRegister xmm_2, /* ymm6 */
904 XMMRegister xmm_3, /* ymm7 */
905 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */
906 Register reg_b, /* ebx */ /* full cycle is 8 iterations */
907 Register reg_c, /* edi */
908 Register reg_d, /* esi */
909 Register reg_e, /* r8d */
910 Register reg_f, /* r9d */
911 Register reg_g, /* r10d */
912 Register reg_h, /* r11d */
913 int iter);
914
915 void addm(int disp, Register r1, Register r2);
916 void gfmul(XMMRegister tmp0, XMMRegister t);
917 void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
918 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
919 void generateHtbl_one_block(Register htbl);
920 void generateHtbl_eight_blocks(Register htbl);
921 public:
922 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
923 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
924 Register buf, Register state, Register ofs, Register limit, Register rsp,
925 bool multi_block, XMMRegister shuf_mask);
926 void avx_ghash(Register state, Register htbl, Register data, Register blocks);
927#endif
928
929#ifdef _LP64
930 private:
931 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
932 Register e, Register f, Register g, Register h, int iteration);
933
934 void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
935 Register a, Register b, Register c, Register d, Register e, Register f,
936 Register g, Register h, int iteration);
937
938 void addmq(int disp, Register r1, Register r2);
939 public:
940 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
941 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
942 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
943 XMMRegister shuf_mask);
944private:
945 void roundEnc(XMMRegister key, int rnum);
946 void lastroundEnc(XMMRegister key, int rnum);
947 void roundDec(XMMRegister key, int rnum);
948 void lastroundDec(XMMRegister key, int rnum);
949 void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
950 void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
951 void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
952 void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
953 XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
954 XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
955 bool final_reduction, int index, XMMRegister counter_inc_mask);
956public:
957 void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
958 void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
959 void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
960 Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
961 void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
962 Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
963
964#endif
965
966 void fast_md5(Register buf, Address state, Address ofs, Address limit,
967 bool multi_block);
968
969 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
970 XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
971 Register buf, Register state, Register ofs, Register limit, Register rsp,
972 bool multi_block);
973
974#ifdef _LP64
975 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
976 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
977 Register buf, Register state, Register ofs, Register limit, Register rsp,
978 bool multi_block, XMMRegister shuf_mask);
979#else
980 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
981 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
982 Register buf, Register state, Register ofs, Register limit, Register rsp,
983 bool multi_block);
984#endif
985
986 void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
987 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
988 Register rax, Register rcx, Register rdx, Register tmp);
989
990#ifdef _LP64
991 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
992 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
993 Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
994
995 void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
996 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
997 Register rax, Register rcx, Register rdx, Register r11);
998
999 void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1000 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1001 Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
1002
1003 void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1004 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1005 Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
1006 Register tmp3, Register tmp4);
1007
1008 void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1009 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1010 Register rax, Register rcx, Register rdx, Register tmp1,
1011 Register tmp2, Register tmp3, Register tmp4);
1012 void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1013 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1014 Register rax, Register rcx, Register rdx, Register tmp1,
1015 Register tmp2, Register tmp3, Register tmp4);
1016#else
1017 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1018 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1019 Register rax, Register rcx, Register rdx, Register tmp1);
1020
1021 void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1022 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1023 Register rax, Register rcx, Register rdx, Register tmp);
1024
1025 void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1026 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1027 Register rdx, Register tmp);
1028
1029 void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1030 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1031 Register rax, Register rbx, Register rdx);
1032
1033 void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1034 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1035 Register rax, Register rcx, Register rdx, Register tmp);
1036
1037 void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1038 Register edx, Register ebx, Register esi, Register edi,
1039 Register ebp, Register esp);
1040
1041 void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
1042 Register esi, Register edi, Register ebp, Register esp);
1043
1044 void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1045 Register edx, Register ebx, Register esi, Register edi,
1046 Register ebp, Register esp);
1047
1048 void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1049 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1050 Register rax, Register rcx, Register rdx, Register tmp);
1051#endif
1052
1053private:
1054
1055 // these are private because users should be doing movflt/movdbl
1056
1057 void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
1058 void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
1059 void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
1060 void movss(XMMRegister dst, AddressLiteral src);
1061
1062 void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
1063 void movlpd(XMMRegister dst, AddressLiteral src);
1064
1065public:
1066
1067 void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
1068 void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
1069 void addsd(XMMRegister dst, AddressLiteral src);
1070
1071 void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
1072 void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
1073 void addss(XMMRegister dst, AddressLiteral src);
1074
1075 void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
1076 void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
1077 void addpd(XMMRegister dst, AddressLiteral src);
1078
1079 void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
1080 void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
1081 void divsd(XMMRegister dst, AddressLiteral src);
1082
1083 void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
1084 void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
1085 void divss(XMMRegister dst, AddressLiteral src);
1086
1087 // Move Unaligned Double Quadword
1088 void movdqu(Address dst, XMMRegister src);
1089 void movdqu(XMMRegister dst, Address src);
1090 void movdqu(XMMRegister dst, XMMRegister src);
1091 void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
1092
1093 void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
1094 void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
1095 void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
1096 void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1097 void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
1098 void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
1099
1100 void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
1101 void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); }
1102 void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); }
1103 void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); }
1104 void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); }
1105 void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1106
1107 // Safe move operation: lowers to 16bit mask moves on targets supporting the
1108 // AVX512F feature and to 64bit mask moves on targets supporting the AVX512BW feature.
1109 void kmov(Address dst, KRegister src);
1110 void kmov(KRegister dst, Address src);
1111 void kmov(KRegister dst, KRegister src);
1112 void kmov(Register dst, KRegister src);
1113 void kmov(KRegister dst, Register src);
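One plausible shape for the dispatch this comment describes (a sketch, not the definition in macroAssembler_x86.cpp):

void MacroAssembler::kmov(KRegister dst, KRegister src) {
  if (VM_Version::supports_avx512bw()) {
    kmovql(dst, src);   // 64-bit mask move requires AVX512BW
  } else {
    assert(VM_Version::supports_avx512f(), "AVX512F required for kmovw");
    kmovwl(dst, src);   // 16-bit mask move is baseline AVX512F
  }
}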
1114
1115 // AVX Unaligned forms
1116 void vmovdqu(Address dst, XMMRegister src);
1117 void vmovdqu(XMMRegister dst, Address src);
1118 void vmovdqu(XMMRegister dst, XMMRegister src);
1119 void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1120 void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);
1121
1122
1123 // AVX512 Unaligned
1124 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);
1125 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);
1126
1127 void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1128 void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1129 void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1130 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1131 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1132 void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1133
1134 void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1135 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1136 void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1137 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1138 void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1139
1140 void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1141 void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1142 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
1143 if (dst->encoding() == src->encoding()) return;
1144 Assembler::evmovdqul(dst, src, vector_len);
1145 }
1146 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1147 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1148 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1149 if (dst->encoding() == src->encoding() && mask == k0) return;
1150 Assembler::evmovdqul(dst, mask, src, merge, vector_len);
1151 }
1152 void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1153
1154 void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1155 void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1156 void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
1157 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
1158 if (dst->encoding() == src->encoding()) return;
13. Called C++ object pointer is null
1159 Assembler::evmovdquq(dst, src, vector_len);
1160 }
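Line 1158 above is the statement the analyzer flags: on the reported path a null XMMRegister reaches 'dst', so dst->encoding() is a method call through a null object pointer. A minimal defensive sketch, assuming a null check is the desired guard (the real fix may instead guarantee callers never pass a null register):

void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(dst != NULL && src != NULL, "null XMMRegister operand");  // hypothetical guard
  if (dst->encoding() == src->encoding()) return;                  // elide self-moves
  Assembler::evmovdquq(dst, src, vector_len);
}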
1161 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1162 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1163 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1164 if (dst->encoding() == src->encoding() && mask == k0) return;
1165 Assembler::evmovdquq(dst, mask, src, merge, vector_len);
1166 }
1167 void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1168
1169 // Move Aligned Double Quadword
1170 void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
1171 void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
1172 void movdqa(XMMRegister dst, AddressLiteral src);
1173
1174 void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
1175 void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
1176 void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
1177 void movsd(XMMRegister dst, AddressLiteral src);
1178
1179 void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
1180 void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
1181 void mulpd(XMMRegister dst, AddressLiteral src);
1182
1183 void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
1184 void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
1185 void mulsd(XMMRegister dst, AddressLiteral src);
1186
1187 void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
1188 void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
1189 void mulss(XMMRegister dst, AddressLiteral src);
1190
1191 // Carry-Less Multiplication Quadword
1192 void pclmulldq(XMMRegister dst, XMMRegister src) {
1193 // 0x00 - multiply lower 64 bits [0:63]
1194 Assembler::pclmulqdq(dst, src, 0x00);
1195 }
1196 void pclmulhdq(XMMRegister dst, XMMRegister src) {
1197 // 0x11 - multiply upper 64 bits [64:127]
1198 Assembler::pclmulqdq(dst, src, 0x11);
1199 }
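In the imm8 selector, bit 0 picks the 64-bit half of the first source and bit 4 the half of the second, which is why 0x00 pairs the low quadwords and 0x11 the high ones. As a worked reference, a scalar model of one carry-less (GF(2)) 64x64 -> 128-bit multiply; clmul64 is a hypothetical helper for illustration, not part of this header:

#include <stdint.h>

// XOR-accumulate shifted copies of 'a' for every set bit of 'b'; no carries
// propagate, so this is polynomial multiplication over GF(2).
static inline void clmul64(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi) {
  *lo = 0; *hi = 0;
  for (int i = 0; i < 64; i++) {
    if ((b >> i) & 1) {
      *lo ^= a << i;
      if (i != 0) *hi ^= a >> (64 - i);
    }
  }
}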
1200
1201 void pcmpeqb(XMMRegister dst, XMMRegister src);
1202 void pcmpeqw(XMMRegister dst, XMMRegister src);
1203
1204 void pcmpestri(XMMRegister dst, Address src, int imm8);
1205 void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);
1206
1207 void pmovzxbw(XMMRegister dst, XMMRegister src);
1208 void pmovzxbw(XMMRegister dst, Address src);
1209
1210 void pmovmskb(Register dst, XMMRegister src);
1211
1212 void ptest(XMMRegister dst, XMMRegister src);
1213
1214 void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
1215 void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
1216 void sqrtsd(XMMRegister dst, AddressLiteral src);
1217
1218 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1219 void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1220 void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg);
1221
1222 void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
1223 void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
1224 void sqrtss(XMMRegister dst, AddressLiteral src);
1225
1226 void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
1227 void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
1228 void subsd(XMMRegister dst, AddressLiteral src);
1229
1230 void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
1231 void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
1232 void subss(XMMRegister dst, AddressLiteral src);
1233
1234 void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
1235 void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
1236 void ucomiss(XMMRegister dst, AddressLiteral src);
1237
1238 void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
1239 void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
1240 void ucomisd(XMMRegister dst, AddressLiteral src);
1241
1242 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1243 void xorpd(XMMRegister dst, XMMRegister src);
1244 void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
1245 void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1246
1247 // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1248 void xorps(XMMRegister dst, XMMRegister src);
1249 void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
1250 void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1251
1252 // Shuffle Bytes
1253 void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
1254 void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
1255 void pshufb(XMMRegister dst, AddressLiteral src);
1256 // AVX 3-operand instructions
1257
1258 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
1259 void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
1260 void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1261
1262 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
1263 void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
1264 void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1265
1266 void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1267 void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1268
1269 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1270 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1271 void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1272
1273 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1274 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1275
1276 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1277 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1278 void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1279
1280 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1281 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1282 void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1283
1284 void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
1285 void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
1286
1287 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1288
1289 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1290 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1291
1292 // Vector compares
1293 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1294 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1295 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1296 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1297 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1298 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1299 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1300 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1301 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1302 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1303 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1304 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1305 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1306 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1307 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1308 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1309
1310 void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
1311
1312 // Emit comparison instruction for the specified comparison predicate.
1313 void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
1314 void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
1315
1316 void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
1317 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
1318
1319 void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);
1320
1321 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1322 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1323 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1324 Assembler::vpmulld(dst, nds, src, vector_len);
1325 };
1326 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1327 Assembler::vpmulld(dst, nds, src, vector_len);
1328 }
1329 void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1330
1331 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1332 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1333
1334 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1335 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1336
1337 void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1338 void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1339
1340 void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1341 void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1342
1343 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1344 if (!is_varshift) {
1345 Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
1346 } else {
1347 Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
1348 }
1349 }
1350 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1351 if (!is_varshift) {
1352 Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
1353 } else {
1354 Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
1355 }
1356 }
1357 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1358 if (!is_varshift) {
1359 Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
1360 } else {
1361 Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
1362 }
1363 }
1364 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1365 if (!is_varshift) {
1366 Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
1367 } else {
1368 Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
1369 }
1370 }
1371 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1372 if (!is_varshift) {
1373 Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
1374 } else {
1375 Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
1376 }
1377 }
1378 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1379 if (!is_varshift) {
1380 Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
1381 } else {
1382 Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
1383 }
1384 }
1385 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1386 if (!is_varshift) {
1387 Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
1388 } else {
1389 Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
1390 }
1391 }
1392 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1393 if (!is_varshift) {
1394 Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
1395 } else {
1396 Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
1397 }
1398 }
1399 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1400 if (!is_varshift) {
1401 Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
1402 } else {
1403 Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
1404 }
1405 }
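These wrappers fold the two shift families behind one entry point: with is_varshift false the count operand's low quadword shifts every element (the evpsllw forms); with it true each element is shifted by its own count (the evpsllvw forms). An illustrative call site (register names hypothetical):

__ evpsllw(xmm_dst, k1, xmm_data, xmm_cnt, /*merge*/ true, Assembler::AVX_512bit, /*is_varshift*/ false);
__ evpsllw(xmm_dst, k1, xmm_data, xmm_cnt, /*merge*/ true, Assembler::AVX_512bit, /*is_varshift*/ true);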
1406
1407 void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1408 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1409 void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1410 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1411
1412 void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1413 void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1414
1415 void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1416 void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1417
1418 void vptest(XMMRegister dst, XMMRegister src);
1419 void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
1420
1421 void punpcklbw(XMMRegister dst, XMMRegister src);
1422 void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
1423
1424 void pshufd(XMMRegister dst, Address src, int mode);
1425 void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }
1426
1427 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1428 void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
1429
1430 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1431 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1432 void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1433
1434 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1435 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1436 void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1437
1438 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1439
1440 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
1441 void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
1442 void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1443
1444 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
1445 void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
1446 void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1447
1448 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
1449 void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
1450 void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1451
1452 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
1453 void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
1454 void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1455
1456 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
1457 void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
1458 void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1459
1460 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
1461 void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
1462 void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1463
1464 void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1465 void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1466
1467 // AVX Vector instructions
1468
1469 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1470 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1471 void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1472
1473 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1474 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1475 void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1476
1477 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1478 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1479 Assembler::vpxor(dst, nds, src, vector_len);
1480 else
1481 Assembler::vxorpd(dst, nds, src, vector_len);
1482 }
1483 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1484 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1485 Assembler::vpxor(dst, nds, src, vector_len);
1486 else
1487 Assembler::vxorpd(dst, nds, src, vector_len);
1488 }
1489 void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1490
1491 // Simple version for AVX2 256bit vectors
1492 void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
1493 void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
1494
1495 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
1496 void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1497
1498 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
1499 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1500 Assembler::vinserti32x4(dst, nds, src, imm8);
1501 } else if (UseAVX > 1) {
1502 // vinserti128 is available only in AVX2
1503 Assembler::vinserti128(dst, nds, src, imm8);
1504 } else {
1505 Assembler::vinsertf128(dst, nds, src, imm8);
1506 }
1507 }
1508
1509 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
1510 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1511 Assembler::vinserti32x4(dst, nds, src, imm8);
1512 } else if (UseAVX > 1) {
1513 // vinserti128 is available only in AVX2
1514 Assembler::vinserti128(dst, nds, src, imm8);
1515 } else {
1516 Assembler::vinsertf128(dst, nds, src, imm8);
1517 }
1518 }
1519
1520 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1521 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1522 Assembler::vextracti32x4(dst, src, imm8);
1523 } else if (UseAVX > 1) {
1524 // vextracti128 is available only in AVX2
1525 Assembler::vextracti128(dst, src, imm8);
1526 } else {
1527 Assembler::vextractf128(dst, src, imm8);
1528 }
1529 }
1530
1531 void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
1532 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1533 Assembler::vextracti32x4(dst, src, imm8);
1534 } else if (UseAVX > 1) {
1535 // vextracti128 is available only in AVX2
1536 Assembler::vextracti128(dst, src, imm8);
1537 } else {
1538 Assembler::vextractf128(dst, src, imm8);
1539 }
1540 }
1541
1542 // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
1543 void vinserti128_high(XMMRegister dst, XMMRegister src) {
1544 vinserti128(dst, dst, src, 1);
1545 }
1546 void vinserti128_high(XMMRegister dst, Address src) {
1547 vinserti128(dst, dst, src, 1);
1548 }
1549 void vextracti128_high(XMMRegister dst, XMMRegister src) {
1550 vextracti128(dst, src, 1);
1551 }
1552 void vextracti128_high(Address dst, XMMRegister src) {
1553 vextracti128(dst, src, 1);
1554 }
1555
1556 void vinsertf128_high(XMMRegister dst, XMMRegister src) {
1557 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1558 Assembler::vinsertf32x4(dst, dst, src, 1);
1559 } else {
1560 Assembler::vinsertf128(dst, dst, src, 1);
1561 }
1562 }
1563
1564 void vinsertf128_high(XMMRegister dst, Address src) {
1565 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1566 Assembler::vinsertf32x4(dst, dst, src, 1);
1567 } else {
1568 Assembler::vinsertf128(dst, dst, src, 1);
1569 }
1570 }
1571
1572 void vextractf128_high(XMMRegister dst, XMMRegister src) {
1573 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1574 Assembler::vextractf32x4(dst, src, 1);
1575 } else {
1576 Assembler::vextractf128(dst, src, 1);
1577 }
1578 }
1579
1580 void vextractf128_high(Address dst, XMMRegister src) {
1581 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1582 Assembler::vextractf32x4(dst, src, 1);
1583 } else {
1584 Assembler::vextractf128(dst, src, 1);
1585 }
1586 }
1587
1588 // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
1589 void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
1590 Assembler::vinserti64x4(dst, dst, src, 1);
1591 }
1592 void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
1593 Assembler::vinsertf64x4(dst, dst, src, 1);
1594 }
1595 void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
1596 Assembler::vextracti64x4(dst, src, 1);
1597 }
1598 void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
1599 Assembler::vextractf64x4(dst, src, 1);
1600 }
1601 void vextractf64x4_high(Address dst, XMMRegister src) {
1602 Assembler::vextractf64x4(dst, src, 1);
1603 }
1604 void vinsertf64x4_high(XMMRegister dst, Address src) {
1605 Assembler::vinsertf64x4(dst, dst, src, 1);
1606 }
1607
1608 // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
1609 void vinserti128_low(XMMRegister dst, XMMRegister src) {
1610 vinserti128(dst, dst, src, 0);
1611 }
1612 void vinserti128_low(XMMRegister dst, Address src) {
1613 vinserti128(dst, dst, src, 0);
1614 }
1615 void vextracti128_low(XMMRegister dst, XMMRegister src) {
1616 vextracti128(dst, src, 0);
1617 }
1618 void vextracti128_low(Address dst, XMMRegister src) {
1619 vextracti128(dst, src, 0);
1620 }
1621
1622 void vinsertf128_low(XMMRegister dst, XMMRegister src) {
1623 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1624 Assembler::vinsertf32x4(dst, dst, src, 0);
1625 } else {
1626 Assembler::vinsertf128(dst, dst, src, 0);
1627 }
1628 }
1629
1630 void vinsertf128_low(XMMRegister dst, Address src) {
1631 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1632 Assembler::vinsertf32x4(dst, dst, src, 0);
1633 } else {
1634 Assembler::vinsertf128(dst, dst, src, 0);
1635 }
1636 }
1637
1638 void vextractf128_low(XMMRegister dst, XMMRegister src) {
1639 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1640 Assembler::vextractf32x4(dst, src, 0);
1641 } else {
1642 Assembler::vextractf128(dst, src, 0);
1643 }
1644 }
1645
1646 void vextractf128_low(Address dst, XMMRegister src) {
1647 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1648 Assembler::vextractf32x4(dst, src, 0);
1649 } else {
1650 Assembler::vextractf128(dst, src, 0);
1651 }
1652 }
1653
1654 // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
1655 void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
1656 Assembler::vinserti64x4(dst, dst, src, 0);
1657 }
1658 void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
1659 Assembler::vinsertf64x4(dst, dst, src, 0);
1660 }
1661 void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
1662 Assembler::vextracti64x4(dst, src, 0);
1663 }
1664 void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
1665 Assembler::vextractf64x4(dst, src, 0);
1666 }
1667 void vextractf64x4_low(Address dst, XMMRegister src) {
1668 Assembler::vextractf64x4(dst, src, 0);
1669 }
1670 void vinsertf64x4_low(XMMRegister dst, Address src) {
1671 Assembler::vinsertf64x4(dst, dst, src, 0);
1672 }
1673
1674 // Carry-Less Multiplication Quadword
1675 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1676 // 0x00 - multiply lower 64 bits [0:63]
1677 Assembler::vpclmulqdq(dst, nds, src, 0x00);
1678 }
1679 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1680 // 0x11 - multiply upper 64 bits [64:127]
1681 Assembler::vpclmulqdq(dst, nds, src, 0x11);
1682 }
1683 void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1684 // 0x10 - multiply nds[0:63] and src[64:127]
1685 Assembler::vpclmulqdq(dst, nds, src, 0x10);
1686 }
1687 void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1688 //0x01 - multiply nds[64:127] and src[0:63]
1689 Assembler::vpclmulqdq(dst, nds, src, 0x01);
1690 }
1691
1692 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1693 // 0x00 - multiply lower 64 bits [0:63]
1694 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
1695 }
1696 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1697 // 0x11 - multiply upper 64 bits [64:127]
1698 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
1699 }
1700
1701 // AVX-512 mask operations.
1702 void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
1703 void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1704 void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
1705 void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1706 void kortest(uint masklen, KRegister src1, KRegister src2);
1707 void ktest(uint masklen, KRegister src1, KRegister src2);
1708
1709 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1710 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1711
1712 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1713 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1714
1715 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1716 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1717
1718 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1719 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1720
1721 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1722 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1723 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1724 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1725
1726 void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
1727 void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
1728
1729 void cmov32( Condition cc, Register dst, Address src);
1730 void cmov32( Condition cc, Register dst, Register src);
1731
1732 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
1733
1734 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1735 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1736
1737 void movoop(Register dst, jobject obj);
1738 void movoop(Address dst, jobject obj);
1739
1740 void mov_metadata(Register dst, Metadata* obj);
1741 void mov_metadata(Address dst, Metadata* obj);
1742
1743 void movptr(ArrayAddress dst, Register src);
1744 // can this do an lea?
1745 void movptr(Register dst, ArrayAddress src);
1746
1747 void movptr(Register dst, Address src);
1748
1749#ifdef _LP64
1750 void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
1751#else
1752 void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
1753#endif
1754
1755 void movptr(Register dst, intptr_t src);
1756 void movptr(Register dst, Register src);
1757 void movptr(Address dst, intptr_t src);
1758
1759 void movptr(Address dst, Register src);
1760
1761 void movptr(Register dst, RegisterOrConstant src) {
1762 if (src.is_constant()) movptr(dst, src.as_constant());
1763 else movptr(dst, src.as_register());
1764 }
1765
1766#ifdef _LP64
1767 // Generally the next two are only used for moving NULL
1768 // Although there are situations in initializing the mark word where
1769 // they could be used. They are dangerous.
1770
1771 // They only exist on LP64 so that int32_t and intptr_t are not the same
1772 // and we have ambiguous declarations.
1773
1774 void movptr(Address dst, int32_t imm32);
1775 void movptr(Register dst, int32_t imm32);
1776#endif // _LP64
1777
1778 // to avoid hiding movl
1779 void mov32(AddressLiteral dst, Register src);
1780 void mov32(Register dst, AddressLiteral src);
1781
1782 // to avoid hiding movb
1783 void movbyte(ArrayAddress dst, int src);
1784
1785 // Import other mov() methods from the parent class or else
1786 // they will be hidden by the following overriding declaration.
1787 using Assembler::movdl;
1788 using Assembler::movq;
1789 void movdl(XMMRegister dst, AddressLiteral src);
1790 void movq(XMMRegister dst, AddressLiteral src);
1791
1792 // Can push value or effective address
1793 void pushptr(AddressLiteral src);
1794
1795 void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
1796 void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
1797
1798 void pushoop(jobject obj);
1799 void pushklass(Metadata* obj);
1800
1801 // sign-extend an l (32-bit value) to a ptr-sized element as needed
1802 void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
1803 void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
1804
1805
1806 public:
1807 // C2 compiled method's prolog code.
1808 void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
1809
1810 // clear memory of size 'cnt' qwords, starting at 'base';
1811 // if 'is_large' is set, do not try to produce a short loop
1812 void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg);
1813
1814 // clear memory initialization sequence for constant size;
1815 void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
1816
1817 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
1818 void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
1819
1820 // Fill primitive arrays
1821 void generate_fill(BasicType t, bool aligned,
1822 Register to, Register value, Register count,
1823 Register rtmp, XMMRegister xtmp);
1824
1825 void encode_iso_array(Register src, Register dst, Register len,
1826 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
1827 XMMRegister tmp4, Register tmp5, Register result, bool ascii);
1828
1829#ifdef _LP64
1830 void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
1831 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1832 Register y, Register y_idx, Register z,
1833 Register carry, Register product,
1834 Register idx, Register kdx);
1835 void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
1836 Register yz_idx, Register idx,
1837 Register carry, Register product, int offset);
1838 void multiply_128_x_128_bmi2_loop(Register y, Register z,
1839 Register carry, Register carry2,
1840 Register idx, Register jdx,
1841 Register yz_idx1, Register yz_idx2,
1842 Register tmp, Register tmp3, Register tmp4);
1843 void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
1844 Register yz_idx, Register idx, Register jdx,
1845 Register carry, Register product,
1846 Register carry2);
1847 void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
1848 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
1849 void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
1850 Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
1851 void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
1852 Register tmp2);
1853 void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
1854 Register rdxReg, Register raxReg);
1855 void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
1856 void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
1857 Register tmp3, Register tmp4);
1858 void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
1859 Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
1860
1861 void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
1862 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
1863 Register raxReg);
1864 void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
1865 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
1866 Register raxReg);
1867 void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
1868 Register result, Register tmp1, Register tmp2,
1869 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
1870#endif
1871
1872 // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
1873 void update_byte_crc32(Register crc, Register val, Register table);
1874 void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
1875
1876
1877#ifdef _LP64
1878 void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2);
1879 void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
1880 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
1881 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
1882 void updateBytesAdler32(Register adler32, Register buf, Register length, XMMRegister shuf0, XMMRegister shuf1, ExternalAddress scale);
1883#endif // _LP64
1884
1885 // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
1886 // Note on a naming convention:
1887 // Prefix w = register only used on a Westmere+ architecture
1888 // Prefix n = register only used on a Nehalem architecture
1889#ifdef _LP64
1890 void crc32c_ipl_alg4(Register in_out, uint32_t n,
1891 Register tmp1, Register tmp2, Register tmp3);
1892#else
1893 void crc32c_ipl_alg4(Register in_out, uint32_t n,
1894 Register tmp1, Register tmp2, Register tmp3,
1895 XMMRegister xtmp1, XMMRegister xtmp2);
1896#endif
1897 void crc32c_pclmulqdq(XMMRegister w_xtmp1,
1898 Register in_out,
1899 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
1900 XMMRegister w_xtmp2,
1901 Register tmp1,
1902 Register n_tmp2, Register n_tmp3);
1903 void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
1904 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1905 Register tmp1, Register tmp2,
1906 Register n_tmp3);
1907 void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
1908 Register in_out1, Register in_out2, Register in_out3,
1909 Register tmp1, Register tmp2, Register tmp3,
1910 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1911 Register tmp4, Register tmp5,
1912 Register n_tmp6);
1913 void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
1914 Register tmp1, Register tmp2, Register tmp3,
1915 Register tmp4, Register tmp5, Register tmp6,
1916 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1917 bool is_pclmulqdq_supported);
1918 // Fold 128-bit data chunk
1919 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
1920 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
1921#ifdef _LP64
1922 // Fold 512-bit data chunk
1923 void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset);
1924#endif // _LP64
1925 // Fold 8-bit data
1926 void fold_8bit_crc32(Register crc, Register table, Register tmp);
1927 void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);
1928
1929 // Compress char[] array to byte[].
1930 void char_array_compress(Register src, Register dst, Register len,
1931 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
1932 XMMRegister tmp4, Register tmp5, Register result,
1933 KRegister mask1 = knoreg, KRegister mask2 = knoreg);
1934
1935 // Inflate byte[] array to char[].
1936 void byte_array_inflate(Register src, Register dst, Register len,
1937 XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
1938
1939 void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
1940 Register length, Register temp, int vec_enc);
1941
1942 void fill64_masked(uint shift, Register dst, int disp,
1943 XMMRegister xmm, KRegister mask, Register length,
1944 Register temp, bool use64byteVector = false);
1945
1946 void fill32_masked(uint shift, Register dst, int disp,
1947 XMMRegister xmm, KRegister mask, Register length,
1948 Register temp);
1949
1950 void fill32(Register dst, int disp, XMMRegister xmm);
1951
1952 void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);
1953
1954#ifdef _LP64
1955 void convert_f2i(Register dst, XMMRegister src);
1956 void convert_d2i(Register dst, XMMRegister src);
1957 void convert_f2l(Register dst, XMMRegister src);
1958 void convert_d2l(Register dst, XMMRegister src);
1959
1960 void cache_wb(Address line);
1961 void cache_wbsync(bool is_pre);
1962
1963#if COMPILER2_OR_JVMCI
1964 void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
1965 Register to, Register count, int shift,
1966 Register index, Register temp,
1967 bool use64byteVector, Label& L_entry, Label& L_exit);
1968
1969 void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
1970 Register to, Register start_index, Register end_index,
1971 Register count, int shift, Register temp,
1972 bool use64byteVector, Label& L_entry, Label& L_exit);
1973
1974 void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
1975 KRegister mask, Register length, Register index,
1976 Register temp, int shift = Address::times_1, int offset = 0,
1977 bool use64byteVector = false);
1978
1979 void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
1980 KRegister mask, Register length, Register index,
1981 Register temp, int shift = Address::times_1, int offset = 0);
1982
1983 void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
1984 int shift = Address::times_1, int offset = 0);
1985
1986 void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
1987 bool conjoint, int shift = Address::times_1, int offset = 0,
1988 bool use64byteVector = false);
1989
1990 void generate_fill_avx3(BasicType type, Register to, Register value,
1991 Register count, Register rtmp, XMMRegister xtmp);
1992
1993#endif // COMPILER2_OR_JVMCI
1994
1995#endif // _LP64
1996
1997 void vallones(XMMRegister dst, int vector_len);
1998};
1999
2000/**
2001 * class SkipIfEqual:
2002 *
2003 * Instantiating this class will result in assembly code being output that will
2004 * jump around any code emitted between the creation of the instance and its
2005 * automatic destruction at the end of a scope block, depending on the value of
2006 * the flag passed to the constructor, which will be checked at run-time.
2007 */
2008class SkipIfEqual {
2009 private:
2010 MacroAssembler* _masm;
2011 Label _label;
2012
2013 public:
2014 SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
2015 ~SkipIfEqual();
2016};
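A hedged usage sketch of the RAII pattern described above (the flag shown is only illustrative):

{
  SkipIfEqual skip(masm, &DTraceMethodProbes, false);
  // ... probe code emitted here is jumped over at run time whenever
  // DTraceMethodProbes == false, i.e. it executes only when the flag is true.
}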
2017
2018#endif // CPU_X86_MACROASSEMBLER_X86_HPP