Bug Summary

File:jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp
Warning:line 1158, column 9
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name macroAssembler_x86.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 
-I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers -Wno-missing-field-initializers -Wno-implicit-fallthrough 
-Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "jvm.h"
27#include "asm/assembler.hpp"
28#include "asm/assembler.inline.hpp"
29#include "compiler/compiler_globals.hpp"
30#include "compiler/disassembler.hpp"
31#include "gc/shared/barrierSet.hpp"
32#include "gc/shared/barrierSetAssembler.hpp"
33#include "gc/shared/collectedHeap.inline.hpp"
34#include "gc/shared/tlab_globals.hpp"
35#include "interpreter/bytecodeHistogram.hpp"
36#include "interpreter/interpreter.hpp"
37#include "memory/resourceArea.hpp"
38#include "memory/universe.hpp"
39#include "oops/accessDecorators.hpp"
40#include "oops/compressedOops.inline.hpp"
41#include "oops/klass.inline.hpp"
42#include "prims/methodHandles.hpp"
43#include "runtime/flags/flagSetting.hpp"
44#include "runtime/interfaceSupport.inline.hpp"
45#include "runtime/jniHandles.hpp"
46#include "runtime/objectMonitor.hpp"
47#include "runtime/os.hpp"
48#include "runtime/safepoint.hpp"
49#include "runtime/safepointMechanism.hpp"
50#include "runtime/sharedRuntime.hpp"
51#include "runtime/stubRoutines.hpp"
52#include "runtime/thread.hpp"
53#include "utilities/macros.hpp"
54#include "crc32c.h"
55
56#ifdef PRODUCT
57#define BLOCK_COMMENT(str) /* nothing */
58#define STOP(error)block_comment(error); stop(error) stop(error)
59#else
60#define BLOCK_COMMENT(str) block_comment(str)
61#define STOP(error)block_comment(error); stop(error) block_comment(error); stop(error)
62#endif
63
64#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
65
66#ifdef ASSERT1
67bool AbstractAssembler::pd_check_instruction_mark() { return true; }
68#endif
69
// Lookup table of opposite conditions: the trailing comment on each entry names
// the condition code (and its numeric value) that indexes that slot, and the
// entry itself is the logically inverted condition.
70static Assembler::Condition reverse[] = {
71 Assembler::noOverflow /* overflow = 0x0 */ ,
72 Assembler::overflow /* noOverflow = 0x1 */ ,
73 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
74 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
75 Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
76 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
77 Assembler::above /* belowEqual = 0x6 */ ,
78 Assembler::belowEqual /* above = 0x7 */ ,
79 Assembler::positive /* negative = 0x8 */ ,
80 Assembler::negative /* positive = 0x9 */ ,
81 Assembler::noParity /* parity = 0xa */ ,
82 Assembler::parity /* noParity = 0xb */ ,
83 Assembler::greaterEqual /* less = 0xc */ ,
84 Assembler::less /* greaterEqual = 0xd */ ,
85 Assembler::greater /* lessEqual = 0xe */ ,
86 Assembler::lessEqual /* greater = 0xf, */
87
88};
89
90
91// Implementation of MacroAssembler
92
93// First come all the functions that have distinct implementations depending on
94// 32/64 bit, unless the difference is trivial (1 line or so).
95
96#ifndef _LP641
97
98// 32bit versions
99
// 32-bit: builds an Address directly from the literal's target and its
// relocation spec.
100Address MacroAssembler::as_Address(AddressLiteral adr) {
101 return Address(adr.target(), adr.rspec());
102}
103
// 32-bit: converts an ArrayAddress by delegating to Address::make_array().
104Address MacroAssembler::as_Address(ArrayAddress adr) {
105 return Address::make_array(adr);
106}
107
// 32-bit: calls entry_point as a runtime address, then increments rsp by
// number_of_arguments words (presumably discarding arguments the caller
// pushed -- confirm against the callers).
108void MacroAssembler::call_VM_leaf_base(address entry_point,
109 int number_of_arguments) {
110 call(RuntimeAddress(entry_point));
111 increment(rsp, number_of_arguments * wordSize);
112}
113
// 32-bit: compares the memory operand src1 with the Metadata pointer obj,
// passed to cmp_literal32 as an int32_t with an immediate metadata relocation.
114void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
115 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
116}
117
118
// 32-bit: register variant -- compares src1 with the Metadata pointer obj,
// passed to cmp_literal32 as an int32_t with an immediate metadata relocation.
119void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
120 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
121}
122
// 32-bit: compares the memory operand src1 with the jobject handle obj,
// passed to cmp_literal32 as an int32_t with an immediate oop relocation.
123void MacroAssembler::cmpoop(Address src1, jobject obj) {
124 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
125}
126
// 32-bit: register variant -- compares src1 with the jobject handle obj,
// passed to cmp_literal32 as an int32_t with an immediate oop relocation.
127void MacroAssembler::cmpoop(Register src1, jobject obj) {
128 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
129}
130
// Sign-extends lo into hi. Uses the single cdql instruction when running on a
// P6 and the pair is exactly rdx:rax; otherwise copies lo into hi and
// arithmetic-shifts it right by 31 so hi holds only the sign.
131void MacroAssembler::extend_sign(Register hi, Register lo) {
132 // According to Intel Doc. AP-526, "Integer Divide", p.18.
133 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
134 cdql();
135 } else {
136 movl(hi, lo);
137 sarl(hi, 31);
138 }
139}
140
// Branches to L when the parity flag is set after the FPU status word has been
// transferred into the CPU flags (fnstsw_ax + sahf). rax is preserved by
// saving it to tmp before and restoring it after the transfer.
141void MacroAssembler::jC2(Register tmp, Label& L) {
142 // set parity bit if FPU flag C2 is set (via rax)
143 save_rax(tmp);
144 fwait(); fnstsw_ax();
145 sahf();
146 restore_rax(tmp);
147 // branch
148 jcc(Assembler::parity, L);
149}
150
// Counterpart of jC2: same flag transfer sequence, but branches to L when the
// parity flag is clear (noParity). rax is preserved through tmp.
151void MacroAssembler::jnC2(Register tmp, Label& L) {
152 // set parity bit if FPU flag C2 is set (via rax)
153 save_rax(tmp);
154 fwait(); fnstsw_ax();
155 sahf();
156 restore_rax(tmp);
157 // branch
158 jcc(Assembler::noParity, L);
159}
160
161// 32bit can do a case table jump in one instruction but we no longer allow the base
162// to be installed in the Address class
// Emits a jmp through the Address produced by as_Address(entry).
163void MacroAssembler::jump(ArrayAddress entry) {
164 jmp(as_Address(entry));
165}
166
// 32-bit three-way compare of the longs x_hi:x_lo and y_hi:y_lo.
// The result is left in x_hi: -1 if x < y, 0 if equal, 1 if x > y.
// The high words are compared signed (less/greater); the low words are
// compared unsigned (below).
167// Note: y_lo will be destroyed
168void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
169 // Long compare for Java (semantics as described in JVM spec.)
170 Label high, low, done;
171
172 cmpl(x_hi, y_hi);
173 jcc(Assembler::less, low);
174 jcc(Assembler::greater, high);
175 // x_hi is the return register
176 xorl(x_hi, x_hi);
177 cmpl(x_lo, y_lo);
178 jcc(Assembler::below, low);
179 jcc(Assembler::equal, done);
180
181 bind(high);
182 xorl(x_hi, x_hi);
183 increment(x_hi);
184 jmp(done);
185
186 bind(low);
187 xorl(x_hi, x_hi);
188 decrementl(x_hi);
189
190 bind(done);
191}
192
// 32-bit: puts the literal's target address into dst via mov_literal32, using
// the literal's relocation spec.
193void MacroAssembler::lea(Register dst, AddressLiteral src) {
194 mov_literal32(dst, (int32_t)src.target(), src.rspec());
195}
196
// 32-bit: stores the literal's target address into the memory operand dst
// via mov_literal32, using the literal's relocation spec.
197void MacroAssembler::lea(Address dst, AddressLiteral adr) {
198 // leal(dst, as_Address(adr));
199 // see note in movl as to why we must use a move
200 mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
201}
202
// 32-bit: tears down the frame with two instructions -- rsp is reloaded from
// rbp, then rbp is popped.
203void MacroAssembler::leave() {
204 mov(rsp, rbp);
205 pop(rbp);
206}
207
// 32-bit: emits a multiply of two Java longs that live on the stack at the
// given rsp-relative byte offsets. rbx and rcx are used as scratch registers.
208void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
209 // Multiplication of two Java long values stored on the stack
210 // as illustrated below. Result is in rdx:rax.
211 //
212 // rsp ---> [ ?? ] \ \
213 // .... | y_rsp_offset |
214 // [ y_lo ] / (in bytes) | x_rsp_offset
215 // [ y_hi ] | (in bytes)
216 // .... |
217 // [ x_lo ] /
218 // [ x_hi ]
219 // ....
220 //
221 // Basic idea: lo(result) = lo(x_lo * y_lo)
222 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
223 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
224 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
225 Label quick;
226 // load x_hi, y_hi and check if quick
227 // multiplication is possible
228 movl(rbx, x_hi);
229 movl(rcx, y_hi);
230 movl(rax, rbx);
231 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0
232 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply
233 // do full multiplication
234 // 1st step
235 mull(y_lo); // x_hi * y_lo
236 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx,
237 // 2nd step
238 movl(rax, x_lo);
239 mull(rcx); // x_lo * y_hi
240 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx,
241 // 3rd step
242 bind(quick); // note: rbx, = 0 if quick multiply!
243 movl(rax, x_lo);
244 mull(y_lo); // x_lo * y_lo
245 addl(rdx, rbx); // correct hi(x_lo * y_lo)
246}
247
// 32-bit: negates the long held in hi:lo -- negate lo, add the resulting
// carry into hi, then negate hi.
248void MacroAssembler::lneg(Register hi, Register lo) {
249 negl(lo);
250 adcl(hi, 0);
251 negl(hi);
252}
253
// 32-bit: emits a left shift of the long in hi:lo by the count in rcx.
// The count is masked to 6 bits first; neither hi nor lo may be rcx (asserted).
254void MacroAssembler::lshl(Register hi, Register lo) {
255 // Java shift left long support (semantics as described in JVM spec., p.305)
256 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
257 // shift value is in rcx !
258 assert(hi != rcx, "must not use rcx")do { if (!(hi != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 258, "assert(" "hi != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
259 assert(lo != rcx, "must not use rcx")do { if (!(lo != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 259, "assert(" "lo != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
260 const Register s = rcx; // shift count
261 const int n = BitsPerWord;
262 Label L;
263 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
264 cmpl(s, n); // if (s < n)
265 jcc(Assembler::less, L); // else (s >= n)
266 movl(hi, lo); // x := x << n
267 xorl(lo, lo);
268 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
269 bind(L); // s (mod n) < n
270 shldl(hi, lo); // x := x << s
271 shll(lo);
272}
273
274
// 32-bit: emits a right shift of the long in hi:lo by the count in rcx
// (masked to 6 bits). sign_extension selects an arithmetic shift (sarl) over a
// logical one (xorl/shrl). Neither hi nor lo may be rcx (asserted).
275void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
276 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
277 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
278 assert(hi != rcx, "must not use rcx")do { if (!(hi != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 278, "assert(" "hi != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
279 assert(lo != rcx, "must not use rcx")do { if (!(lo != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 279, "assert(" "lo != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
280 const Register s = rcx; // shift count
281 const int n = BitsPerWord;
282 Label L;
283 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
284 cmpl(s, n); // if (s < n)
285 jcc(Assembler::less, L); // else (s >= n)
286 movl(lo, hi); // x := x >> n
287 if (sign_extension) sarl(hi, 31);
288 else xorl(hi, hi);
289 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
290 bind(L); // s (mod n) < n
291 shrdl(lo, hi); // x := x >> s
292 if (sign_extension) sarl(hi);
293 else shrl(hi);
294}
295
// 32-bit: moves the jobject handle obj into dst as a 32-bit literal with an
// immediate oop relocation.
296void MacroAssembler::movoop(Register dst, jobject obj) {
297 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
298}
299
// 32-bit: memory-destination variant -- stores the jobject handle obj to dst
// as a 32-bit literal with an immediate oop relocation.
300void MacroAssembler::movoop(Address dst, jobject obj) {
301 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
302}
303
// 32-bit: moves the Metadata pointer obj into dst as a 32-bit literal with an
// immediate metadata relocation.
304void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
305 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
306}
307
// 32-bit: memory-destination variant -- stores the Metadata pointer obj to dst
// as a 32-bit literal with an immediate metadata relocation.
308void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
309 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
310}
311
// 32-bit: if src is an lval, loads the literal's address itself into dst;
// otherwise loads the 32-bit value stored at that address.
312void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
313 // scratch register is not used,
314 // it is defined to match parameters of 64-bit version of this method.
315 if (src.is_lval()) {
316 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
317 } else {
318 movl(dst, as_Address(src));
319 }
320}
321
// 32-bit: stores src to the array element addressed by dst using movl.
322void MacroAssembler::movptr(ArrayAddress dst, Register src) {
323 movl(as_Address(dst), src);
324}
325
// 32-bit: loads dst from the array element addressed by src using movl.
326void MacroAssembler::movptr(Register dst, ArrayAddress src) {
327 movl(dst, as_Address(src));
328}
329
// 32-bit: stores the immediate src to the memory operand dst using movl.
330// src should NEVER be a real pointer. Use AddressLiteral for true pointers
331void MacroAssembler::movptr(Address dst, intptr_t src) {
332 movl(dst, src);
333}
334
335
// Pops rcx, rdx, rdi and rsi, in that order -- the exact reverse of the order
// in which push_callee_saved_registers() pushes them.
336void MacroAssembler::pop_callee_saved_registers() {
337 pop(rcx);
338 pop(rdx);
339 pop(rdi);
340 pop(rsi);
341}
342
// Pushes rsi, rdi, rdx and rcx, in that order; pop_callee_saved_registers()
// pops them in the reverse order.
343void MacroAssembler::push_callee_saved_registers() {
344 push(rsi);
345 push(rdi);
346 push(rdx);
347 push(rcx);
348}
349
// 32-bit: pushes the jobject handle obj as a 32-bit literal with an immediate
// oop relocation.
350void MacroAssembler::pushoop(jobject obj) {
351 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
352}
353
// 32-bit: pushes the Metadata pointer obj as a 32-bit literal with an
// immediate metadata relocation.
354void MacroAssembler::pushklass(Metadata* obj) {
355 push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
356}
357
// 32-bit: if src is an lval, pushes the literal's address itself; otherwise
// pushes the 32-bit value stored at that address.
358void MacroAssembler::pushptr(AddressLiteral src) {
359 if (src.is_lval()) {
360 push_literal32((int32_t)src.target(), src.rspec());
361 } else {
362 pushl(as_Address(src));
363 }
364}
365
// 32-bit: argument 0 is passed by pushing it on the stack.
366static void pass_arg0(MacroAssembler* masm, Register arg) {
367 masm->push(arg);
368}
369
// 32-bit: argument 1 is passed by pushing it on the stack.
370static void pass_arg1(MacroAssembler* masm, Register arg) {
371 masm->push(arg);
372}
373
// 32-bit: argument 2 is passed by pushing it on the stack.
374static void pass_arg2(MacroAssembler* masm, Register arg) {
375 masm->push(arg);
376}
377
// 32-bit: argument 3 is passed by pushing it on the stack.
378static void pass_arg3(MacroAssembler* masm, Register arg) {
379 masm->push(arg);
380}
381
382#ifndef PRODUCT
383extern "C" void findpc(intptr_t x);
384#endif
385
// Runtime debug entry whose address the 32-bit stop() emits a call to.
// Receives the saved register values, the faulting eip and the message.
// Switches the current thread to _thread_in_vm, optionally (when
// ShowMessageBoxOnError is set) shows a message box and prints the register
// state, and finally reports a fatal error carrying msg.
386void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
387 // In order to get locks to work, we need to fake an in_VM state
388 JavaThread* thread = JavaThread::current();
389 JavaThreadState saved_state = thread->thread_state();
390 thread->set_thread_state(_thread_in_vm);
391 if (ShowMessageBoxOnError) {
 // NOTE(review): the next three statements repeat lines 388-390 and shadow
 // the outer thread/saved_state locals; saved_state is never read in this
 // function. Looks redundant -- confirm before removing.
392 JavaThread* thread = JavaThread::current();
393 JavaThreadState saved_state = thread->thread_state();
394 thread->set_thread_state(_thread_in_vm);
395 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
396 ttyLocker ttyl;
397 BytecodeCounter::print();
398 }
399 // To see where a verify_oop failed, get $ebx+40/X for this frame.
400 // This is the value of eip which points to where verify_oop will return.
401 if (os::message_box(msg, "Execution stopped, print registers?")) {
402 print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
403 BREAKPOINT::breakpoint();
404 }
405 }
406 fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 406, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0)
;
407}
408
// Dumps a 32-bit register snapshot to tty while holding the tty lock:
// eip, each of the eight general registers via os::print_location, words
// starting at rsp, and a disassembly of the 64 bytes before and the 32 bytes
// from eip. Debugging is forced to true for the duration via FlagSetting.
409void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
410 ttyLocker ttyl;
411 FlagSetting fs(Debugging, true);
412 tty->print_cr("eip = 0x%08x", eip);
413#ifndef PRODUCT
414 if ((WizardMode || Verbose) && PrintMiscellaneous) {
415 tty->cr();
416 findpc(eip);
417 tty->cr();
418 }
419#endif
 // PRINT_REG prints "<name> = " followed by the location description of the
 // register's value; the macro parameter is stringized for the name.
420#define PRINT_REG(rax) \
421 { tty->print("%s = ", #rax); os::print_location(tty, rax); }
422 PRINT_REG(rax);
423 PRINT_REG(rbx);
424 PRINT_REG(rcx);
425 PRINT_REG(rdx);
426 PRINT_REG(rdi);
427 PRINT_REG(rsi);
428 PRINT_REG(rbp);
429 PRINT_REG(rsp);
430#undef PRINT_REG
431 // Print some words near top of stack.
432 int* dump_sp = (int*) rsp;
 // First 8 words: one per line, each with its location description.
433 for (int col1 = 0; col1 < 8; col1++) {
434 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
435 os::print_location(tty, *dump_sp++);
436 }
 // Next 16 rows of 8 raw words each.
437 for (int row = 0; row < 16; row++) {
438 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
439 for (int col = 0; col < 8; col++) {
440 tty->print(" 0x%08x", *dump_sp++);
441 }
442 tty->cr();
443 }
444 // Print some instructions around pc:
445 Disassembler::decode((address)eip-64, (address)eip);
446 tty->print_cr("--------");
447 Disassembler::decode((address)eip, (address)eip+32);
448}
449
// 32-bit: emits code that pushes the address of msg, the current eip (by
// calling the immediately following label) and all registers, then calls
// MacroAssembler::debug32. A hlt is emitted after the call.
450void MacroAssembler::stop(const char* msg) {
451 ExternalAddress message((address)msg);
452 // push address of message
453 pushptr(message.addr());
454 { Label L; call(L, relocInfo::none); bind(L); } // push eip
455 pusha(); // push registers
456 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)((address)((address_word)(MacroAssembler::debug32)))));
457 hlt();
458}
459
// 32-bit: emits code that saves the CPU state, pushes the address of msg,
// calls warning(), removes the pushed argument from the stack and restores
// the CPU state.
460void MacroAssembler::warn(const char* msg) {
461 push_CPU_state();
462
463 ExternalAddress message((address) msg);
464 // push address of message
465 pushptr(message.addr());
466
467 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)((address)((address_word)(warning)))));
468 addl(rsp, wordSize); // discard argument
469 pop_CPU_state();
470}
471
// 32-bit: emits code that pushes eip and all registers, calls
// MacroAssembler::print_state32 with the CPU state saved around the call,
// then pops the registers and releases one more stack word (presumably the
// pushed eip -- it is the only other word pushed here).
472void MacroAssembler::print_state() {
473 { Label L; call(L, relocInfo::none); bind(L); } // push eip
474 pusha(); // push registers
475
476 push_CPU_state();
477 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)((address)((address_word)(MacroAssembler::print_state32)))));
478 pop_CPU_state();
479
480 popa();
481 addl(rsp, wordSize);
482}
483
484#else // _LP64
485
486// 64 bit versions
487
// 64-bit: builds an Address whose 32-bit displacement is the distance from
// the current pc() to the literal's target. The literal must be an rval and
// reachable (both asserted).
488Address MacroAssembler::as_Address(AddressLiteral adr) {
489 // amd64 always does this as a pc-rel
490 // we can be absolute or disp based on the instruction type
491 // jmp/call are displacements others are absolute
492 assert(!adr.is_lval(), "must be rval")do { if (!(!adr.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 492, "assert(" "!adr.is_lval()" ") failed", "must be rval")
; ::breakpoint(); } } while (0)
;
493 assert(reachable(adr), "must be")do { if (!(reachable(adr))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 493, "assert(" "reachable(adr)" ") failed", "must be"); ::breakpoint
(); } } while (0)
;
494 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
495
496}
497
// 64-bit: loads the array base into rscratch1 (clobbering it) and returns an
// Address formed from rscratch1 plus the ArrayAddress's index, scale and
// displacement. The displacement is asserted to be zero.
498Address MacroAssembler::as_Address(ArrayAddress adr) {
499 AddressLiteral base = adr.base();
500 lea(rscratch1, base);
501 Address index = adr.index();
502 assert(index._disp == 0, "must not have disp")do { if (!(index._disp == 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 502, "assert(" "index._disp == 0" ") failed", "must not have disp"
); ::breakpoint(); } } while (0)
; // maybe it can?
503 Address array(rscratch1, index._index, index._scale, index._disp);
504 return array;
505}
506
// 64-bit: emits a call to entry_point with rsp 16-byte aligned. The generated
// code tests the low four bits of rsp at run time: if they are non-zero, rsp
// is lowered by 8 around the call; otherwise the call is made directly.
// On _WIN64 the register-argument save area is reserved before and released
// after the call, and at most 4 arguments are supported (asserted).
507void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
508 Label L, E;
509
510#ifdef _WIN64
511 // Windows always allocates space for its register args
512 assert(num_args <= 4, "only register arguments supported")do { if (!(num_args <= 4)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 512, "assert(" "num_args <= 4" ") failed", "only register arguments supported"
); ::breakpoint(); } } while (0)
;
513 subq(rsp, frame::arg_reg_save_area_bytes);
514#endif
515
516 // Align stack if necessary
517 testl(rsp, 15);
518 jcc(Assembler::zero, L);
519
 // Misaligned path: adjust rsp by 8 for the duration of the call.
520 subq(rsp, 8);
521 {
522 call(RuntimeAddress(entry_point));
523 }
524 addq(rsp, 8);
525 jmp(E);
526
 // Already aligned path.
527 bind(L);
528 {
529 call(RuntimeAddress(entry_point));
530 }
531
532 bind(E);
533
534#ifdef _WIN64
535 // restore stack pointer
536 addq(rsp, frame::arg_reg_save_area_bytes);
537#endif
538
539}
540
// Compares src1 with the 64-bit value stored at the literal address src2.
// If src2 is reachable it is addressed directly; otherwise its address is
// first loaded into rscratch1 (clobbering it). src2 must be an rval (asserted).
541void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
542 assert(!src2.is_lval(), "should use cmpptr")do { if (!(!src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 542, "assert(" "!src2.is_lval()" ") failed", "should use cmpptr"
); ::breakpoint(); } } while (0)
;
543
544 if (reachable(src2)) {
545 cmpq(src1, as_Address(src2));
546 } else {
547 lea(rscratch1, src2);
548 Assembler::cmpq(src1, Address(rscratch1, 0));
549 }
550}
551
// Emits a 64-bit signed divide of rax by reg that skips the idivq instruction
// for the min_long / -1 case. Returns the code offset of the emitted idivq.
// reg must be neither rax nor rdx (asserted).
552int MacroAssembler::corrected_idivq(Register reg) {
553 // Full implementation of Java ldiv and lrem; checks for special
554 // case as described in JVM spec., p.243 & p.271. The function
555 // returns the (pc) offset of the idivq instruction - may be needed
556 // for implicit exceptions.
557 //
558 // normal case special case
559 //
560 // input : rax: dividend min_long
561 // reg: divisor (may not be eax/edx) -1
562 //
563 // output: rax: quotient (= rax idiv reg) min_long
564 // rdx: remainder (= rax irem reg) 0
565 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register")do { if (!(reg != rax && reg != rdx)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 565, "assert(" "reg != rax && reg != rdx" ") failed"
, "reg cannot be rax or rdx register"); ::breakpoint(); } } while
(0)
;
566 static const int64_t min_long = 0x8000000000000000;
567 Label normal_case, special_case;
568
569 // check for special case
570 cmp64(rax, ExternalAddress((address) &min_long));
571 jcc(Assembler::notEqual, normal_case);
572 xorl(rdx, rdx); // prepare rdx for possible special case (where
573 // remainder = 0)
574 cmpq(reg, -1);
575 jcc(Assembler::equal, special_case);
576
577 // handle normal case
578 bind(normal_case);
579 cdqq();
 // Record the offset right before the divide so callers can locate it.
580 int idivq_offset = offset();
581 idivq(reg);
582
583 // normal and special case exit
584 bind(special_case);
585
586 return idivq_offset;
587}
588
// Subtracts value from the 64-bit register reg, choosing the instruction:
// min_jint is handled first, before the -value negation below (negating
// min_jint would overflow int); negative values are delegated to incrementq;
// zero emits nothing; 1 uses decq when UseIncDec is set; otherwise subq.
589void MacroAssembler::decrementq(Register reg, int value) {
590 if (value == min_jint) { subq(reg, value); return; }
591 if (value < 0) { incrementq(reg, -value); return; }
592 if (value == 0) { ; return; }
593 if (value == 1 && UseIncDec) { decq(reg) ; return; }
594 /* else */ { subq(reg, value) ; return; }
595}
596
// Memory-operand variant: subtracts value from the 64-bit location dst.
// min_jint is handled first, before the -value negation below (negating
// min_jint would overflow int); negative values are delegated to incrementq;
// zero emits nothing; 1 uses decq when UseIncDec is set; otherwise subq.
597void MacroAssembler::decrementq(Address dst, int value) {
598 if (value == min_jint) { subq(dst, value); return; }
599 if (value < 0) { incrementq(dst, -value); return; }
600 if (value == 0) { ; return; }
601 if (value == 1 && UseIncDec) { decq(dst) ; return; }
602 /* else */ { subq(dst, value) ; return; }
603}
604
// Increments the 64-bit value at the literal address dst. If dst is reachable
// it is addressed directly; otherwise its address is first loaded into
// rscratch1 (clobbering it).
605void MacroAssembler::incrementq(AddressLiteral dst) {
606 if (reachable(dst)) {
607 incrementq(as_Address(dst));
608 } else {
609 lea(rscratch1, dst);
610 incrementq(Address(rscratch1, 0));
611 }
612}
613
// Adds value to the 64-bit register reg, choosing the instruction:
// min_jint is handled first, before the -value negation below (negating
// min_jint would overflow int); negative values are delegated to decrementq;
// zero emits nothing; 1 uses incq when UseIncDec is set; otherwise addq.
614void MacroAssembler::incrementq(Register reg, int value) {
615 if (value == min_jint) { addq(reg, value); return; }
616 if (value < 0) { decrementq(reg, -value); return; }
617 if (value == 0) { ; return; }
618 if (value == 1 && UseIncDec) { incq(reg) ; return; }
619 /* else */ { addq(reg, value) ; return; }
620}
621
// Memory-operand variant: adds value to the 64-bit location dst.
// min_jint is handled first, before the -value negation below (negating
// min_jint would overflow int); negative values are delegated to decrementq;
// zero emits nothing; 1 uses incq when UseIncDec is set; otherwise addq.
622void MacroAssembler::incrementq(Address dst, int value) {
623 if (value == min_jint) { addq(dst, value); return; }
624 if (value < 0) { decrementq(dst, -value); return; }
625 if (value == 0) { ; return; }
626 if (value == 1 && UseIncDec) { incq(dst) ; return; }
627 /* else */ { addq(dst, value) ; return; }
628}
629
630// 32bit can do a case table jump in one instruction but we no longer allow the base
631// to be installed in the Address class
// 64-bit: loads the table base into rscratch1 (clobbering it), installs it as
// the base of the entry's index Address (which must have no base yet --
// asserted) and jumps through the result.
632void MacroAssembler::jump(ArrayAddress entry) {
633 lea(rscratch1, entry.base());
634 Address dispatch = entry.index();
635 assert(dispatch._base == noreg, "must be")do { if (!(dispatch._base == noreg)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 635, "assert(" "dispatch._base == noreg" ") failed", "must be"
); ::breakpoint(); } } while (0)
;
636 dispatch._base = rscratch1;
637 jmp(dispatch);
638}
639
// 64-bit: not expected to be called -- the body starts with
// ShouldNotReachHere(). The cmpq that follows only compares the low registers.
640void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
641 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 641); ::breakpoint(); } while (0)
; // 64bit doesn't use two regs
642 cmpq(x_lo, y_lo);
643}
644
// 64-bit: puts the literal's target address into dst via mov_literal64, using
// the literal's relocation spec.
645void MacroAssembler::lea(Register dst, AddressLiteral src) {
646 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
647}
648
// 64-bit: stores the literal's target address to the memory operand dst,
// going through rscratch1 (which is clobbered).
649void MacroAssembler::lea(Address dst, AddressLiteral adr) {
650 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
651 movptr(dst, rscratch1);
652}
653
// 64-bit: emits the single-byte LEAVE opcode (0xC9) instead of the
// mov/pop pair used by the 32-bit version.
654void MacroAssembler::leave() {
655 // %%% is this really better? Why not on 32bit too?
656 emit_int8((unsigned char)0xC9); // LEAVE
657}
658
// 64-bit: not expected to be called -- the body starts with
// ShouldNotReachHere(). The negq that follows only negates lo.
659void MacroAssembler::lneg(Register hi, Register lo) {
660 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 660); ::breakpoint(); } while (0)
; // 64bit doesn't use two regs
661 negq(lo);
662}
663
// 64-bit: moves the jobject handle obj into dst as a 64-bit literal with an
// immediate oop relocation.
664void MacroAssembler::movoop(Register dst, jobject obj) {
665 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
666}
667
// 64-bit: stores the jobject handle obj to the memory operand dst, going
// through rscratch1 (which is clobbered).
668void MacroAssembler::movoop(Address dst, jobject obj) {
669 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
670 movq(dst, rscratch1);
671}
672
// 64-bit: moves the Metadata pointer obj into dst as a 64-bit literal with an
// immediate metadata relocation.
673void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
674 mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
675}
676
// 64-bit: stores the Metadata pointer obj to the memory operand dst, going
// through rscratch1 (which is clobbered).
677void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
678 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
679 movq(dst, rscratch1);
680}
681
// 64-bit: if src is an lval, loads the literal's address itself into dst.
// Otherwise loads the 64-bit value stored at that address: directly when the
// literal is reachable, else by first loading its address into scratch
// (which is clobbered).
682void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
683 if (src.is_lval()) {
684 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
685 } else {
686 if (reachable(src)) {
687 movq(dst, as_Address(src));
688 } else {
689 lea(scratch, src);
690 movq(dst, Address(scratch, 0));
691 }
692 }
693}
694
// 64-bit: stores src to the array element addressed by dst using movq.
695void MacroAssembler::movptr(ArrayAddress dst, Register src) {
696 movq(as_Address(dst), src);
697}
698
// 64-bit: loads dst from the array element addressed by src using movq.
699void MacroAssembler::movptr(Register dst, ArrayAddress src) {
700 movq(dst, as_Address(src));
701}
702
// 64-bit: stores the immediate src to the memory operand dst. Values that fit
// a signed 32-bit immediate are forwarded to the int32_t overload; anything
// larger is first materialised in rscratch1 (which is clobbered).
703// src should NEVER be a real pointer. Use AddressLiteral for true pointers
704void MacroAssembler::movptr(Address dst, intptr_t src) {
705 if (is_simm32(src)) {
706 movptr(dst, checked_cast<int32_t>(src));
707 } else {
708 mov64(rscratch1, src);
709 movq(dst, rscratch1);
710 }
711}
712
713// These are mostly for initializing NULL
// Stores the 32-bit immediate src to the memory operand dst via movslq
// (presumably sign-extending it to 64 bits -- confirm in Assembler::movslq).
714void MacroAssembler::movptr(Address dst, int32_t src) {
715 movslq(dst, src);
716}
717
// Loads the 32-bit immediate src, widened to intptr_t, into dst via mov64.
718void MacroAssembler::movptr(Register dst, int32_t src) {
719 mov64(dst, (intptr_t)src);
720}
721
// 64-bit: pushes the jobject handle obj by first loading it into rscratch1
// (which is clobbered).
722void MacroAssembler::pushoop(jobject obj) {
723 movoop(rscratch1, obj);
724 push(rscratch1);
725}
726
// 64-bit: pushes the Metadata pointer obj by first loading it into rscratch1
// (which is clobbered).
727void MacroAssembler::pushklass(Metadata* obj) {
728 mov_metadata(rscratch1, obj);
729 push(rscratch1);
730}
731
// 64-bit: loads the literal's address into rscratch1 (clobbering it), then
// pushes either that address itself (lval) or the 64-bit value stored at it.
732void MacroAssembler::pushptr(AddressLiteral src) {
733 lea(rscratch1, src);
734 if (src.is_lval()) {
735 push(rscratch1);
736 } else {
737 pushq(Address(rscratch1, 0));
738 }
739}
740
// 64-bit convenience overload: forwards to the two-argument version with
// r15_thread as the thread register.
741void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
742 reset_last_Java_frame(r15_thread, clear_fp);
743}
744
// 64-bit: emits code that records the last Java frame in the thread addressed
// by r15_thread. A vzeroupper is emitted first. The sp is always stored
// (rsp is used when last_java_sp is not a valid register); the fp is stored
// only when last_java_fp is valid; the pc is stored only when last_java_pc is
// non-null, in which case rscratch1 is clobbered. The sp is written last.
745void MacroAssembler::set_last_Java_frame(Register last_java_sp,
746 Register last_java_fp,
747 address last_java_pc) {
748 vzeroupper();
749 // determine last_java_sp register
750 if (!last_java_sp->is_valid()) {
751 last_java_sp = rsp;
752 }
753
754 // last_java_fp is optional
755 if (last_java_fp->is_valid()) {
756 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
757 last_java_fp);
758 }
759
760 // last_java_pc is optional
761 if (last_java_pc != NULL__null) {
762 Address java_pc(r15_thread,
763 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
764 lea(rscratch1, InternalAddress(last_java_pc));
765 movptr(java_pc, rscratch1);
766 }
767
768 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
769}
770
771static void pass_arg0(MacroAssembler* masm, Register arg) {
772 if (c_rarg0 != arg ) {
773 masm->mov(c_rarg0, arg);
774 }
775}
776
777static void pass_arg1(MacroAssembler* masm, Register arg) {
778 if (c_rarg1 != arg ) {
779 masm->mov(c_rarg1, arg);
780 }
781}
782
783static void pass_arg2(MacroAssembler* masm, Register arg) {
784 if (c_rarg2 != arg ) {
785 masm->mov(c_rarg2, arg);
786 }
787}
788
789static void pass_arg3(MacroAssembler* masm, Register arg) {
790 if (c_rarg3 != arg ) {
791 masm->mov(c_rarg3, arg);
792 }
793}
794
// Emits code that calls MacroAssembler::debug64 with `msg` in c_rarg0 and then
// executes hlt.
// When ShowMessageBoxOnError is set at code-generation time, the emitted code
// also saves the registers with pusha() and passes the pc captured before the
// pusha in c_rarg1 and the post-pusha rsp (the saved-register array) in c_rarg2.
795void MacroAssembler::stop(const char* msg) {
796 if (ShowMessageBoxOnError) {
797 address rip = pc();
798 pusha(); // get regs on stack
799 lea(c_rarg1, InternalAddress(rip));
800 movq(c_rarg2, rsp); // pass pointer to regs array
801 }
802 lea(c_rarg0, ExternalAddress((address) msg));
803 andq(rsp, -16); // align stack as required by ABI
804 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)((address)((address_word)(MacroAssembler::debug64)))));
805 hlt();
806}
807
// Emits code that calls warning() with `msg` in c_rarg0.
// rbp is pushed and used to remember the incoming rsp, the stack is aligned
// down to 16 bytes, and the CPU state is saved around the call and restored
// afterwards. The call target is loaded into rax before push_CPU_state's
// matching pop_CPU_state runs.
808void MacroAssembler::warn(const char* msg) {
809 push(rbp);
810 movq(rbp, rsp);
811 andq(rsp, -16); // align stack as required by push_CPU_state and call
812 push_CPU_state(); // keeps alignment at 16 bytes
813 lea(c_rarg0, ExternalAddress((address) msg));
814 lea(rax, ExternalAddress(CAST_FROM_FN_PTR(address, warning)((address)((address_word)(warning)))));
815 call(rax);
816 pop_CPU_state();
// undo the alignment by restoring the rsp remembered in rbp
817 mov(rsp, rbp);
818 pop(rbp);
819}
820
// Emits code that calls MacroAssembler::print_state64 as a VM leaf call and
// then restores everything it saved.
// Arguments passed: c_rarg0 = the pc captured at the start of this sequence,
// c_rarg1 = rbp + wordSize, i.e. the address just above the rbp pushed here,
// which is where the pusha() register block starts.
821void MacroAssembler::print_state() {
822 address rip = pc();
823 pusha(); // get regs on stack
824 push(rbp);
825 movq(rbp, rsp);
826 andq(rsp, -16); // align stack as required by push_CPU_state and call
827 push_CPU_state(); // keeps alignment at 16 bytes
828
829 lea(c_rarg0, InternalAddress(rip));
830 lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
831 call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64)((address)((address_word)(MacroAssembler::print_state64))), c_rarg0, c_rarg1);
832
// unwind in reverse order of the saves above
833 pop_CPU_state();
834 mov(rsp, rbp);
835 pop(rbp);
836 popa();
837}
838
839#ifndef PRODUCT
840extern "C" void findpc(intptr_t x);
841#endif
842
// Runtime target of the code emitted by stop(): reports `msg` and ends in fatal().
//   msg  - message to report
//   pc   - pc passed by the generated code; only used when ShowMessageBoxOnError is set
//   regs - saved-register array passed by the generated code; same condition
// With ShowMessageBoxOnError the thread state is switched to _thread_in_vm, a
// message box is shown, and on confirmation the register state is printed and
// a breakpoint is hit. fatal() is reached on every path.
843void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
844 // In order to get locks to work, we need to fake a in_VM state
845 if (ShowMessageBoxOnError) {
846 JavaThread* thread = JavaThread::current();
// NOTE(review): saved_state is captured but never read again in this function
847 JavaThreadState saved_state = thread->thread_state();
848 thread->set_thread_state(_thread_in_vm);
849#ifndef PRODUCT
850 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
851 ttyLocker ttyl;
852 BytecodeCounter::print();
853 }
854#endif
855 // To see where a verify_oop failed, get $ebx+40/X for this frame.
856 // XXX correct this offset for amd64
857 // This is the value of eip which points to where verify_oop will return.
858 if (os::message_box(msg, "Execution stopped, print registers?")) {
859 print_state64(pc, regs);
860 BREAKPOINT::breakpoint();
861 }
862 }
863 fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 863, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0)
;
864}
865
// Prints a diagnostic dump to tty: the pc, the sixteen general-purpose
// registers, words near the top of the stack, and a disassembly around pc.
//   pc   - program counter to report and disassemble around
//   regs - register block with r15 at index 0 up to rax at index 15; index 11
//          is not printed, the old rsp is computed as &regs[16] instead
// Holds the tty lock and sets Debugging to true for the duration of the call.
866void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
867 ttyLocker ttyl;
868 FlagSetting fs(Debugging, true);
869 tty->print_cr("rip = 0x%016lx", (intptr_t)pc);
870#ifndef PRODUCT
871 tty->cr();
872 findpc(pc);
873 tty->cr();
874#endif
// Prints "<name> = " followed by os::print_location for the given value.
875#define PRINT_REG(rax, value) \
876 { tty->print("%s = ", #rax); os::print_location(tty, value); }
877 PRINT_REG(rax, regs[15]);
878 PRINT_REG(rbx, regs[12]);
879 PRINT_REG(rcx, regs[14]);
880 PRINT_REG(rdx, regs[13]);
881 PRINT_REG(rdi, regs[8]);
882 PRINT_REG(rsi, regs[9]);
883 PRINT_REG(rbp, regs[10]);
884 // rsp is actually not stored by pusha(), compute the old rsp from regs (rsp after pusha): regs + 16 = old rsp
885 PRINT_REG(rsp, (intptr_t)(&regs[16]));
886 PRINT_REG(r8 , regs[7]);
887 PRINT_REG(r9 , regs[6]);
888 PRINT_REG(r10, regs[5]);
889 PRINT_REG(r11, regs[4]);
890 PRINT_REG(r12, regs[3]);
891 PRINT_REG(r13, regs[2]);
892 PRINT_REG(r14, regs[1]);
893 PRINT_REG(r15, regs[0]);
894#undef PRINT_REG
895 // Print some words near the top of the stack.
896 int64_t* rsp = &regs[16];
897 int64_t* dump_sp = rsp;
// first 8 words: one per line, each described with os::print_location
898 for (int col1 = 0; col1 < 8; col1++) {
899 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
900 os::print_location(tty, *dump_sp++);
901 }
// next 25 rows of 4 raw words each
902 for (int row = 0; row < 25; row++) {
903 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
904 for (int col = 0; col < 4; col++) {
905 tty->print(" 0x%016lx", (intptr_t)*dump_sp++);
906 }
907 tty->cr();
908 }
909 // Print some instructions around pc:
// 64 bytes before pc, a separator, then 32 bytes starting at pc
910 Disassembler::decode((address)pc-64, (address)pc);
911 tty->print_cr("--------");
912 Disassembler::decode((address)pc, (address)pc+32);
913}
914
915// The java_calling_convention describes stack locations as ideal slots on
916// a frame with no abi restrictions. Since we must observe abi restrictions
917// (like the placement of the register window) the slots must be biased by
918// the following value.
919static int reg2offset_in(VMReg r) {
920 // Account for saved rbp and return address
921 // This should really be in_preserve_stack_slots
922 return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
923}
924
925static int reg2offset_out(VMReg r) {
926 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
927}
928
929// A long move
930void MacroAssembler::long_move(VMRegPair src, VMRegPair dst) {
931
932 // The calling conventions assures us that each VMregpair is either
933 // all really one physical register or adjacent stack slots.
934
935 if (src.is_single_phys_reg() ) {
936 if (dst.is_single_phys_reg()) {
937 if (dst.first() != src.first()) {
938 mov(dst.first()->as_Register(), src.first()->as_Register());
939 }
940 } else {
941 assert(dst.is_single_reg(), "not a stack pair")do { if (!(dst.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 941, "assert(" "dst.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
942 movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
943 }
944 } else if (dst.is_single_phys_reg()) {
945 assert(src.is_single_reg(), "not a stack pair")do { if (!(src.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 945, "assert(" "src.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
946 movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first())));
947 } else {
948 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs")do { if (!(src.is_single_reg() && dst.is_single_reg()
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 948, "assert(" "src.is_single_reg() && dst.is_single_reg()"
") failed", "not stack pairs"); ::breakpoint(); } } while (0
)
;
949 movq(rax, Address(rbp, reg2offset_in(src.first())));
950 movq(Address(rsp, reg2offset_out(dst.first())), rax);
951 }
952}
953
954// A double move
955void MacroAssembler::double_move(VMRegPair src, VMRegPair dst) {
956
957 // The calling conventions assures us that each VMregpair is either
958 // all really one physical register or adjacent stack slots.
959
960 if (src.is_single_phys_reg() ) {
961 if (dst.is_single_phys_reg()) {
962 // In theory these overlap but the ordering is such that this is likely a nop
963 if ( src.first() != dst.first()) {
964 movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
965 }
966 } else {
967 assert(dst.is_single_reg(), "not a stack pair")do { if (!(dst.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 967, "assert(" "dst.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
968 movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
969 }
970 } else if (dst.is_single_phys_reg()) {
971 assert(src.is_single_reg(), "not a stack pair")do { if (!(src.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 971, "assert(" "src.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
972 movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first())));
973 } else {
974 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs")do { if (!(src.is_single_reg() && dst.is_single_reg()
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 974, "assert(" "src.is_single_reg() && dst.is_single_reg()"
") failed", "not stack pairs"); ::breakpoint(); } } while (0
)
;
975 movq(rax, Address(rbp, reg2offset_in(src.first())));
976 movq(Address(rsp, reg2offset_out(dst.first())), rax);
977 }
978}
979
980
981// A float arg may have to do float reg int reg conversion
982void MacroAssembler::float_move(VMRegPair src, VMRegPair dst) {
983 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move")do { if (!(!src.second()->is_valid() && !dst.second
()->is_valid())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 983, "assert(" "!src.second()->is_valid() && !dst.second()->is_valid()"
") failed", "bad float_move"); ::breakpoint(); } } while (0)
;
984
985 // The calling conventions assures us that each VMregpair is either
986 // all really one physical register or adjacent stack slots.
987
988 if (src.first()->is_stack()) {
989 if (dst.first()->is_stack()) {
990 movl(rax, Address(rbp, reg2offset_in(src.first())));
991 movptr(Address(rsp, reg2offset_out(dst.first())), rax);
992 } else {
993 // stack to reg
994 assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters")do { if (!(dst.first()->is_XMMRegister())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 994, "assert(" "dst.first()->is_XMMRegister()" ") failed"
, "only expect xmm registers as parameters"); ::breakpoint();
} } while (0)
;
995 movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
996 }
997 } else if (dst.first()->is_stack()) {
998 // reg to stack
999 assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters")do { if (!(src.first()->is_XMMRegister())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 999, "assert(" "src.first()->is_XMMRegister()" ") failed"
, "only expect xmm registers as parameters"); ::breakpoint();
} } while (0)
;
1000 movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
1001 } else {
1002 // reg to reg
1003 // In theory these overlap but the ordering is such that this is likely a nop
1004 if ( src.first() != dst.first()) {
1005 movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
1006 }
1007 }
1008}
1009
1010// On 64 bit we will store integer like items to the stack as
1011// 64 bits items (x86_32/64 abi) even though java would only store
1012// 32bits for a parameter. On 32bit it will simply be 32 bits
1013// So this routine will do 32->32 on 32bit and 32->64 on 64bit
1014void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst) {
1015 if (src.first()->is_stack()) {
1016 if (dst.first()->is_stack()) {
1017 // stack to stack
1018 movslq(rax, Address(rbp, reg2offset_in(src.first())));
1019 movq(Address(rsp, reg2offset_out(dst.first())), rax);
1020 } else {
1021 // stack to reg
1022 movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
1023 }
1024 } else if (dst.first()->is_stack()) {
1025 // reg to stack
1026 // Do we really have to sign extend???
1027 // __ movslq(src.first()->as_Register(), src.first()->as_Register());
1028 movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
1029 } else {
1030 // Do we really have to sign extend???
1031 // __ movslq(dst.first()->as_Register(), src.first()->as_Register());
1032 if (dst.first() != src.first()) {
1033 movq(dst.first()->as_Register(), src.first()->as_Register());
1034 }
1035 }
1036}
1037
1038void MacroAssembler::move_ptr(VMRegPair src, VMRegPair dst) {
1039 if (src.first()->is_stack()) {
1040 if (dst.first()->is_stack()) {
1041 // stack to stack
1042 movq(rax, Address(rbp, reg2offset_in(src.first())));
1043 movq(Address(rsp, reg2offset_out(dst.first())), rax);
1044 } else {
1045 // stack to reg
1046 movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
1047 }
1048 } else if (dst.first()->is_stack()) {
1049 // reg to stack
1050 movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
1051 } else {
1052 if (dst.first() != src.first()) {
1053 movq(dst.first()->as_Register(), src.first()->as_Register());
1054 }
1055 }
1056}
1057
1058// An oop arg. Must pass a handle not the oop itself
1059void MacroAssembler::object_move(OopMap* map,
1060 int oop_handle_offset,
1061 int framesize_in_slots,
1062 VMRegPair src,
1063 VMRegPair dst,
1064 bool is_receiver,
1065 int* receiver_offset) {
1066
1067 // must pass a handle. First figure out the location we use as a handle
1068
1069 Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register();
1070
1071 // See if oop is NULL if it is we need no handle
1072
1073 if (src.first()->is_stack()) {
1074
1075 // Oop is already on the stack as an argument
1076 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1077 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1078 if (is_receiver) {
1079 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1080 }
1081
1082 cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD0L);
1083 lea(rHandle, Address(rbp, reg2offset_in(src.first())));
1084 // conditionally move a NULL
1085 cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first())));
1086 } else {
1087
1088 // Oop is in an a register we must store it to the space we reserve
1089 // on the stack for oop_handles and pass a handle if oop is non-NULL
1090
1091 const Register rOop = src.first()->as_Register();
1092 int oop_slot;
1093 if (rOop == j_rarg0)
1094 oop_slot = 0;
1095 else if (rOop == j_rarg1)
1096 oop_slot = 1;
1097 else if (rOop == j_rarg2)
1098 oop_slot = 2;
1099 else if (rOop == j_rarg3)
1100 oop_slot = 3;
1101 else if (rOop == j_rarg4)
1102 oop_slot = 4;
1103 else {
1104 assert(rOop == j_rarg5, "wrong register")do { if (!(rOop == j_rarg5)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1104, "assert(" "rOop == j_rarg5" ") failed", "wrong register"
); ::breakpoint(); } } while (0)
;
1105 oop_slot = 5;
1106 }
1107
1108 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
1109 int offset = oop_slot*VMRegImpl::stack_slot_size;
1110
1111 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1112 // Store oop in handle area, may be NULL
1113 movptr(Address(rsp, offset), rOop);
1114 if (is_receiver) {
1115 *receiver_offset = offset;
1116 }
1117
1118 cmpptr(rOop, (int32_t)NULL_WORD0L);
1119 lea(rHandle, Address(rsp, offset));
1120 // conditionally move a NULL from the handle area where it was just stored
1121 cmovptr(Assembler::equal, rHandle, Address(rsp, offset));
1122 }
1123
1124 // If arg is on the stack then place it otherwise it is already in correct reg.
1125 if (dst.first()->is_stack()) {
1126 movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
1127 }
1128}
1129
1130#endif // _LP64
1131
1132// Now versions that are common to 32/64 bit
1133
// Pointer-width add of the immediate `imm32` to `dst`: addq under LP64,
// addl otherwise.
1134void MacroAssembler::addptr(Register dst, int32_t imm32) {
1135 LP64_ONLY(addq(dst, imm32))addq(dst, imm32) NOT_LP64(addl(dst, imm32));
1136}
1137
// Pointer-width add of register `src` to register `dst`: addq under LP64,
// addl otherwise.
1138void MacroAssembler::addptr(Register dst, Register src) {
1139 LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src));
1140}
1141
// Pointer-width add of register `src` to the memory operand `dst`: addq under
// LP64, addl otherwise.
1142void MacroAssembler::addptr(Address dst, Register src) {
1143 LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src));
1144}
1145
1146void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
1147 if (reachable(src)) {
1148 Assembler::addsd(dst, as_Address(src));
1149 } else {
1150 lea(rscratch1, src);
1151 Assembler::addsd(dst, Address(rscratch1, 0));
1152 }
1153}
1154
1155void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
1156 if (reachable(src)) {
1157 addss(dst, as_Address(src));
1158 } else {
1159 lea(rscratch1, src);
1160 addss(dst, Address(rscratch1, 0));
1161 }
1162}
1163
1164void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) {
1165 if (reachable(src)) {
1166 Assembler::addpd(dst, as_Address(src));
1167 } else {
1168 lea(rscratch1, src);
1169 Assembler::addpd(dst, Address(rscratch1, 0));
1170 }
1171}
1172
1173// See 8273459. Function for ensuring 64-byte alignment, intended for stubs only.
1174// Stub code is generated once and never copied.
1175// NMethods can't use this because they get copied and we can't force alignment > 32 bytes.
// Aligns on the absolute pc rather than the buffer offset. The pc is cast to
// unsigned long long and then implicitly narrowed to the int `target`
// parameter of align(int, int).
1176void MacroAssembler::align64() {
1177 align(64, (unsigned long long) pc());
1178}
1179
// Pads to a 32-byte boundary of the absolute pc. The pc is cast to unsigned
// long long and then implicitly narrowed to the int `target` parameter of
// align(int, int).
1180void MacroAssembler::align32() {
1181 align(32, (unsigned long long) pc());
1182}
1183
// Pads so that the current offset() becomes a multiple of `modulus`.
// `modulus` must not exceed CodeEntryAlignment (asserted).
1184void MacroAssembler::align(int modulus) {
1185 // 8273459: Ensure alignment is possible with current segment alignment
1186 assert(modulus <= CodeEntryAlignment, "Alignment must be <= CodeEntryAlignment")do { if (!(modulus <= CodeEntryAlignment)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1186, "assert(" "modulus <= CodeEntryAlignment" ") failed"
, "Alignment must be <= CodeEntryAlignment"); ::breakpoint
(); } } while (0)
;
1187 align(modulus, offset());
1188}
1189
1190void MacroAssembler::align(int modulus, int target) {
1191 if (target % modulus != 0) {
1192 nop(modulus - (target % modulus));
1193 }
1194}
1195
1196void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
1197 // Used in sign-masking with aligned address.
1198 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1198, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
1199 if (reachable(src)) {
1200 Assembler::andpd(dst, as_Address(src));
1201 } else {
1202 lea(scratch_reg, src);
1203 Assembler::andpd(dst, Address(scratch_reg, 0));
1204 }
1205}
1206
1207void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
1208 // Used in sign-masking with aligned address.
1209 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1209, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
1210 if (reachable(src)) {
1211 Assembler::andps(dst, as_Address(src));
1212 } else {
1213 lea(scratch_reg, src);
1214 Assembler::andps(dst, Address(scratch_reg, 0));
1215 }
1216}
1217
// Pointer-width AND of `dst` with the immediate `imm32`: andq under LP64,
// andl otherwise.
1218void MacroAssembler::andptr(Register dst, int32_t imm32) {
1219 LP64_ONLY(andq(dst, imm32))andq(dst, imm32) NOT_LP64(andl(dst, imm32));
1220}
1221
// Emits a lock prefix followed by incrementl on the counter at `counter_addr`.
1222void MacroAssembler::atomic_incl(Address counter_addr) {
1223 lock();
1224 incrementl(counter_addr);
1225}
1226
1227void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
1228 if (reachable(counter_addr)) {
1229 atomic_incl(as_Address(counter_addr));
1230 } else {
1231 lea(scr, counter_addr);
1232 atomic_incl(Address(scr, 0));
1233 }
1234}
1235
1236#ifdef _LP641
// Emits a lock prefix followed by incrementq on the counter at `counter_addr`.
1237void MacroAssembler::atomic_incq(Address counter_addr) {
1238 lock();
1239 incrementq(counter_addr);
1240}
1241
1242void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
1243 if (reachable(counter_addr)) {
1244 atomic_incq(as_Address(counter_addr));
1245 } else {
1246 lea(scr, counter_addr);
1247 atomic_incq(Address(scr, 0));
1248 }
1249}
1250#endif
1251
1252// Writes to stack successive pages until offset reached to check for
1253// stack overflow + shadow pages. This clobbers tmp.
// `size` is also modified: the emitted loop subtracts one page size from it
// per iteration and runs while the result is greater than zero.
1254void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1255 movptr(tmp, rsp);
1256 // Bang stack for total size given plus shadow page size.
1257 // Bang one page at a time because large size can bang beyond yellow and
1258 // red zones.
1259 Label loop;
1260 bind(loop);
1261 movl(Address(tmp, (-os::vm_page_size())), size );
1262 subptr(tmp, os::vm_page_size());
1263 subl(size, os::vm_page_size());
1264 jcc(Assembler::greater, loop);
1265
1266 // Bang down shadow pages too.
1267 // At this point, (tmp-0) is the last address touched, so don't
1268 // touch it again. (It was touched as (tmp-pagesize) but then tmp
1269 // was post-decremented.) Skip this address by starting at i=1, and
1270 // touch a few more pages below. N.B. It is important to touch all
1271 // the way down including all pages in the shadow zone.
1272 for (int i = 1; i < ((int)StackOverflow::stack_shadow_zone_size() / os::vm_page_size()); i++) {
1273 // This could be a move of any size, but the stored value can serve as a
1274 // debugging crumb, so the bigger the better.
1275 movptr(Address(tmp, (-i*os::vm_page_size())), size );
1276 }
1277}
1278
// Emits a check of rsp against the thread's reserved_stack_activation value.
// When rsp is below it, execution falls through to the end of the sequence.
// Otherwise SharedRuntime::enable_stack_reserved_zone is called with the
// thread and control jumps to the delayed StackOverflowError stub; the code
// after that jump is marked as unreachable.
1279void MacroAssembler::reserved_stack_check() {
1280 // testing if reserved zone needs to be enabled
1281 Label no_reserved_zone_enabling;
// thread register: r15_thread under LP64, rsi (loaded via get_thread) otherwise
1282 Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread)r15_thread;
1283 NOT_LP64(get_thread(rsi);)
1284
1285 cmpptr(rsp, Address(thread, JavaThread::reserved_stack_activation_offset()));
1286 jcc(Assembler::below, no_reserved_zone_enabling);
1287
1288 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)((address)((address_word)(SharedRuntime::enable_stack_reserved_zone
)))
, thread);
1289 jump(RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()));
1290 should_not_reach_here();
1291
1292 bind(no_reserved_zone_enabling);
1293}
1294
// Normalizes a C-style boolean held in `x` to 0 or 1.
1295void MacroAssembler::c2bool(Register x) {
1296 // implements x == 0 ? 0 : 1
1297 // note: must only look at least-significant byte of x
1298 // since C-style booleans are stored in one byte
1299 // only! (was bug)
// mask to the low byte, then set x from the notZero condition of that result
1300 andl(x, 0xFF);
1301 setb(Assembler::notZero, x);
1302}
1303
1304// Wouldn't need if AddressLiteral version had new name
// Forwards unchanged to Assembler::call(Label&, relocType).
1305void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
1306 Assembler::call(L, rtype);
1307}
1308
// Forwards unchanged to Assembler::call(Register).
1309void MacroAssembler::call(Register entry) {
1310 Assembler::call(entry);
1311}
1312
1313void MacroAssembler::call(AddressLiteral entry) {
1314 if (reachable(entry)) {
1315 Assembler::call_literal(entry.target(), entry.rspec());
1316 } else {
1317 lea(rscratch1, entry);
1318 Assembler::call(rscratch1);
1319 }
1320}
1321
// Emits an inline-cache call to `entry`.
// The virtual-call relocation is created for the pc at which the rax load is
// emitted; rax is loaded with Universe::non_oop_word() before the call.
1322void MacroAssembler::ic_call(address entry, jint method_index) {
1323 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
1324 movptr(rax, (intptr_t)Universe::non_oop_word());
1325 call(AddressLiteral(entry, rh));
1326}
1327
1328// Implementation of call_VM versions
1329
1330void MacroAssembler::call_VM(Register oop_result,
1331 address entry_point,
1332 bool check_exceptions) {
1333 Label C, E;
1334 call(C, relocInfo::none);
1335 jmp(E);
1336
1337 bind(C);
1338 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
1339 ret(0);
1340
1341 bind(E);
1342}
1343
1344void MacroAssembler::call_VM(Register oop_result,
1345 address entry_point,
1346 Register arg_1,
1347 bool check_exceptions) {
1348 Label C, E;
1349 call(C, relocInfo::none);
1350 jmp(E);
1351
1352 bind(C);
1353 pass_arg1(this, arg_1);
1354 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
1355 ret(0);
1356
1357 bind(E);
1358}
1359
1360void MacroAssembler::call_VM(Register oop_result,
1361 address entry_point,
1362 Register arg_1,
1363 Register arg_2,
1364 bool check_exceptions) {
1365 Label C, E;
1366 call(C, relocInfo::none);
1367 jmp(E);
1368
1369 bind(C);
1370
1371 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1371, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1372
1373 pass_arg2(this, arg_2);
1374 pass_arg1(this, arg_1);
1375 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
1376 ret(0);
1377
1378 bind(E);
1379}
1380
1381void MacroAssembler::call_VM(Register oop_result,
1382 address entry_point,
1383 Register arg_1,
1384 Register arg_2,
1385 Register arg_3,
1386 bool check_exceptions) {
1387 Label C, E;
1388 call(C, relocInfo::none);
1389 jmp(E);
1390
1391 bind(C);
1392
1393 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1393, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1394 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1394, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1395 pass_arg3(this, arg_3);
1396
1397 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1397, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1398 pass_arg2(this, arg_2);
1399
1400 pass_arg1(this, arg_1);
1401 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
1402 ret(0);
1403
1404 bind(E);
1405}
1406
// call_VM with an explicit last_java_sp and an argument count; arguments are
// expected to be in place already. Forwards to call_VM_base with r15_thread
// as the thread register under LP64 and noreg otherwise.
1407void MacroAssembler::call_VM(Register oop_result,
1408 Register last_java_sp,
1409 address entry_point,
1410 int number_of_arguments,
1411 bool check_exceptions) {
1412 Register thread = LP64_ONLY(r15_thread)r15_thread NOT_LP64(noreg);
1413 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
1414}
1415
// call_VM with an explicit last_java_sp and one register argument: arg_1 is
// moved into c_rarg1, then the argument-count overload is called with 1.
1416void MacroAssembler::call_VM(Register oop_result,
1417 Register last_java_sp,
1418 address entry_point,
1419 Register arg_1,
1420 bool check_exceptions) {
1421 pass_arg1(this, arg_1);
1422 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
1423}
1424
1425void MacroAssembler::call_VM(Register oop_result,
1426 Register last_java_sp,
1427 address entry_point,
1428 Register arg_1,
1429 Register arg_2,
1430 bool check_exceptions) {
1431
1432 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1432, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1433 pass_arg2(this, arg_2);
1434 pass_arg1(this, arg_1);
1435 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
1436}
1437
1438void MacroAssembler::call_VM(Register oop_result,
1439 Register last_java_sp,
1440 address entry_point,
1441 Register arg_1,
1442 Register arg_2,
1443 Register arg_3,
1444 bool check_exceptions) {
1445 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1445, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1446 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1446, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1447 pass_arg3(this, arg_3);
1448 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1448, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1449 pass_arg2(this, arg_2);
1450 pass_arg1(this, arg_1);
1451 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
1452}
1453
// Like the argument-count call_VM overload, but calls call_VM_base with an
// explicit MacroAssembler:: qualification. Uses r15_thread as the thread
// register under LP64 and noreg otherwise.
1454void MacroAssembler::super_call_VM(Register oop_result,
1455 Register last_java_sp,
1456 address entry_point,
1457 int number_of_arguments,
1458 bool check_exceptions) {
1459 Register thread = LP64_ONLY(r15_thread)r15_thread NOT_LP64(noreg);
1460 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
1461}
1462
// super_call_VM with one register argument: arg_1 is moved into c_rarg1, then
// the argument-count overload is called with 1.
1463void MacroAssembler::super_call_VM(Register oop_result,
1464 Register last_java_sp,
1465 address entry_point,
1466 Register arg_1,
1467 bool check_exceptions) {
1468 pass_arg1(this, arg_1);
1469 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
1470}
1471
1472void MacroAssembler::super_call_VM(Register oop_result,
1473 Register last_java_sp,
1474 address entry_point,
1475 Register arg_1,
1476 Register arg_2,
1477 bool check_exceptions) {
1478
1479 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1479, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1480 pass_arg2(this, arg_2);
1481 pass_arg1(this, arg_1);
1482 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
1483}
1484
1485void MacroAssembler::super_call_VM(Register oop_result,
1486 Register last_java_sp,
1487 address entry_point,
1488 Register arg_1,
1489 Register arg_2,
1490 Register arg_3,
1491 bool check_exceptions) {
1492 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1492, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1493 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1493, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1494 pass_arg3(this, arg_3);
1495 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1495, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1496 pass_arg2(this, arg_2);
1497 pass_arg1(this, arg_1);
1498 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
1499}
1500
1501void MacroAssembler::call_VM_base(Register oop_result,
1502 Register java_thread,
1503 Register last_java_sp,
1504 address entry_point,
1505 int number_of_arguments,
1506 bool check_exceptions) {
1507 // determine java_thread register
1508 if (!java_thread->is_valid()) {
1509#ifdef _LP641
1510 java_thread = r15_thread;
1511#else
1512 java_thread = rdi;
1513 get_thread(java_thread);
1514#endif // LP64
1515 }
1516 // determine last_java_sp register
1517 if (!last_java_sp->is_valid()) {
1518 last_java_sp = rsp;
1519 }
1520 // debugging support
1521 assert(number_of_arguments >= 0 , "cannot have negative number of arguments")do { if (!(number_of_arguments >= 0)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1521, "assert(" "number_of_arguments >= 0" ") failed", "cannot have negative number of arguments"
); ::breakpoint(); } } while (0)
;
1522 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"))do { if (!(java_thread == r15_thread)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1522, "assert(" "java_thread == r15_thread" ") failed", "unexpected register"
); ::breakpoint(); } } while (0)
;
1523#ifdef ASSERT1
1524 // TraceBytecodes does not use r12 but saves it over the call, so don't verify
1525 // r12 is the heapbase.
1526 LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)if (UseCompressedOops && !TraceBytecodes) verify_heapbase
("call_VM_base: heap base corrupted?");
1527#endif // ASSERT
1528
1529 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result")do { if (!(java_thread != oop_result)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1529, "assert(" "java_thread != oop_result" ") failed", "cannot use the same register for java_thread & oop_result"
); ::breakpoint(); } } while (0)
;
1530 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp")do { if (!(java_thread != last_java_sp)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1530, "assert(" "java_thread != last_java_sp" ") failed", "cannot use the same register for java_thread & last_java_sp"
); ::breakpoint(); } } while (0)
;
1531
1532 // push java thread (becomes first argument of C function)
1533
1534 NOT_LP64(push(java_thread); number_of_arguments++);
1535 LP64_ONLY(mov(c_rarg0, r15_thread))mov(c_rarg0, r15_thread);
1536
1537 // set last Java frame before call
1538 assert(last_java_sp != rbp, "can't use ebp/rbp")do { if (!(last_java_sp != rbp)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1538, "assert(" "last_java_sp != rbp" ") failed", "can't use ebp/rbp"
); ::breakpoint(); } } while (0)
;
1539
1540 // Only interpreter should have to set fp
1541 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL__null);
1542
1543 // do the call, remove parameters
1544 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
1545
1546 // restore the thread (cannot use the pushed argument since arguments
1547 // may be overwritten by C code generated by an optimizing compiler);
1548 // however can use the register value directly if it is callee saved.
1549 if (LP64_ONLY(true ||)true || java_thread == rdi || java_thread == rsi) {
1550 // rdi & rsi (also r15) are callee saved -> nothing to do
1551#ifdef ASSERT1
1552 guarantee(java_thread != rax, "change this code")do { if (!(java_thread != rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1552, "guarantee(" "java_thread != rax" ") failed", "change this code"
); ::breakpoint(); } } while (0)
;
1553 push(rax);
1554 { Label L;
1555 get_thread(rax);
1556 cmpptr(java_thread, rax);
1557 jcc(Assembler::equal, L);
1558 STOP("MacroAssembler::call_VM_base: rdi not callee saved?")block_comment("MacroAssembler::call_VM_base: rdi not callee saved?"
); stop("MacroAssembler::call_VM_base: rdi not callee saved?"
)
;
1559 bind(L);
1560 }
1561 pop(rax);
1562#endif
1563 } else {
1564 get_thread(java_thread);
1565 }
1566 // reset last Java frame
1567 // Only interpreter should have to clear fp
1568 reset_last_Java_frame(java_thread, true);
1569
1570 // C++ interp handles this in the interpreter
1571 check_and_handle_popframe(java_thread);
1572 check_and_handle_earlyret(java_thread);
1573
1574 if (check_exceptions) {
1575 // check for pending exceptions (java_thread is set upon return)
1576 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD0L);
1577#ifndef _LP641
1578 jump_cc(Assembler::notEqual,
1579 RuntimeAddress(StubRoutines::forward_exception_entry()));
1580#else
1581 // This used to conditionally jump to forward_exception however it is
1582 // possible if we relocate that the branch will not reach. So we must jump
1583 // around so we can always reach
1584
1585 Label ok;
1586 jcc(Assembler::equal, ok);
1587 jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1588 bind(ok);
1589#endif // LP64
1590 }
1591
1592 // get oop result if there is one and reset the value in the thread
1593 if (oop_result->is_valid()) {
1594 get_vm_result(oop_result, java_thread);
1595 }
1596}
1597
1598void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1599
1600 // Calculate the value for last_Java_sp
1601 // somewhat subtle. call_VM does an intermediate call
1602 // which places a return address on the stack just under the
1603 // stack pointer as the user finsihed with it. This allows
1604 // use to retrieve last_Java_pc from last_Java_sp[-1].
1605 // On 32bit we then have to push additional args on the stack to accomplish
1606 // the actual requested call. On 64bit call_VM only can use register args
1607 // so the only extra space is the return address that call_VM created.
1608 // This hopefully explains the calculations here.
1609
1610#ifdef _LP641
1611 // We've pushed one address, correct last_Java_sp
1612 lea(rax, Address(rsp, wordSize));
1613#else
1614 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
1615#endif // LP64
1616
1617 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
1618
1619}
1620
1621// Use this method when MacroAssembler version of call_VM_leaf_base() should be called from Interpreter.
1622void MacroAssembler::call_VM_leaf0(address entry_point) {
1623 MacroAssembler::call_VM_leaf_base(entry_point, 0);
1624}
1625
1626void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
1627 call_VM_leaf_base(entry_point, number_of_arguments);
1628}
1629
1630void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
1631 pass_arg0(this, arg_0);
1632 call_VM_leaf(entry_point, 1);
1633}
1634
1635void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1636
1637 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1637, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1638 pass_arg1(this, arg_1);
1639 pass_arg0(this, arg_0);
1640 call_VM_leaf(entry_point, 2);
1641}
1642
1643void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1644 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1644, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1645 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1645, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1646 pass_arg2(this, arg_2);
1647 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1647, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1648 pass_arg1(this, arg_1);
1649 pass_arg0(this, arg_0);
1650 call_VM_leaf(entry_point, 3);
1651}
1652
1653void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
1654 pass_arg0(this, arg_0);
1655 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1656}
1657
1658void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1659
1660 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1660, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1661 pass_arg1(this, arg_1);
1662 pass_arg0(this, arg_0);
1663 MacroAssembler::call_VM_leaf_base(entry_point, 2);
1664}
1665
1666void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1667 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1667, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1668 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1668, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1669 pass_arg2(this, arg_2);
1670 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1670, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1671 pass_arg1(this, arg_1);
1672 pass_arg0(this, arg_0);
1673 MacroAssembler::call_VM_leaf_base(entry_point, 3);
1674}
1675
1676void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
1677 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"))do { if (!(arg_0 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1677, "assert(" "arg_0 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1678 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1678, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1679 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1679, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1680 pass_arg3(this, arg_3);
1681 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1681, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1682 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1682, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1683 pass_arg2(this, arg_2);
1684 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1684, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1685 pass_arg1(this, arg_1);
1686 pass_arg0(this, arg_0);
1687 MacroAssembler::call_VM_leaf_base(entry_point, 4);
1688}
1689
1690void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
1691 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
1692 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD0L);
1693 verify_oop_msg(oop_result, "broken oop in call_VM_base")_verify_oop_checked(oop_result, "broken oop " "oop_result" ", "
"\"broken oop in call_VM_base\"", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1693)
;
1694}
1695
1696void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
1697 movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
1698 movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD0L);
1699}
1700
// Hook invoked by call_VM_base after the last Java frame has been reset.
// This implementation emits nothing; java_thread is unused.
// NOTE(review): presumably overridden by interpreter-specific assemblers -
// confirm against the class declaration.
1701void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
1702}
1703
// Hook invoked by call_VM_base after the last Java frame has been reset.
// This implementation emits nothing; java_thread is unused.
// NOTE(review): presumably overridden by interpreter-specific assemblers -
// confirm against the class declaration.
1704void MacroAssembler::check_and_handle_popframe(Register java_thread) {
1705}
1706
1707void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
1708 if (reachable(src1)) {
1709 cmpl(as_Address(src1), imm);
1710 } else {
1711 lea(rscratch1, src1);
1712 cmpl(Address(rscratch1, 0), imm);
1713 }
1714}
1715
1716void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
1717 assert(!src2.is_lval(), "use cmpptr")do { if (!(!src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1717, "assert(" "!src2.is_lval()" ") failed", "use cmpptr")
; ::breakpoint(); } } while (0)
;
1718 if (reachable(src2)) {
1719 cmpl(src1, as_Address(src2));
1720 } else {
1721 lea(rscratch1, src2);
1722 cmpl(src1, Address(rscratch1, 0));
1723 }
1724}
1725
1726void MacroAssembler::cmp32(Register src1, int32_t imm) {
1727 Assembler::cmpl(src1, imm);
1728}
1729
1730void MacroAssembler::cmp32(Register src1, Address src2) {
1731 Assembler::cmpl(src1, src2);
1732}
1733
1734void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
1735 ucomisd(opr1, opr2);
1736
1737 Label L;
1738 if (unordered_is_less) {
1739 movl(dst, -1);
1740 jcc(Assembler::parity, L);
1741 jcc(Assembler::below , L);
1742 movl(dst, 0);
1743 jcc(Assembler::equal , L);
1744 increment(dst);
1745 } else { // unordered is greater
1746 movl(dst, 1);
1747 jcc(Assembler::parity, L);
1748 jcc(Assembler::above , L);
1749 movl(dst, 0);
1750 jcc(Assembler::equal , L);
1751 decrementl(dst);
1752 }
1753 bind(L);
1754}
1755
1756void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
1757 ucomiss(opr1, opr2);
1758
1759 Label L;
1760 if (unordered_is_less) {
1761 movl(dst, -1);
1762 jcc(Assembler::parity, L);
1763 jcc(Assembler::below , L);
1764 movl(dst, 0);
1765 jcc(Assembler::equal , L);
1766 increment(dst);
1767 } else { // unordered is greater
1768 movl(dst, 1);
1769 jcc(Assembler::parity, L);
1770 jcc(Assembler::above , L);
1771 movl(dst, 0);
1772 jcc(Assembler::equal , L);
1773 decrementl(dst);
1774 }
1775 bind(L);
1776}
1777
1778
1779void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
1780 if (reachable(src1)) {
1781 cmpb(as_Address(src1), imm);
1782 } else {
1783 lea(rscratch1, src1);
1784 cmpb(Address(rscratch1, 0), imm);
1785 }
1786}
1787
1788void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
1789#ifdef _LP641
1790 if (src2.is_lval()) {
1791 movptr(rscratch1, src2);
1792 Assembler::cmpq(src1, rscratch1);
1793 } else if (reachable(src2)) {
1794 cmpq(src1, as_Address(src2));
1795 } else {
1796 lea(rscratch1, src2);
1797 Assembler::cmpq(src1, Address(rscratch1, 0));
1798 }
1799#else
1800 if (src2.is_lval()) {
1801 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
1802 } else {
1803 cmpl(src1, as_Address(src2));
1804 }
1805#endif // _LP64
1806}
1807
1808void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
1809 assert(src2.is_lval(), "not a mem-mem compare")do { if (!(src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1809, "assert(" "src2.is_lval()" ") failed", "not a mem-mem compare"
); ::breakpoint(); } } while (0)
;
1810#ifdef _LP641
1811 // moves src2's literal address
1812 movptr(rscratch1, src2);
1813 Assembler::cmpq(src1, rscratch1);
1814#else
1815 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
1816#endif // _LP64
1817}
1818
1819void MacroAssembler::cmpoop(Register src1, Register src2) {
1820 cmpptr(src1, src2);
1821}
1822
1823void MacroAssembler::cmpoop(Register src1, Address src2) {
1824 cmpptr(src1, src2);
1825}
1826
1827#ifdef _LP641
1828void MacroAssembler::cmpoop(Register src1, jobject src2) {
1829 movoop(rscratch1, src2);
1830 cmpptr(src1, rscratch1);
1831}
1832#endif
1833
1834void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
1835 if (reachable(adr)) {
1836 lock();
1837 cmpxchgptr(reg, as_Address(adr));
1838 } else {
1839 lea(rscratch1, adr);
1840 lock();
1841 cmpxchgptr(reg, Address(rscratch1, 0));
1842 }
1843}
1844
1845void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
1846 LP64_ONLY(cmpxchgq(reg, adr))cmpxchgq(reg, adr) NOT_LP64(cmpxchgl(reg, adr));
1847}
1848
1849void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
1850 if (reachable(src)) {
1851 Assembler::comisd(dst, as_Address(src));
1852 } else {
1853 lea(rscratch1, src);
1854 Assembler::comisd(dst, Address(rscratch1, 0));
1855 }
1856}
1857
1858void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
1859 if (reachable(src)) {
1860 Assembler::comiss(dst, as_Address(src));
1861 } else {
1862 lea(rscratch1, src);
1863 Assembler::comiss(dst, Address(rscratch1, 0));
1864 }
1865}
1866
1867
1868void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
1869 Condition negated_cond = negate_condition(cond);
1870 Label L;
1871 jcc(negated_cond, L);
1872 pushf(); // Preserve flags
1873 atomic_incl(counter_addr);
1874 popf();
1875 bind(L);
1876}
1877
1878int MacroAssembler::corrected_idivl(Register reg) {
1879 // Full implementation of Java idiv and irem; checks for
1880 // special case as described in JVM spec., p.243 & p.271.
1881 // The function returns the (pc) offset of the idivl
1882 // instruction - may be needed for implicit exceptions.
1883 //
1884 // normal case special case
1885 //
1886 // input : rax,: dividend min_int
1887 // reg: divisor (may not be rax,/rdx) -1
1888 //
1889 // output: rax,: quotient (= rax, idiv reg) min_int
1890 // rdx: remainder (= rax, irem reg) 0
1891 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register")do { if (!(reg != rax && reg != rdx)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1891, "assert(" "reg != rax && reg != rdx" ") failed"
, "reg cannot be rax, or rdx register"); ::breakpoint(); } } while
(0)
;
1892 const int min_int = 0x80000000;
1893 Label normal_case, special_case;
1894
1895 // check for special case
1896 cmpl(rax, min_int);
1897 jcc(Assembler::notEqual, normal_case);
1898 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
1899 cmpl(reg, -1);
1900 jcc(Assembler::equal, special_case);
1901
1902 // handle normal case
1903 bind(normal_case);
1904 cdql();
1905 int idivl_offset = offset();
1906 idivl(reg);
1907
1908 // normal and special case exit
1909 bind(special_case);
1910
1911 return idivl_offset;
1912}
1913
1914
1915
1916void MacroAssembler::decrementl(Register reg, int value) {
1917 if (value == min_jint) {subl(reg, value) ; return; }
1918 if (value < 0) { incrementl(reg, -value); return; }
1919 if (value == 0) { ; return; }
1920 if (value == 1 && UseIncDec) { decl(reg) ; return; }
1921 /* else */ { subl(reg, value) ; return; }
1922}
1923
1924void MacroAssembler::decrementl(Address dst, int value) {
1925 if (value == min_jint) {subl(dst, value) ; return; }
1926 if (value < 0) { incrementl(dst, -value); return; }
1927 if (value == 0) { ; return; }
1928 if (value == 1 && UseIncDec) { decl(dst) ; return; }
1929 /* else */ { subl(dst, value) ; return; }
1930}
1931
1932void MacroAssembler::division_with_shift (Register reg, int shift_value) {
1933 assert (shift_value > 0, "illegal shift value")do { if (!(shift_value > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1933, "assert(" "shift_value > 0" ") failed", "illegal shift value"
); ::breakpoint(); } } while (0)
;
1934 Label _is_positive;
1935 testl (reg, reg);
1936 jcc (Assembler::positive, _is_positive);
1937 int offset = (1 << shift_value) - 1 ;
1938
1939 if (offset == 1) {
1940 incrementl(reg);
1941 } else {
1942 addl(reg, offset);
1943 }
1944
1945 bind (_is_positive);
1946 sarl(reg, shift_value);
1947}
1948
1949void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
1950 if (reachable(src)) {
1951 Assembler::divsd(dst, as_Address(src));
1952 } else {
1953 lea(rscratch1, src);
1954 Assembler::divsd(dst, Address(rscratch1, 0));
1955 }
1956}
1957
1958void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
1959 if (reachable(src)) {
1960 Assembler::divss(dst, as_Address(src));
1961 } else {
1962 lea(rscratch1, src);
1963 Assembler::divss(dst, Address(rscratch1, 0));
1964 }
1965}
1966
1967void MacroAssembler::enter() {
1968 push(rbp);
1969 mov(rbp, rsp);
1970}
1971
1972// A 5 byte nop that is safe for patching (see patch_verified_entry)
1973void MacroAssembler::fat_nop() {
1974 if (UseAddressNop) {
1975 addr_nop_5();
1976 } else {
1977 emit_int8(0x26); // es:
1978 emit_int8(0x2e); // cs:
1979 emit_int8(0x64); // fs:
1980 emit_int8(0x65); // gs:
1981 emit_int8((unsigned char)0x90);
1982 }
1983}
1984
1985#ifndef _LP641
1986void MacroAssembler::fcmp(Register tmp) {
1987 fcmp(tmp, 1, true, true);
1988}
1989
1990void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
1991 assert(!pop_right || pop_left, "usage error")do { if (!(!pop_right || pop_left)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1991, "assert(" "!pop_right || pop_left" ") failed", "usage error"
); ::breakpoint(); } } while (0)
;
1992 if (VM_Version::supports_cmov()) {
1993 assert(tmp == noreg, "unneeded temp")do { if (!(tmp == noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1993, "assert(" "tmp == noreg" ") failed", "unneeded temp")
; ::breakpoint(); } } while (0)
;
1994 if (pop_left) {
1995 fucomip(index);
1996 } else {
1997 fucomi(index);
1998 }
1999 if (pop_right) {
2000 fpop();
2001 }
2002 } else {
2003 assert(tmp != noreg, "need temp")do { if (!(tmp != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2003, "assert(" "tmp != noreg" ") failed", "need temp"); ::
breakpoint(); } } while (0)
;
2004 if (pop_left) {
2005 if (pop_right) {
2006 fcompp();
2007 } else {
2008 fcomp(index);
2009 }
2010 } else {
2011 fcom(index);
2012 }
2013 // convert FPU condition into eflags condition via rax,
2014 save_rax(tmp);
2015 fwait(); fnstsw_ax();
2016 sahf();
2017 restore_rax(tmp);
2018 }
2019 // condition codes set as follows:
2020 //
2021 // CF (corresponds to C0) if x < y
2022 // PF (corresponds to C2) if unordered
2023 // ZF (corresponds to C3) if x = y
2024}
2025
2026void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
2027 fcmp2int(dst, unordered_is_less, 1, true, true);
2028}
2029
2030void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
2031 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
2032 Label L;
2033 if (unordered_is_less) {
2034 movl(dst, -1);
2035 jcc(Assembler::parity, L);
2036 jcc(Assembler::below , L);
2037 movl(dst, 0);
2038 jcc(Assembler::equal , L);
2039 increment(dst);
2040 } else { // unordered is greater
2041 movl(dst, 1);
2042 jcc(Assembler::parity, L);
2043 jcc(Assembler::above , L);
2044 movl(dst, 0);
2045 jcc(Assembler::equal , L);
2046 decrementl(dst);
2047 }
2048 bind(L);
2049}
2050
2051void MacroAssembler::fld_d(AddressLiteral src) {
2052 fld_d(as_Address(src));
2053}
2054
2055void MacroAssembler::fld_s(AddressLiteral src) {
2056 fld_s(as_Address(src));
2057}
2058
2059void MacroAssembler::fldcw(AddressLiteral src) {
2060 Assembler::fldcw(as_Address(src));
2061}
2062
2063void MacroAssembler::fpop() {
2064 ffree();
2065 fincstp();
2066}
2067
2068void MacroAssembler::fremr(Register tmp) {
2069 save_rax(tmp);
2070 { Label L;
2071 bind(L);
2072 fprem();
2073 fwait(); fnstsw_ax();
2074 sahf();
2075 jcc(Assembler::parity, L);
2076 }
2077 restore_rax(tmp);
2078 // Result is in ST0.
2079 // Note: fxch & fpop to get rid of ST1
2080 // (otherwise FPU stack could overflow eventually)
2081 fxch(1);
2082 fpop();
2083}
2084
2085void MacroAssembler::empty_FPU_stack() {
2086 if (VM_Version::supports_mmx()) {
2087 emms();
2088 } else {
2089 for (int i = 8; i-- > 0; ) ffree(i);
2090 }
2091}
2092#endif // !LP64
2093
2094void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
2095 if (reachable(src)) {
2096 Assembler::mulpd(dst, as_Address(src));
2097 } else {
2098 lea(rscratch1, src);
2099 Assembler::mulpd(dst, Address(rscratch1, 0));
2100 }
2101}
2102
2103void MacroAssembler::load_float(Address src) {
2104#ifdef _LP641
2105 movflt(xmm0, src);
2106#else
2107 if (UseSSE >= 1) {
2108 movflt(xmm0, src);
2109 } else {
2110 fld_s(src);
2111 }
2112#endif // LP64
2113}
2114
2115void MacroAssembler::store_float(Address dst) {
2116#ifdef _LP641
2117 movflt(dst, xmm0);
2118#else
2119 if (UseSSE >= 1) {
2120 movflt(dst, xmm0);
2121 } else {
2122 fstp_s(dst);
2123 }
2124#endif // LP64
2125}
2126
2127void MacroAssembler::load_double(Address src) {
2128#ifdef _LP641
2129 movdbl(xmm0, src);
2130#else
2131 if (UseSSE >= 2) {
2132 movdbl(xmm0, src);
2133 } else {
2134 fld_d(src);
2135 }
2136#endif // LP64
2137}
2138
2139void MacroAssembler::store_double(Address dst) {
2140#ifdef _LP641
2141 movdbl(dst, xmm0);
2142#else
2143 if (UseSSE >= 2) {
2144 movdbl(dst, xmm0);
2145 } else {
2146 fstp_d(dst);
2147 }
2148#endif // LP64
2149}
2150
2151// dst = c = a * b + c
2152void MacroAssembler::fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) {
2153 Assembler::vfmadd231sd(c, a, b);
2154 if (dst != c) {
2155 movdbl(dst, c);
2156 }
2157}
2158
2159// dst = c = a * b + c
2160void MacroAssembler::fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) {
2161 Assembler::vfmadd231ss(c, a, b);
2162 if (dst != c) {
2163 movflt(dst, c);
2164 }
2165}
2166
2167// dst = c = a * b + c
2168void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) {
2169 Assembler::vfmadd231pd(c, a, b, vector_len);
2170 if (dst != c) {
2171 vmovdqu(dst, c);
2172 }
2173}
2174
2175// dst = c = a * b + c
2176void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) {
2177 Assembler::vfmadd231ps(c, a, b, vector_len);
2178 if (dst != c) {
2179 vmovdqu(dst, c);
2180 }
2181}
2182
2183// dst = c = a * b + c
2184void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) {
2185 Assembler::vfmadd231pd(c, a, b, vector_len);
2186 if (dst != c) {
2187 vmovdqu(dst, c);
2188 }
2189}
2190
2191// dst = c = a * b + c
2192void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) {
2193 Assembler::vfmadd231ps(c, a, b, vector_len);
2194 if (dst != c) {
2195 vmovdqu(dst, c);
2196 }
2197}
2198
2199void MacroAssembler::incrementl(AddressLiteral dst) {
2200 if (reachable(dst)) {
2201 incrementl(as_Address(dst));
2202 } else {
2203 lea(rscratch1, dst);
2204 incrementl(Address(rscratch1, 0));
2205 }
2206}
2207
2208void MacroAssembler::incrementl(ArrayAddress dst) {
2209 incrementl(as_Address(dst));
2210}
2211
2212void MacroAssembler::incrementl(Register reg, int value) {
2213 if (value == min_jint) {addl(reg, value) ; return; }
2214 if (value < 0) { decrementl(reg, -value); return; }
2215 if (value == 0) { ; return; }
2216 if (value == 1 && UseIncDec) { incl(reg) ; return; }
2217 /* else */ { addl(reg, value) ; return; }
2218}
2219
2220void MacroAssembler::incrementl(Address dst, int value) {
2221 if (value == min_jint) {addl(dst, value) ; return; }
2222 if (value < 0) { decrementl(dst, -value); return; }
2223 if (value == 0) { ; return; }
2224 if (value == 1 && UseIncDec) { incl(dst) ; return; }
2225 /* else */ { addl(dst, value) ; return; }
2226}
2227
2228void MacroAssembler::jump(AddressLiteral dst) {
2229 if (reachable(dst)) {
2230 jmp_literal(dst.target(), dst.rspec());
2231 } else {
2232 lea(rscratch1, dst);
2233 jmp(rscratch1);
2234 }
2235}
2236
2237void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
2238 if (reachable(dst)) {
2239 InstructionMark im(this);
2240 relocate(dst.reloc());
2241 const int short_size = 2;
2242 const int long_size = 6;
2243 int offs = (intptr_t)dst.target() - ((intptr_t)pc());
2244 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
2245 // 0111 tttn #8-bit disp
2246 emit_int8(0x70 | cc);
2247 emit_int8((offs - short_size) & 0xFF);
2248 } else {
2249 // 0000 1111 1000 tttn #32-bit disp
2250 emit_int8(0x0F);
2251 emit_int8((unsigned char)(0x80 | cc));
2252 emit_int32(offs - long_size);
2253 }
2254 } else {
2255#ifdef ASSERT1
2256 warning("reversing conditional branch");
2257#endif /* ASSERT */
2258 Label skip;
2259 jccb(reverse[cc], skip)jccb_0(reverse[cc], skip, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2259)
;
2260 lea(rscratch1, dst);
2261 Assembler::jmp(rscratch1);
2262 bind(skip);
2263 }
2264}
2265
2266void MacroAssembler::fld_x(AddressLiteral src) {
2267 Assembler::fld_x(as_Address(src));
2268}
2269
2270void MacroAssembler::ldmxcsr(AddressLiteral src) {
2271 if (reachable(src)) {
2272 Assembler::ldmxcsr(as_Address(src));
2273 } else {
2274 lea(rscratch1, src);
2275 Assembler::ldmxcsr(Address(rscratch1, 0));
2276 }
2277}
2278
2279int MacroAssembler::load_signed_byte(Register dst, Address src) {
2280 int off;
2281 if (LP64_ONLY(true ||)true || VM_Version::is_P6()) {
2282 off = offset();
2283 movsbl(dst, src); // movsxb
2284 } else {
2285 off = load_unsigned_byte(dst, src);
2286 shll(dst, 24);
2287 sarl(dst, 24);
2288 }
2289 return off;
2290}
2291
2292// Note: load_signed_short used to be called load_signed_word.
2293// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
2294// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
2295// The term "word" in HotSpot means a 32- or 64-bit machine word.
2296int MacroAssembler::load_signed_short(Register dst, Address src) {
2297 int off;
2298 if (LP64_ONLY(true ||)true || VM_Version::is_P6()) {
2299 // This is dubious to me since it seems safe to do a signed 16 => 64 bit
2300 // version but this is what 64bit has always done. This seems to imply
2301 // that users are only using 32bits worth.
2302 off = offset();
2303 movswl(dst, src); // movsxw
2304 } else {
2305 off = load_unsigned_short(dst, src);
2306 shll(dst, 16);
2307 sarl(dst, 16);
2308 }
2309 return off;
2310}
2311
2312int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
2313 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
2314 // and "3.9 Partial Register Penalties", p. 22).
2315 int off;
2316 if (LP64_ONLY(true || )true || VM_Version::is_P6() || src.uses(dst)) {
2317 off = offset();
2318 movzbl(dst, src); // movzxb
2319 } else {
2320 xorl(dst, dst);
2321 off = offset();
2322 movb(dst, src);
2323 }
2324 return off;
2325}
2326
2327// Note: load_unsigned_short used to be called load_unsigned_word.
2328int MacroAssembler::load_unsigned_short(Register dst, Address src) {
2329 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
2330 // and "3.9 Partial Register Penalties", p. 22).
2331 int off;
2332 if (LP64_ONLY(true ||)true || VM_Version::is_P6() || src.uses(dst)) {
2333 off = offset();
2334 movzwl(dst, src); // movzxw
2335 } else {
2336 xorl(dst, dst);
2337 off = offset();
2338 movw(dst, src);
2339 }
2340 return off;
2341}
2342
2343void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
2344 switch (size_in_bytes) {
2345#ifndef _LP641
2346 case 8:
2347 assert(dst2 != noreg, "second dest register required")do { if (!(dst2 != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2347, "assert(" "dst2 != noreg" ") failed", "second dest register required"
); ::breakpoint(); } } while (0)
;
2348 movl(dst, src);
2349 movl(dst2, src.plus_disp(BytesPerInt));
2350 break;
2351#else
2352 case 8: movq(dst, src); break;
2353#endif
2354 case 4: movl(dst, src); break;
2355 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
2356 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
2357 default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2357); ::breakpoint(); } while (0)
;
2358 }
2359}
2360
2361void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
2362 switch (size_in_bytes) {
2363#ifndef _LP641
2364 case 8:
2365 assert(src2 != noreg, "second source register required")do { if (!(src2 != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2365, "assert(" "src2 != noreg" ") failed", "second source register required"
); ::breakpoint(); } } while (0)
;
2366 movl(dst, src);
2367 movl(dst.plus_disp(BytesPerInt), src2);
2368 break;
2369#else
2370 case 8: movq(dst, src); break;
2371#endif
2372 case 4: movl(dst, src); break;
2373 case 2: movw(dst, src); break;
2374 case 1: movb(dst, src); break;
2375 default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2375); ::breakpoint(); } while (0)
;
2376 }
2377}
2378
2379void MacroAssembler::mov32(AddressLiteral dst, Register src) {
2380 if (reachable(dst)) {
2381 movl(as_Address(dst), src);
2382 } else {
2383 lea(rscratch1, dst);
2384 movl(Address(rscratch1, 0), src);
2385 }
2386}
2387
2388void MacroAssembler::mov32(Register dst, AddressLiteral src) {
2389 if (reachable(src)) {
2390 movl(dst, as_Address(src));
2391 } else {
2392 lea(rscratch1, src);
2393 movl(dst, Address(rscratch1, 0));
2394 }
2395}
2396
2397// C++ bool manipulation
2398
2399void MacroAssembler::movbool(Register dst, Address src) {
2400 if(sizeof(bool) == 1)
2401 movb(dst, src);
2402 else if(sizeof(bool) == 2)
2403 movw(dst, src);
2404 else if(sizeof(bool) == 4)
2405 movl(dst, src);
2406 else
2407 // unsupported
2408 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2408); ::breakpoint(); } while (0)
;
2409}
2410
2411void MacroAssembler::movbool(Address dst, bool boolconst) {
2412 if(sizeof(bool) == 1)
2413 movb(dst, (int) boolconst);
2414 else if(sizeof(bool) == 2)
2415 movw(dst, (int) boolconst);
2416 else if(sizeof(bool) == 4)
2417 movl(dst, (int) boolconst);
2418 else
2419 // unsupported
2420 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2420); ::breakpoint(); } while (0)
;
2421}
2422
2423void MacroAssembler::movbool(Address dst, Register src) {
2424 if(sizeof(bool) == 1)
2425 movb(dst, src);
2426 else if(sizeof(bool) == 2)
2427 movw(dst, src);
2428 else if(sizeof(bool) == 4)
2429 movl(dst, src);
2430 else
2431 // unsupported
2432 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2432); ::breakpoint(); } while (0)
;
2433}
2434
2435void MacroAssembler::movbyte(ArrayAddress dst, int src) {
2436 movb(as_Address(dst), src);
2437}
2438
2439void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
2440 if (reachable(src)) {
2441 movdl(dst, as_Address(src));
2442 } else {
2443 lea(rscratch1, src);
2444 movdl(dst, Address(rscratch1, 0));
2445 }
2446}
2447
2448void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
2449 if (reachable(src)) {
2450 movq(dst, as_Address(src));
2451 } else {
2452 lea(rscratch1, src);
2453 movq(dst, Address(rscratch1, 0));
2454 }
2455}
2456
2457void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
2458 if (reachable(src)) {
2459 if (UseXmmLoadAndClearUpper) {
2460 movsd (dst, as_Address(src));
2461 } else {
2462 movlpd(dst, as_Address(src));
2463 }
2464 } else {
2465 lea(rscratch1, src);
2466 if (UseXmmLoadAndClearUpper) {
2467 movsd (dst, Address(rscratch1, 0));
2468 } else {
2469 movlpd(dst, Address(rscratch1, 0));
2470 }
2471 }
2472}
2473
2474void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
2475 if (reachable(src)) {
2476 movss(dst, as_Address(src));
2477 } else {
2478 lea(rscratch1, src);
2479 movss(dst, Address(rscratch1, 0));
2480 }
2481}
2482
2483void MacroAssembler::movptr(Register dst, Register src) {
2484 LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src));
2485}
2486
2487void MacroAssembler::movptr(Register dst, Address src) {
2488 LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src));
2489}
2490
2491// src should NEVER be a real pointer. Use AddressLiteral for true pointers
2492void MacroAssembler::movptr(Register dst, intptr_t src) {
2493 LP64_ONLY(mov64(dst, src))mov64(dst, src) NOT_LP64(movl(dst, src));
2494}
2495
2496void MacroAssembler::movptr(Address dst, Register src) {
2497 LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src));
2498}
2499
2500void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2501 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((src->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2501, "assert(" "((src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2502 Assembler::movdqu(dst, src);
2503}
2504
2505void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2506 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2506, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2507 Assembler::movdqu(dst, src);
2508}
2509
2510void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2511 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vl()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2511, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2512 Assembler::movdqu(dst, src);
2513}
2514
2515void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2516 if (reachable(src)) {
2517 movdqu(dst, as_Address(src));
2518 } else {
2519 lea(scratchReg, src);
2520 movdqu(dst, Address(scratchReg, 0));
2521 }
2522}
2523
2524void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2525 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((src->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2525, "assert(" "((src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2526 Assembler::vmovdqu(dst, src);
2527}
2528
2529void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2530 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2530, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2531 Assembler::vmovdqu(dst, src);
2532}
2533
2534void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2535 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vl()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2535, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2536 Assembler::vmovdqu(dst, src);
2537}
2538
2539void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2540 if (reachable(src)) {
2541 vmovdqu(dst, as_Address(src));
2542 }
2543 else {
2544 lea(scratch_reg, src);
2545 vmovdqu(dst, Address(scratch_reg, 0));
2546 }
2547}
2548
2549void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) {
2550 assert(vector_len <= AVX_256bit, "AVX2 vector length")do { if (!(vector_len <= AVX_256bit)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2550, "assert(" "vector_len <= AVX_256bit" ") failed", "AVX2 vector length"
); ::breakpoint(); } } while (0)
;
2551 if (vector_len == AVX_256bit) {
2552 vmovdqu(dst, src, scratch_reg);
2553 } else {
2554 movdqu(dst, src, scratch_reg);
2555 }
2556}
2557
2558void MacroAssembler::kmov(KRegister dst, Address src) {
2559 if (VM_Version::supports_avx512bw()) {
2560 kmovql(dst, src);
2561 } else {
2562 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2562, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2563 kmovwl(dst, src);
2564 }
2565}
2566
2567void MacroAssembler::kmov(Address dst, KRegister src) {
2568 if (VM_Version::supports_avx512bw()) {
2569 kmovql(dst, src);
2570 } else {
2571 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2571, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2572 kmovwl(dst, src);
2573 }
2574}
2575
2576void MacroAssembler::kmov(KRegister dst, KRegister src) {
2577 if (VM_Version::supports_avx512bw()) {
2578 kmovql(dst, src);
2579 } else {
2580 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2580, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2581 kmovwl(dst, src);
2582 }
2583}
2584
2585void MacroAssembler::kmov(Register dst, KRegister src) {
2586 if (VM_Version::supports_avx512bw()) {
2587 kmovql(dst, src);
2588 } else {
2589 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2589, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2590 kmovwl(dst, src);
2591 }
2592}
2593
2594void MacroAssembler::kmov(KRegister dst, Register src) {
2595 if (VM_Version::supports_avx512bw()) {
2596 kmovql(dst, src);
2597 } else {
2598 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2598, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2599 kmovwl(dst, src);
2600 }
2601}
2602
2603void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_reg) {
2604 if (reachable(src)) {
2605 kmovql(dst, as_Address(src));
2606 } else {
2607 lea(scratch_reg, src);
2608 kmovql(dst, Address(scratch_reg, 0));
2609 }
2610}
2611
2612void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
2613 if (reachable(src)) {
2614 kmovwl(dst, as_Address(src));
2615 } else {
2616 lea(scratch_reg, src);
2617 kmovwl(dst, Address(scratch_reg, 0));
2618 }
2619}
2620
2621void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2622 int vector_len, Register scratch_reg) {
2623 if (reachable(src)) {
2624 if (mask == k0) {
2625 Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
2626 } else {
2627 Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
2628 }
2629 } else {
2630 lea(scratch_reg, src);
2631 if (mask == k0) {
2632 Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
2633 } else {
2634 Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2635 }
2636 }
2637}
2638
2639void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2640 int vector_len, Register scratch_reg) {
2641 if (reachable(src)) {
2642 Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
2643 } else {
2644 lea(scratch_reg, src);
2645 Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2646 }
2647}
2648
2649void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2650 int vector_len, Register scratch_reg) {
2651 if (reachable(src)) {
2652 Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
2653 } else {
2654 lea(scratch_reg, src);
2655 Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2656 }
2657}
2658
2659void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2660 int vector_len, Register scratch_reg) {
2661 if (reachable(src)) {
2662 Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
2663 } else {
2664 lea(scratch_reg, src);
2665 Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2666 }
2667}
2668
2669void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2670 if (reachable(src)) {
2671 Assembler::evmovdquq(dst, as_Address(src), vector_len);
2672 } else {
2673 lea(rscratch, src);
2674 Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2675 }
2676}
2677
2678void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2679 if (reachable(src)) {
2680 Assembler::movdqa(dst, as_Address(src));
2681 } else {
2682 lea(rscratch1, src);
2683 Assembler::movdqa(dst, Address(rscratch1, 0));
2684 }
2685}
2686
2687void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2688 if (reachable(src)) {
2689 Assembler::movsd(dst, as_Address(src));
2690 } else {
2691 lea(rscratch1, src);
2692 Assembler::movsd(dst, Address(rscratch1, 0));
2693 }
2694}
2695
2696void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
2697 if (reachable(src)) {
2698 Assembler::movss(dst, as_Address(src));
2699 } else {
2700 lea(rscratch1, src);
2701 Assembler::movss(dst, Address(rscratch1, 0));
2702 }
2703}
2704
2705void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
2706 if (reachable(src)) {
2707 Assembler::mulsd(dst, as_Address(src));
2708 } else {
2709 lea(rscratch1, src);
2710 Assembler::mulsd(dst, Address(rscratch1, 0));
2711 }
2712}
2713
2714void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
2715 if (reachable(src)) {
2716 Assembler::mulss(dst, as_Address(src));
2717 } else {
2718 lea(rscratch1, src);
2719 Assembler::mulss(dst, Address(rscratch1, 0));
2720 }
2721}
2722
2723void MacroAssembler::null_check(Register reg, int offset) {
2724 if (needs_explicit_null_check(offset)) {
2725 // provoke OS NULL exception if reg = NULL by
2726 // accessing M[reg] w/o changing any (non-CC) registers
2727 // NOTE: cmpl is plenty here to provoke a segv
2728 cmpptr(rax, Address(reg, 0));
2729 // Note: should probably use testl(rax, Address(reg, 0));
2730 // may be shorter code (however, this version of
2731 // testl needs to be implemented first)
2732 } else {
2733 // nothing to do, (later) access of M[reg + offset]
2734 // will provoke OS NULL exception if reg = NULL
2735 }
2736}
2737
2738void MacroAssembler::os_breakpoint() {
2739 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
2740 // (e.g., MSVC can't call ps() otherwise)
2741 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)((address)((address_word)(os::breakpoint)))));
2742}
2743
2744void MacroAssembler::unimplemented(const char* what) {
2745 const char* buf = NULL__null;
2746 {
2747 ResourceMark rm;
2748 stringStream ss;
2749 ss.print("unimplemented: %s", what);
2750 buf = code_string(ss.as_string());
2751 }
2752 stop(buf);
2753}
2754
// Defined for 64-bit builds only.
// NOTE(review): presumably the byte offset of the XSTATE_BV field within an
// XSAVE area; confirm against the users of this macro.
#if defined(_LP64)
#define XSTATE_BV 0x200
#endif
2758
2759void MacroAssembler::pop_CPU_state() {
2760 pop_FPU_state();
2761 pop_IU_state();
2762}
2763
2764void MacroAssembler::pop_FPU_state() {
2765#ifndef _LP641
2766 frstor(Address(rsp, 0));
2767#else
2768 fxrstor(Address(rsp, 0));
2769#endif
2770 addptr(rsp, FPUStateSizeInWords * wordSize);
2771}
2772
2773void MacroAssembler::pop_IU_state() {
2774 popa();
2775 LP64_ONLY(addq(rsp, 8))addq(rsp, 8);
2776 popf();
2777}
2778
2779// Save Integer and Float state
2780// Warning: Stack must be 16 byte aligned (64bit)
2781void MacroAssembler::push_CPU_state() {
2782 push_IU_state();
2783 push_FPU_state();
2784}
2785
2786void MacroAssembler::push_FPU_state() {
2787 subptr(rsp, FPUStateSizeInWords * wordSize);
2788#ifndef _LP641
2789 fnsave(Address(rsp, 0));
2790 fwait();
2791#else
2792 fxsave(Address(rsp, 0));
2793#endif // LP64
2794}
2795
2796void MacroAssembler::push_IU_state() {
2797 // Push flags first because pusha kills them
2798 pushf();
2799 // Make sure rsp stays 16-byte aligned
2800 LP64_ONLY(subq(rsp, 8))subq(rsp, 8);
2801 pusha();
2802}
2803
2804void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { // determine java_thread register
2805 if (!java_thread->is_valid()) {
2806 java_thread = rdi;
2807 get_thread(java_thread);
2808 }
2809 // we must set sp to zero to clear frame
2810 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD0L);
2811 // must clear fp, so that compiled frames are not confused; it is
2812 // possible that we need it only for debugging
2813 if (clear_fp) {
2814 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD0L);
2815 }
2816 // Always clear the pc because it could have been set by make_walkable()
2817 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD0L);
2818 vzeroupper();
2819}
2820
2821void MacroAssembler::restore_rax(Register tmp) {
2822 if (tmp == noreg) pop(rax);
2823 else if (tmp != rax) mov(rax, tmp);
2824}
2825
2826void MacroAssembler::round_to(Register reg, int modulus) {
2827 addptr(reg, modulus - 1);
2828 andptr(reg, -modulus);
2829}
2830
2831void MacroAssembler::save_rax(Register tmp) {
2832 if (tmp == noreg) push(rax);
2833 else if (tmp != rax) mov(tmp, rax);
2834}
2835
2836void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod) {
2837 if (at_return) {
2838 // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
2839 // we may safely use rsp instead to perform the stack watermark check.
2840 cmpptr(in_nmethod ? rsp : rbp, Address(thread_reg, JavaThread::polling_word_offset()));
2841 jcc(Assembler::above, slow_path);
2842 return;
2843 }
2844 testb(Address(thread_reg, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit());
2845 jcc(Assembler::notZero, slow_path); // handshake bit set implies poll
2846}
2847
2848// Calls to C land
2849//
2850// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
2851// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
2852// has to be reset to 0. This is required to allow proper stack traversal.
2853void MacroAssembler::set_last_Java_frame(Register java_thread,
2854 Register last_java_sp,
2855 Register last_java_fp,
2856 address last_java_pc) {
2857 vzeroupper();
2858 // determine java_thread register
2859 if (!java_thread->is_valid()) {
2860 java_thread = rdi;
2861 get_thread(java_thread);
2862 }
2863 // determine last_java_sp register
2864 if (!last_java_sp->is_valid()) {
2865 last_java_sp = rsp;
2866 }
2867
2868 // last_java_fp is optional
2869
2870 if (last_java_fp->is_valid()) {
2871 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
2872 }
2873
2874 // last_java_pc is optional
2875
2876 if (last_java_pc != NULL__null) {
2877 lea(Address(java_thread,
2878 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
2879 InternalAddress(last_java_pc));
2880
2881 }
2882 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
2883}
2884
2885void MacroAssembler::shlptr(Register dst, int imm8) {
2886 LP64_ONLY(shlq(dst, imm8))shlq(dst, imm8) NOT_LP64(shll(dst, imm8));
2887}
2888
2889void MacroAssembler::shrptr(Register dst, int imm8) {
2890 LP64_ONLY(shrq(dst, imm8))shrq(dst, imm8) NOT_LP64(shrl(dst, imm8));
2891}
2892
2893void MacroAssembler::sign_extend_byte(Register reg) {
2894 if (LP64_ONLY(true ||)true || (VM_Version::is_P6() && reg->has_byte_register())) {
2895 movsbl(reg, reg); // movsxb
2896 } else {
2897 shll(reg, 24);
2898 sarl(reg, 24);
2899 }
2900}
2901
2902void MacroAssembler::sign_extend_short(Register reg) {
2903 if (LP64_ONLY(true ||)true || VM_Version::is_P6()) {
2904 movswl(reg, reg); // movsxw
2905 } else {
2906 shll(reg, 16);
2907 sarl(reg, 16);
2908 }
2909}
2910
2911void MacroAssembler::testl(Register dst, AddressLiteral src) {
2912 assert(reachable(src), "Address should be reachable")do { if (!(reachable(src))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2912, "assert(" "reachable(src)" ") failed", "Address should be reachable"
); ::breakpoint(); } } while (0)
;
2913 testl(dst, as_Address(src));
2914}
2915
2916void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
2917 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2917, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2918 Assembler::pcmpeqb(dst, src);
2919}
2920
2921void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
2922 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2922, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2923 Assembler::pcmpeqw(dst, src);
2924}
2925
2926void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2927 assert((dst->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2927, "assert(" "(dst->encoding() < 16)" ") failed", "XMM register should be 0-15"
); ::breakpoint(); } } while (0)
;
2928 Assembler::pcmpestri(dst, src, imm8);
2929}
2930
2931void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2932 assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding
() < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2932, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2933 Assembler::pcmpestri(dst, src, imm8);
2934}
2935
2936void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2937 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2937, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2938 Assembler::pmovzxbw(dst, src);
2939}
2940
2941void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
2942 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2942, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2943 Assembler::pmovzxbw(dst, src);
2944}
2945
2946void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
2947 assert((src->encoding() < 16),"XMM register should be 0-15")do { if (!((src->encoding() < 16))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2947, "assert(" "(src->encoding() < 16)" ") failed", "XMM register should be 0-15"
); ::breakpoint(); } } while (0)
;
2948 Assembler::pmovmskb(dst, src);
2949}
2950
2951void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
2952 assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding
() < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2952, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2953 Assembler::ptest(dst, src);
2954}
2955
2956void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
2957 if (reachable(src)) {
2958 Assembler::sqrtsd(dst, as_Address(src));
2959 } else {
2960 lea(rscratch1, src);
2961 Assembler::sqrtsd(dst, Address(rscratch1, 0));
2962 }
2963}
2964
2965void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
2966 if (reachable(src)) {
2967 Assembler::sqrtss(dst, as_Address(src));
2968 } else {
2969 lea(rscratch1, src);
2970 Assembler::sqrtss(dst, Address(rscratch1, 0));
2971 }
2972}
2973
2974void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
2975 if (reachable(src)) {
2976 Assembler::subsd(dst, as_Address(src));
2977 } else {
2978 lea(rscratch1, src);
2979 Assembler::subsd(dst, Address(rscratch1, 0));
2980 }
2981}
2982
2983void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg) {
2984 if (reachable(src)) {
2985 Assembler::roundsd(dst, as_Address(src), rmode);
2986 } else {
2987 lea(scratch_reg, src);
2988 Assembler::roundsd(dst, Address(scratch_reg, 0), rmode);
2989 }
2990}
2991
2992void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
2993 if (reachable(src)) {
2994 Assembler::subss(dst, as_Address(src));
2995 } else {
2996 lea(rscratch1, src);
2997 Assembler::subss(dst, Address(rscratch1, 0));
2998 }
2999}
3000
3001void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
3002 if (reachable(src)) {
3003 Assembler::ucomisd(dst, as_Address(src));
3004 } else {
3005 lea(rscratch1, src);
3006 Assembler::ucomisd(dst, Address(rscratch1, 0));
3007 }
3008}
3009
3010void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
3011 if (reachable(src)) {
3012 Assembler::ucomiss(dst, as_Address(src));
3013 } else {
3014 lea(rscratch1, src);
3015 Assembler::ucomiss(dst, Address(rscratch1, 0));
3016 }
3017}
3018
3019void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
3020 // Used in sign-bit flipping with aligned address.
3021 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3021, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
3022 if (reachable(src)) {
3023 Assembler::xorpd(dst, as_Address(src));
3024 } else {
3025 lea(scratch_reg, src);
3026 Assembler::xorpd(dst, Address(scratch_reg, 0));
3027 }
3028}
3029
3030void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) {
3031 if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
3032 Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
3033 }
3034 else {
3035 Assembler::xorpd(dst, src);
3036 }
3037}
3038
3039void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) {
3040 if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
3041 Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
3042 } else {
3043 Assembler::xorps(dst, src);
3044 }
3045}
3046
3047void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
3048 // Used in sign-bit flipping with aligned address.
3049 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3049, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
3050 if (reachable(src)) {
3051 Assembler::xorps(dst, as_Address(src));
3052 } else {
3053 lea(scratch_reg, src);
3054 Assembler::xorps(dst, Address(scratch_reg, 0));
3055 }
3056}
3057
3058void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
3059 // Used in sign-bit flipping with aligned address.
3060 bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
3061 assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || aligned_adr)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3061, "assert(" "(UseAVX > 0) || aligned_adr" ") failed"
, "SSE mode requires address alignment 16 bytes"); ::breakpoint
(); } } while (0)
;
3062 if (reachable(src)) {
3063 Assembler::pshufb(dst, as_Address(src));
3064 } else {
3065 lea(rscratch1, src);
3066 Assembler::pshufb(dst, Address(rscratch1, 0));
3067 }
3068}
3069
3070// AVX 3-operands instructions
3071
3072void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3073 if (reachable(src)) {
3074 vaddsd(dst, nds, as_Address(src));
3075 } else {
3076 lea(rscratch1, src);
3077 vaddsd(dst, nds, Address(rscratch1, 0));
3078 }
3079}
3080
3081void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3082 if (reachable(src)) {
3083 vaddss(dst, nds, as_Address(src));
3084 } else {
3085 lea(rscratch1, src);
3086 vaddss(dst, nds, Address(rscratch1, 0));
3087 }
3088}
3089
3090void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
3091 assert(UseAVX > 0, "requires some form of AVX")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3091, "assert(" "UseAVX > 0" ") failed", "requires some form of AVX"
); ::breakpoint(); } } while (0)
;
3092 if (reachable(src)) {
3093 Assembler::vpaddb(dst, nds, as_Address(src), vector_len);
3094 } else {
3095 lea(rscratch, src);
3096 Assembler::vpaddb(dst, nds, Address(rscratch, 0), vector_len);
3097 }
3098}
3099
3100void MacroAssembler::vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
3101 assert(UseAVX > 0, "requires some form of AVX")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3101, "assert(" "UseAVX > 0" ") failed", "requires some form of AVX"
); ::breakpoint(); } } while (0)
;
3102 if (reachable(src)) {
3103 Assembler::vpaddd(dst, nds, as_Address(src), vector_len);
3104 } else {
3105 lea(rscratch, src);
3106 Assembler::vpaddd(dst, nds, Address(rscratch, 0), vector_len);
3107 }
3108}
3109
3110void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
3111 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vldq()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3111, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3112 vandps(dst, nds, negate_field, vector_len);
3113}
3114
3115void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
3116 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vldq()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3116, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3117 vandpd(dst, nds, negate_field, vector_len);
3118}
3119
3120void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3121 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3121, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3122 Assembler::vpaddb(dst, nds, src, vector_len);
3123}
3124
3125void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3126 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3126, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3127 Assembler::vpaddb(dst, nds, src, vector_len);
3128}
3129
3130void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3131 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3131, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3132 Assembler::vpaddw(dst, nds, src, vector_len);
3133}
3134
3135void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3136 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3136, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3137 Assembler::vpaddw(dst, nds, src, vector_len);
3138}
3139
3140void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3141 if (reachable(src)) {
3142 Assembler::vpand(dst, nds, as_Address(src), vector_len);
3143 } else {
3144 lea(scratch_reg, src);
3145 Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3146 }
3147}
3148
3149void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3150 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3150, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3151 Assembler::vpbroadcastw(dst, src, vector_len);
3152}
3153
3154void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3155 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3155, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3156 Assembler::vpcmpeqb(dst, nds, src, vector_len);
3157}
3158
3159void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3160 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3160, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3161 Assembler::vpcmpeqw(dst, nds, src, vector_len);
3162}
3163
3164void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
3165 AddressLiteral src, int vector_len, Register scratch_reg) {
3166 if (reachable(src)) {
3167 Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
3168 } else {
3169 lea(scratch_reg, src);
3170 Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
3171 }
3172}
3173
3174void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3175 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3176 if (reachable(src)) {
3177 Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3178 } else {
3179 lea(scratch_reg, src);
3180 Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3181 }
3182}
3183
3184void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3185 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3186 if (reachable(src)) {
3187 Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3188 } else {
3189 lea(scratch_reg, src);
3190 Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3191 }
3192}
3193
3194void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3195 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3196 if (reachable(src)) {
3197 Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3198 } else {
3199 lea(scratch_reg, src);
3200 Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3201 }
3202}
3203
3204void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3205 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3206 if (reachable(src)) {
3207 Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3208 } else {
3209 lea(scratch_reg, src);
3210 Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3211 }
3212}
3213
3214void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
3215 if (width == Assembler::Q) {
3216 Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
3217 } else {
3218 Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
3219 }
3220}
3221
3222void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
3223 int eq_cond_enc = 0x29;
3224 int gt_cond_enc = 0x37;
3225 if (width != Assembler::Q) {
3226 eq_cond_enc = 0x74 + width;
3227 gt_cond_enc = 0x64 + width;
3228 }
3229 switch (cond) {
3230 case eq:
3231 vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3232 break;
3233 case neq:
3234 vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3235 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3236 break;
3237 case le:
3238 vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3239 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3240 break;
3241 case nlt:
3242 vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3243 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3244 break;
3245 case lt:
3246 vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3247 break;
3248 case nle:
3249 vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3250 break;
3251 default:
3252 assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3252, "assert(" "false" ") failed", "Should not reach here"
); ::breakpoint(); } } while (0)
;
3253 }
3254}
3255
3256void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3257 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3257, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3258 Assembler::vpmovzxbw(dst, src, vector_len);
3259}
3260
3261void MacroAssembler::vpmovmskb(Register dst, XMMRegister src, int vector_len) {
3262 assert((src->encoding() < 16),"XMM register should be 0-15")do { if (!((src->encoding() < 16))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3262, "assert(" "(src->encoding() < 16)" ") failed", "XMM register should be 0-15"
); ::breakpoint(); } } while (0)
;
3263 Assembler::vpmovmskb(dst, src, vector_len);
3264}
3265
3266void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3267 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3267, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3268 Assembler::vpmullw(dst, nds, src, vector_len);
3269}
3270
3271void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3272 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3272, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3273 Assembler::vpmullw(dst, nds, src, vector_len);
3274}
3275
3276void MacroAssembler::vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3277 assert((UseAVX > 0), "AVX support is needed")do { if (!((UseAVX > 0))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3277, "assert(" "(UseAVX > 0)" ") failed", "AVX support is needed"
); ::breakpoint(); } } while (0)
;
3278 if (reachable(src)) {
3279 Assembler::vpmulld(dst, nds, as_Address(src), vector_len);
3280 } else {
3281 lea(scratch_reg, src);
3282 Assembler::vpmulld(dst, nds, Address(scratch_reg, 0), vector_len);
3283 }
3284}
3285
3286void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3287 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3287, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3288 Assembler::vpsubb(dst, nds, src, vector_len);
3289}
3290
3291void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3292 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3292, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3293 Assembler::vpsubb(dst, nds, src, vector_len);
3294}
3295
3296void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3297 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3297, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3298 Assembler::vpsubw(dst, nds, src, vector_len);
3299}
3300
3301void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3302 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3302, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3303 Assembler::vpsubw(dst, nds, src, vector_len);
3304}
3305
3306void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3307 assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift->
encoding() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3307, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3308 Assembler::vpsraw(dst, nds, shift, vector_len);
3309}
3310
3311void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3312 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3312, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3313 Assembler::vpsraw(dst, nds, shift, vector_len);
3314}
3315
3316void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3317 assert(UseAVX > 2,"")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3317, "assert(" "UseAVX > 2" ") failed", ""); ::breakpoint
(); } } while (0)
;
3318 if (!VM_Version::supports_avx512vl() && vector_len < 2) {
3319 vector_len = 2;
3320 }
3321 Assembler::evpsraq(dst, nds, shift, vector_len);
3322}
3323
3324void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3325 assert(UseAVX > 2,"")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3325, "assert(" "UseAVX > 2" ") failed", ""); ::breakpoint
(); } } while (0)
;
3326 if (!VM_Version::supports_avx512vl() && vector_len < 2) {
3327 vector_len = 2;
3328 }
3329 Assembler::evpsraq(dst, nds, shift, vector_len);
3330}
3331
3332void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3333 assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift->
encoding() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3333, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3334 Assembler::vpsrlw(dst, nds, shift, vector_len);
3335}
3336
3337void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3338 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3338, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3339 Assembler::vpsrlw(dst, nds, shift, vector_len);
3340}
3341
3342void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3343 assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift->
encoding() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3343, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3344 Assembler::vpsllw(dst, nds, shift, vector_len);
3345}
3346
3347void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3348 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3348, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3349 Assembler::vpsllw(dst, nds, shift, vector_len);
3350}
3351
3352void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
3353 assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding
() < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3353, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3354 Assembler::vptest(dst, src);
3355}
3356
3357void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3358 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3358, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3359 Assembler::punpcklbw(dst, src);
3360}
3361
3362void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) {
3363 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3363, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3364 Assembler::pshufd(dst, src, mode);
3365}
3366
3367void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3368 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3368, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3369 Assembler::pshuflw(dst, src, mode);
3370}
3371
3372void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3373 if (reachable(src)) {
3374 vandpd(dst, nds, as_Address(src), vector_len);
3375 } else {
3376 lea(scratch_reg, src);
3377 vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3378 }
3379}
3380
3381void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3382 if (reachable(src)) {
3383 vandps(dst, nds, as_Address(src), vector_len);
3384 } else {
3385 lea(scratch_reg, src);
3386 vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3387 }
3388}
3389
3390void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
3391 bool merge, int vector_len, Register scratch_reg) {
3392 if (reachable(src)) {
3393 Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
3394 } else {
3395 lea(scratch_reg, src);
3396 Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
3397 }
3398}
3399
3400void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3401 if (reachable(src)) {
3402 vdivsd(dst, nds, as_Address(src));
3403 } else {
3404 lea(rscratch1, src);
3405 vdivsd(dst, nds, Address(rscratch1, 0));
3406 }
3407}
3408
3409void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3410 if (reachable(src)) {
3411 vdivss(dst, nds, as_Address(src));
3412 } else {
3413 lea(rscratch1, src);
3414 vdivss(dst, nds, Address(rscratch1, 0));
3415 }
3416}
3417
3418void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3419 if (reachable(src)) {
3420 vmulsd(dst, nds, as_Address(src));
3421 } else {
3422 lea(rscratch1, src);
3423 vmulsd(dst, nds, Address(rscratch1, 0));
3424 }
3425}
3426
3427void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3428 if (reachable(src)) {
3429 vmulss(dst, nds, as_Address(src));
3430 } else {
3431 lea(rscratch1, src);
3432 vmulss(dst, nds, Address(rscratch1, 0));
3433 }
3434}
3435
3436void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3437 if (reachable(src)) {
3438 vsubsd(dst, nds, as_Address(src));
3439 } else {
3440 lea(rscratch1, src);
3441 vsubsd(dst, nds, Address(rscratch1, 0));
3442 }
3443}
3444
3445void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3446 if (reachable(src)) {
3447 vsubss(dst, nds, as_Address(src));
3448 } else {
3449 lea(rscratch1, src);
3450 vsubss(dst, nds, Address(rscratch1, 0));
3451 }
3452}
3453
// Emits a 128-bit vxorps of nds with the literal src into dst.
// Asserts that both dst and nds have encodings below 16 unless
// VM_Version::supports_avx512vldq() is true.
// NOTE(review): src is presumably the single-precision sign-bit mask, so the
// xor flips the sign -- confirm against the callers.
3454void MacroAssembler::vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3455 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vldq()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3455, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3456 vxorps(dst, nds, src, Assembler::AVX_128bit);
3457}
3458
// Emits a 128-bit vxorpd of nds with the literal src into dst.
// Asserts that both dst and nds have encodings below 16 unless
// VM_Version::supports_avx512vldq() is true.
// NOTE(review): src is presumably the double-precision sign-bit mask, so the
// xor flips the sign -- confirm against the callers.
3459void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3460 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vldq()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3460, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3461 vxorpd(dst, nds, src, Assembler::AVX_128bit);
3462}
3463
// Emits vxorpd(dst, nds, <mem>, vector_len) for an AddressLiteral operand.
// A reachable literal is used directly; otherwise its address is loaded into
// the caller-supplied scratch_reg (overwriting it) and used as the base.
3464void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3465 if (reachable(src)) {
3466 vxorpd(dst, nds, as_Address(src), vector_len);
3467 } else {
3468 lea(scratch_reg, src);
3469 vxorpd(dst, nds, Address(scratch_reg, 0), vector_len);
3470 }
3471}
3472
// Emits vxorps(dst, nds, <mem>, vector_len) for an AddressLiteral operand.
// A reachable literal is used directly; otherwise its address is loaded into
// the caller-supplied scratch_reg (overwriting it) and used as the base.
3473void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3474 if (reachable(src)) {
3475 vxorps(dst, nds, as_Address(src), vector_len);
3476 } else {
3477 lea(scratch_reg, src);
3478 vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3479 }
3480}
3481
// Emits a vector xor of nds with the literal src into dst.
// When UseAVX > 1 or vector_len < 1, Assembler::vpxor is emitted (directly on
// a reachable literal, or through scratch_reg otherwise). In the remaining
// case the call is forwarded to MacroAssembler::vxorpd with the same
// arguments.
3482void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3483 if (UseAVX > 1 || (vector_len < 1)) {
3484 if (reachable(src)) {
3485 Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3486 } else {
3487 lea(scratch_reg, src);
3488 Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3489 }
3490 }
3491 else {
3492 MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3493 }
3494}
3495
// Emits Assembler::vpermd(dst, nds, <mem>, vector_len) for an AddressLiteral
// operand. A reachable literal is used directly; otherwise its address is
// loaded into scratch_reg (overwriting it) and used as the base.
3496void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3497 if (reachable(src)) {
3498 Assembler::vpermd(dst, nds, as_Address(src), vector_len);
3499 } else {
3500 lea(scratch_reg, src);
3501 Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
3502 }
3503}
3504
// Emits an andptr that masks possibly_jweak with the bitwise complement of
// JNIHandles::weak_tag_mask. The static assert pins that complement to -2,
// i.e. only the lowest bit of the register is cleared.
3505void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3506 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3507 STATIC_ASSERT(inverted_jweak_mask == -2)static_assert((inverted_jweak_mask == -2), "inverted_jweak_mask == -2"
)
; // otherwise check this code
3508 // The inverted mask is sign-extended
3509 andptr(possibly_jweak, inverted_jweak_mask);
3510}
3511
// Emits code that replaces the handle in 'value' with the object it refers to.
//  - A zero handle is left untouched.
//  - If the JNIHandles::weak_tag_mask bit is set, the load is done from
//    value - weak_tag_value with IN_NATIVE | ON_PHANTOM_OOP_REF decorators.
//  - Otherwise the load is done from [value + 0] with IN_NATIVE only.
// The result is left in 'value'; 'tmp' and 'thread' are handed to
// access_load_at. All three registers must be distinct.
3512void MacroAssembler::resolve_jobject(Register value,
3513 Register thread,
3514 Register tmp) {
3515 assert_different_registers(value, thread, tmp);
3516 Label done, not_weak;
3517 testptr(value, value);
3518 jcc(Assembler::zero, done); // Use NULL as-is.
3519 testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3520 jcc(Assembler::zero, not_weak);
3521 // Resolve jweak.
3522 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3523 value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
3524 verify_oop(value)_verify_oop_checked(value, "broken oop " "value", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3524)
;
3525 jmp(done);
3526 bind(not_weak);
3527 // Resolve (untagged) jobject.
3528 access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
3529 verify_oop(value)_verify_oop_checked(value, "broken oop " "value", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3529)
;
3530 bind(done);
3531}
3532
// Pointer-width subtract of an immediate from dst: the LP64_ONLY arm emits
// subq and the NOT_LP64 arm emits subl (this listing shows the LP64 expansion).
3533void MacroAssembler::subptr(Register dst, int32_t imm32) {
3534 LP64_ONLY(subq(dst, imm32))subq(dst, imm32) NOT_LP64(subl(dst, imm32));
3535}
3536
3537// Force generation of a 4 byte immediate value even if it fits into 8bit
// Pointer-width variant: the LP64_ONLY arm emits subq_imm32 and the NOT_LP64
// arm emits subl_imm32.
3538void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
3539 LP64_ONLY(subq_imm32(dst, imm32))subq_imm32(dst, imm32) NOT_LP64(subl_imm32(dst, imm32));
3540}
3541
// Pointer-width register-register subtract (dst -= src): the LP64_ONLY arm
// emits subq and the NOT_LP64 arm emits subl.
3542void MacroAssembler::subptr(Register dst, Register src) {
3543 LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src));
3544}
3545
3546// C++ bool manipulation
// Emits a test of a C++ bool held in dst, sized by the host compiler's
// sizeof(bool): 1 byte -> testb(dst, 0xff); 4 bytes -> testl(dst, dst).
// A 2-byte bool (no testw path implemented) or any other size hits
// ShouldNotReachHere().
3547void MacroAssembler::testbool(Register dst) {
3548 if(sizeof(bool) == 1)
3549 testb(dst, 0xff);
3550 else if(sizeof(bool) == 2) {
3551 // testw implementation needed for two byte bools
3552 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3552); ::breakpoint(); } while (0)
;
3553 } else if(sizeof(bool) == 4)
3554 testl(dst, dst);
3555 else
3556 // unsupported
3557 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3557); ::breakpoint(); } while (0)
;
3558}
3559
// Pointer-width register-register test: the LP64_ONLY arm emits testq and the
// NOT_LP64 arm emits testl.
3560void MacroAssembler::testptr(Register dst, Register src) {
3561 LP64_ONLY(testq(dst, src))testq(dst, src) NOT_LP64(testl(dst, src));
3562}
3563
3564// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Pure forwarder: fetches the BarrierSetAssembler of the current BarrierSet
// and passes every argument through to its tlab_allocate(), with 'this' as
// the assembler.
3565void MacroAssembler::tlab_allocate(Register thread, Register obj,
3566 Register var_size_in_bytes,
3567 int con_size_in_bytes,
3568 Register t1,
3569 Register t2,
3570 Label& slow_case) {
3571 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3572 bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
3573}
3574
3575// Defines obj, preserves var_size_in_bytes
3576void MacroAssembler::eden_allocate(Register thread, Register obj,
3577 Register var_size_in_bytes,
3578 int con_size_in_bytes,
3579 Register t1,
3580 Label& slow_case) {
3581 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3582 bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
3583}
3584
3585// Preserves the contents of address, destroys the contents length_in_bytes and temp.
// Emits code that stores zero words into the region starting at
// address + offset_in_bytes. The three registers must be distinct and
// offset_in_bytes must be a multiple of BytesPerWord (both asserted).
// A zero length skips straight to the end. length_in_bytes is reused as the
// word index and counted down to zero by the store loop; temp is zeroed and
// used as the store source.
3586void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
3587 assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different")do { if (!(address != length_in_bytes && address != temp
&& temp != length_in_bytes)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3587, "assert(" "address != length_in_bytes && address != temp && temp != length_in_bytes"
") failed", "registers must be different"); ::breakpoint(); }
} while (0)
;
3588 assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord")do { if (!((offset_in_bytes & (BytesPerWord - 1)) == 0)) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3588, "assert(" "(offset_in_bytes & (BytesPerWord - 1)) == 0"
") failed", "offset must be a multiple of BytesPerWord"); ::
breakpoint(); } } while (0)
;
3589 Label done;
3590
3591 testptr(length_in_bytes, length_in_bytes);
3592 jcc(Assembler::zero, done);
3593
3594 // initialize topmost word, divide index by 2, check if odd and test if zero
3595 // note: for the remaining code to work, index must be a multiple of BytesPerWord
// Debug-only generated check: stop if the low bits of the length are set.
3596#ifdef ASSERT1
3597 {
3598 Label L;
3599 testptr(length_in_bytes, BytesPerWord - 1);
3600 jcc(Assembler::zero, L);
3601 stop("length must be a multiple of BytesPerWord");
3602 bind(L);
3603 }
3604#endif
3605 Register index = length_in_bytes;
3606 xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
// Both arms shift index right by three bits in total; they differ only in
// using one shift instruction or two.
3607 if (UseIncDec) {
3608 shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
3609 } else {
3610 shrptr(index, 2); // use 2 instructions to avoid partial flag stall
3611 shrptr(index, 1);
3612 }
// The block below is guarded by #ifndef _LP64: it handles the case where the
// shift left the carry flag set by storing one extra word before the loop.
3613#ifndef _LP641
3614 // index could have not been a multiple of 8 (i.e., bit 2 was set)
3615 {
3616 Label even;
3617 // note: if index was a multiple of 8, then it cannot
3618 // be 0 now otherwise it must have been 0 before
3619 // => if it is even, we don't need to check for 0 again
3620 jcc(Assembler::carryClear, even);
3621 // clear topmost word (no jump would be needed if conditional assignment worked here)
3622 movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
3623 // index could be 0 now, must check again
3624 jcc(Assembler::zero, done);
3625 bind(even);
3626 }
3627#endif // !_LP64
3628 // initialize remaining object fields: index is a multiple of 2 now
// Store loop: writes temp at address + index*8 + offset - 1 word (and, in the
// NOT_LP64 arm, a second word one slot lower), then decrements index and
// repeats until it reaches zero.
3629 {
3630 Label loop;
3631 bind(loop);
3632 movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3633 NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
3634 decrement(index);
3635 jcc(Assembler::notZero, loop);
3636 }
3637
3638 bind(done);
3639}
3640
3641// Look up the method for a megamorphic invokeinterface call.
3642// The target method is determined by <intf_klass, itable_index>.
3643// The receiver klass is in recv_klass.
3644// On success, the result will be in method_result, and execution falls through.
3645// On failure, execution transfers to the given label.
//
// Register contract enforced by the asserts below:
//  - recv_klass, intf_klass and scan_temp are pairwise distinct;
//  - method_result, intf_klass and scan_temp are pairwise distinct;
//  - recv_klass may alias method_result only when return_method is false;
//  - a non-constant itable_index must live in method_result.
// When return_method is true, recv_klass is overwritten (advanced by the
// scaled itable_index plus the method-entry offset). scan_temp and
// method_result are overwritten in all cases.
3646void MacroAssembler::lookup_interface_method(Register recv_klass,
3647 Register intf_klass,
3648 RegisterOrConstant itable_index,
3649 Register method_result,
3650 Register scan_temp,
3651 Label& L_no_such_interface,
3652 bool return_method) {
3653 assert_different_registers(recv_klass, intf_klass, scan_temp);
3654 assert_different_registers(method_result, intf_klass, scan_temp);
3655 assert(recv_klass != method_result || !return_method,do { if (!(recv_klass != method_result || !return_method)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3656, "assert(" "recv_klass != method_result || !return_method"
") failed", "recv_klass can be destroyed when method isn't needed"
); ::breakpoint(); } } while (0)
3656 "recv_klass can be destroyed when method isn't needed")do { if (!(recv_klass != method_result || !return_method)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3656, "assert(" "recv_klass != method_result || !return_method"
") failed", "recv_klass can be destroyed when method isn't needed"
); ::breakpoint(); } } while (0)
;
3657
3658 assert(itable_index.is_constant() || itable_index.as_register() == method_result,do { if (!(itable_index.is_constant() || itable_index.as_register
() == method_result)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3659, "assert(" "itable_index.is_constant() || itable_index.as_register() == method_result"
") failed", "caller must use same register for non-constant itable index as for method"
); ::breakpoint(); } } while (0)
3659 "caller must use same register for non-constant itable index as for method")do { if (!(itable_index.is_constant() || itable_index.as_register
() == method_result)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3659, "assert(" "itable_index.is_constant() || itable_index.as_register() == method_result"
") failed", "caller must use same register for non-constant itable index as for method"
); ::breakpoint(); } } while (0)
;
3660
3661 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3662 int vtable_base = in_bytes(Klass::vtable_start_offset());
3663 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3664 int scan_step = itableOffsetEntry::size() * wordSize;
3665 int vte_size = vtableEntry::size_in_bytes();
3666 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3667 assert(vte_size == wordSize, "else adjust times_vte_scale")do { if (!(vte_size == wordSize)) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3667, "assert(" "vte_size == wordSize" ") failed", "else adjust times_vte_scale"
); ::breakpoint(); } } while (0)
;
3668
// scan_temp = vtable length (32-bit load), then
// scan_temp = recv_klass + vtable_length * ptr_size + vtable_base.
3669 movl(scan_temp, Address(recv_klass, Klass::vtable_length_offset()));
3670
3671 // %%% Could store the aligned, prescaled offset in the klassoop.
3672 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
3673
3674 if (return_method) {
3675 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
3676 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below")do { if (!(itableMethodEntry::size() * wordSize == wordSize))
{ (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3676, "assert(" "itableMethodEntry::size() * wordSize == wordSize"
") failed", "adjust the scaling in the code below"); ::breakpoint
(); } } while (0)
;
3677 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
3678 }
3679
3680 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
3681 // if (scan->interface() == intf) {
3682 // result = (klass + scan->offset() + itable_index);
3683 // }
3684 // }
3685 Label search, found_method;
3686
// The C++ loop below runs twice and therefore emits the compare sequence
// twice. The first copy (peel == 1) tests the first entry and branches to
// found_method on a hit; it is followed by the 'search' label, the null
// check and the pointer advance. The second copy (peel == 0) branches back
// to 'search' on a miss and otherwise falls through to found_method.
3687 for (int peel = 1; peel >= 0; peel--) {
3688 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
3689 cmpptr(intf_klass, method_result);
3690
3691 if (peel) {
3692 jccb(Assembler::equal, found_method)jccb_0(Assembler::equal, found_method, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3692)
;
3693 } else {
3694 jccb(Assembler::notEqual, search)jccb_0(Assembler::notEqual, search, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3694)
;
3695 // (invert the test to fall through to found_method...)
3696 }
3697
3698 if (!peel) break;
3699
3700 bind(search);
3701
3702 // Check that the previous entry is non-null. A null entry means that
3703 // the receiver class doesn't implement the interface, and wasn't the
3704 // same as when the caller was compiled.
3705 testptr(method_result, method_result);
3706 jcc(Assembler::zero, L_no_such_interface);
3707 addptr(scan_temp, scan_step);
3708 }
3709
3710 bind(found_method);
3711
3712 if (return_method) {
3713 // Got a hit.
// Load the 32-bit offset from the matching entry, then load the method
// pointer from the (already adjusted) recv_klass plus that offset.
3714 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
3715 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
3716 }
3717}
3718
3719
3720// virtual method calling
// Emits a single pointer load into method_result from
//   recv_klass + vtable_index * pointer_size
//              + Klass::vtable_start_offset() + vtableEntry::method_offset_in_bytes().
// vtable_index may be a register or a constant. The assert pins the vtable
// entry size to one word, which the times_ptr scaling relies on.
3721void MacroAssembler::lookup_virtual_method(Register recv_klass,
3722 RegisterOrConstant vtable_index,
3723 Register method_result) {
3724 const int base = in_bytes(Klass::vtable_start_offset());
3725 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below")do { if (!(vtableEntry::size() * wordSize == wordSize)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3725, "assert(" "vtableEntry::size() * wordSize == wordSize"
") failed", "else adjust the scaling in the code below"); ::
breakpoint(); } } while (0)
;
3726 Address vtable_entry_addr(recv_klass,
3727 vtable_index, Address::times_ptr,
3728 base + vtableEntry::method_offset_in_bytes());
3729 movptr(method_result, vtable_entry_addr);
3730}
3731
3732
// Emits a full subtype check: the fast path followed by the slow path.
// On success control transfers to L_success; on failure it reaches the local
// L_failure label bound at the end, i.e. execution falls through.
// The fast path gets NULL for its slow-path label and the slow path gets NULL
// for its failure label, so each of those outcomes falls through to the code
// emitted next.
3733void MacroAssembler::check_klass_subtype(Register sub_klass,
3734 Register super_klass,
3735 Register temp_reg,
3736 Label& L_success) {
3737 Label L_failure;
3738 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL__null);
3739 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL__null);
3740 bind(L_failure);
3741}
3742
3743
// Emits the fast part of a subtype check of sub_klass against super_klass.
//
// Labels: L_success, L_failure and L_slow_path are the three possible
// outcomes. At most one of them may be NULL (asserted); a NULL label is
// replaced by the local L_fallthrough bound at the end, so that outcome
// simply falls through.
//
// super_check_offset: a register, a known constant, or the constant -1. The
// value -1 means the offset is loaded (32-bit) from super_klass's
// super_check_offset field into temp_reg, which must then not be noreg.
//
// sub_klass, super_klass and temp_reg must be distinct registers, and a
// register super_check_offset must differ from sub_klass and super_klass.
3744void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3745 Register super_klass,
3746 Register temp_reg,
3747 Label* L_success,
3748 Label* L_failure,
3749 Label* L_slow_path,
3750 RegisterOrConstant super_check_offset) {
3751 assert_different_registers(sub_klass, super_klass, temp_reg);
3752 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3753 if (super_check_offset.is_register()) {
3754 assert_different_registers(sub_klass, super_klass,
3755 super_check_offset.as_register());
3756 } else if (must_load_sco) {
3757 assert(temp_reg != noreg, "supply either a temp or a register offset")do { if (!(temp_reg != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3757, "assert(" "temp_reg != noreg" ") failed", "supply either a temp or a register offset"
); ::breakpoint(); } } while (0)
;
3758 }
3759
3760 Label L_fallthrough;
3761 int label_nulls = 0;
3762 if (L_success == NULL__null) { L_success = &L_fallthrough; label_nulls++; }
3763 if (L_failure == NULL__null) { L_failure = &L_fallthrough; label_nulls++; }
3764 if (L_slow_path == NULL__null) { L_slow_path = &L_fallthrough; label_nulls++; }
3765 assert(label_nulls <= 1, "at most one NULL in the batch")do { if (!(label_nulls <= 1)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3765, "assert(" "label_nulls <= 1" ") failed", "at most one NULL in the batch"
); ::breakpoint(); } } while (0)
;
3766
3767 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3768 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3769 Address super_check_offset_addr(super_klass, sco_offset);
3770
3771 // Hacked jcc, which "knows" that L_fallthrough, at least, is in
3772 // range of a jccb. If this routine grows larger, reconsider at
3773 // least some of these.
3774#define local_jcc(assembler_cond, label) \
3775 if (&(label) == &L_fallthrough) jccb(assembler_cond, label)jccb_0(assembler_cond, label, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3775)
; \
3776 else jcc( assembler_cond, label) /*omit semi*/
3777
3778 // Hacked jmp, which may only be used just before L_fallthrough.
3779#define final_jmp(label) \
3780 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
3781 else jmp(label) /*omit semi*/
3782
3783 // If the pointers are equal, we are done (e.g., String[] elements).
3784 // This self-check enables sharing of secondary supertype arrays among
3785 // non-primary types such as array-of-interface. Otherwise, each such
3786 // type would need its own customized SSA.
3787 // We move this check to the front of the fast path because many
3788 // type checks are in fact trivially successful in this manner,
3789 // so we get a nicely predicted branch right at the start of the check.
3790 cmpptr(sub_klass, super_klass);
3791 local_jcc(Assembler::equal, *L_success);
3792
3793 // Check the supertype display:
3794 if (must_load_sco) {
3795 // Positive movl does right thing on LP64.
3796 movl(temp_reg, super_check_offset_addr);
3797 super_check_offset = RegisterOrConstant(temp_reg);
3798 }
3799 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
3800 cmpptr(super_klass, super_check_addr); // load displayed supertype
3801
3802 // This check has worked decisively for primary supers.
3803 // Secondary supers are sought in the super_cache ('super_cache_addr').
3804 // (Secondary supers are interfaces and very deeply nested subtypes.)
3805 // This works in the same check above because of a tricky aliasing
3806 // between the super_cache and the primary super display elements.
3807 // (The 'super_check_addr' can address either, as the case requires.)
3808 // Note that the cache is updated below if it does not help us find
3809 // what we need immediately.
3810 // So if it was a primary super, we can just fail immediately.
3811 // Otherwise, it's the slow path for us (no success at this point).
3812
// Three code shapes follow, chosen at code-generation time:
//  1. offset in a register: decide at run time by comparing it with sc_offset;
//  2. constant offset equal to sc_offset: a miss goes to the slow path;
//  3. any other constant offset: a miss is a definite failure.
// Within each shape the branch polarity is picked so that whichever label is
// L_fallthrough needs no jump.
3813 if (super_check_offset.is_register()) {
3814 local_jcc(Assembler::equal, *L_success);
3815 cmpl(super_check_offset.as_register(), sc_offset);
3816 if (L_failure == &L_fallthrough) {
3817 local_jcc(Assembler::equal, *L_slow_path);
3818 } else {
3819 local_jcc(Assembler::notEqual, *L_failure);
3820 final_jmp(*L_slow_path);
3821 }
3822 } else if (super_check_offset.as_constant() == sc_offset) {
3823 // Need a slow path; fast failure is impossible.
3824 if (L_slow_path == &L_fallthrough) {
3825 local_jcc(Assembler::equal, *L_success);
3826 } else {
3827 local_jcc(Assembler::notEqual, *L_slow_path);
3828 final_jmp(*L_success);
3829 }
3830 } else {
3831 // No slow path; it's a fast decision.
3832 if (L_failure == &L_fallthrough) {
3833 local_jcc(Assembler::equal, *L_success);
3834 } else {
3835 local_jcc(Assembler::notEqual, *L_failure);
3836 final_jmp(*L_success);
3837 }
3838 }
3839
3840 bind(L_fallthrough);
3841
3842#undef local_jcc
3843#undef final_jmp
3844}
3845
3846
// Emits the slow part of a subtype check: a linear scan of sub_klass's
// secondary-supers array for super_klass, using repne_scan.
//
// Labels: at most one of L_success / L_failure may be NULL (asserted); a NULL
// label is replaced by the local L_fallthrough bound at the end.
// On a hit, super_klass is stored into sub_klass's secondary super cache
// before control goes to L_success.
//
// Registers: the scan uses rax, rcx and rdi. Each of them is pushed before
// use and popped afterwards unless it is temp_reg or temp2_reg (see
// IS_A_TEMP); rax is only pushed when super_klass has to be copied into it.
// sub_klass must be neither rax nor rcx (asserted). temp2_reg may be noreg.
//
// set_cond_codes: when true, rdi must be one of the temps so that it is not
// restored (asserted via !pushed_rdi).
3847void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3848 Register super_klass,
3849 Register temp_reg,
3850 Register temp2_reg,
3851 Label* L_success,
3852 Label* L_failure,
3853 bool set_cond_codes) {
3854 assert_different_registers(sub_klass, super_klass, temp_reg);
3855 if (temp2_reg != noreg)
3856 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3857#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3858
3859 Label L_fallthrough;
3860 int label_nulls = 0;
3861 if (L_success == NULL__null) { L_success = &L_fallthrough; label_nulls++; }
3862 if (L_failure == NULL__null) { L_failure = &L_fallthrough; label_nulls++; }
3863 assert(label_nulls <= 1, "at most one NULL in the batch")do { if (!(label_nulls <= 1)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3863, "assert(" "label_nulls <= 1" ") failed", "at most one NULL in the batch"
); ::breakpoint(); } } while (0)
;
3864
3865 // a couple of useful fields in sub_klass:
3866 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3867 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3868 Address secondary_supers_addr(sub_klass, ss_offset);
3869 Address super_cache_addr( sub_klass, sc_offset);
3870
3871 // Do a linear scan of the secondary super-klass chain.
3872 // This code is rarely used, so simplicity is a virtue here.
3873 // The repne_scan instruction uses fixed registers, which we must spill.
3874 // Don't worry too much about pre-existing connections with the input regs.
3875
3876 assert(sub_klass != rax, "killed reg")do { if (!(sub_klass != rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3876, "assert(" "sub_klass != rax" ") failed", "killed reg"
); ::breakpoint(); } } while (0)
; // killed by mov(rax, super)
3877 assert(sub_klass != rcx, "killed reg")do { if (!(sub_klass != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3877, "assert(" "sub_klass != rcx" ") failed", "killed reg"
); ::breakpoint(); } } while (0)
; // killed by lea(rcx, &pst_counter)
3878
3879 // Get super_klass value into rax (even if it was in rdi or rcx).
3880 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
3881 if (super_klass != rax || UseCompressedOops) {
3882 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
3883 mov(rax, super_klass);
3884 }
3885 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
3886 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
3887
// Non-product builds only: bump the SharedRuntime::_partial_subtype_ctr
// counter (through rcx on LP64).
3888#ifndef PRODUCT
3889 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3890 ExternalAddress pst_counter_addr((address) pst_counter);
3891 NOT_LP64( incrementl(pst_counter_addr) );
3892 LP64_ONLY( lea(rcx, pst_counter_addr) )lea(rcx, pst_counter_addr);
3893 LP64_ONLY( incrementl(Address(rcx, 0)) )incrementl(Address(rcx, 0));
3894#endif //PRODUCT
3895
3896 // We will consult the secondary-super array.
3897 movptr(rdi, secondary_supers_addr);
3898 // Load the array length. (Positive movl does right thing on LP64.)
3899 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
3900 // Skip to start of data.
3901 addptr(rdi, Array<Klass*>::base_offset_in_bytes());
3902
3903 // Scan RCX words at [RDI] for an occurrence of RAX.
3904 // Set NZ/Z based on last compare.
3905 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
3906 // not change flags (only scas instruction which is repeated sets flags).
3907 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
3908
3909 testptr(rax,rax); // Set Z = 0
3910 repne_scan();
3911
3912 // Unspill the temp. registers:
// Pops are in reverse order of the pushes above.
3913 if (pushed_rdi) pop(rdi);
3914 if (pushed_rcx) pop(rcx);
3915 if (pushed_rax) pop(rax);
3916
3917 if (set_cond_codes) {
3918 // Special hack for the AD files: rdi is guaranteed non-zero.
3919 assert(!pushed_rdi, "rdi must be left non-NULL")do { if (!(!pushed_rdi)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3919, "assert(" "!pushed_rdi" ") failed", "rdi must be left non-NULL"
); ::breakpoint(); } } while (0)
;
3920 // Also, the condition codes are properly set Z/NZ on succeed/failure.
3921 }
3922
// A short jump (jccb) is used only when the failure target is the local
// fall-through label; an external label gets a regular jcc.
3923 if (L_failure == &L_fallthrough)
3924 jccb(Assembler::notEqual, *L_failure)jccb_0(Assembler::notEqual, *L_failure, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3924)
;
3925 else jcc(Assembler::notEqual, *L_failure);
3926
3927 // Success. Cache the super we found and proceed in triumph.
3928 movptr(super_cache_addr, super_klass);
3929
3930 if (L_success != &L_fallthrough) {
3931 jmp(*L_success);
3932 }
3933
3934#undef IS_A_TEMP
3935
3936 bind(L_fallthrough);
3937}
3938
// Emits a class-initialization barrier for 'klass'.
// Control reaches the fast path when the klass's init state equals
// InstanceKlass::fully_initialized, or when 'thread' equals the klass's
// init_thread field; otherwise it reaches the slow path.
// At least one of L_fast_path / L_slow_path must be non-NULL (asserted). The
// NULL one is replaced by a local label bound at the end, so that outcome
// falls through. Passing both labels non-NULL ends in Unimplemented().
3939void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
3940 assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required")do { if (!(L_fast_path != __null || L_slow_path != __null)) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3940, "assert(" "L_fast_path != __null || L_slow_path != __null"
") failed", "at least one is required"); ::breakpoint(); } }
while (0)
;
3941
3942 Label L_fallthrough;
3943 if (L_fast_path == NULL__null) {
3944 L_fast_path = &L_fallthrough;
3945 } else if (L_slow_path == NULL__null) {
3946 L_slow_path = &L_fallthrough;
3947 }
3948
3949 // Fast path check: class is fully initialized
3950 cmpb(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
3951 jcc(Assembler::equal, *L_fast_path);
3952
3953 // Fast path check: current thread is initializer thread
3954 cmpptr(thread, Address(klass, InstanceKlass::init_thread_offset()));
// The branch polarity is chosen so that the fall-through outcome needs no
// jump; the local label is bound immediately after the branch.
3955 if (L_slow_path == &L_fallthrough) {
3956 jcc(Assembler::equal, *L_fast_path);
3957 bind(*L_slow_path);
3958 } else if (L_fast_path == &L_fallthrough) {
3959 jcc(Assembler::notEqual, *L_slow_path);
3960 bind(*L_fast_path);
3961 } else {
3962 Unimplemented()do { (*g_assert_poison) = 'X';; report_unimplemented("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3962); ::breakpoint(); } while (0)
;
3963 }
3964}
3965
// 32-bit conditional move from memory: dst = src if condition cc holds.
// Uses cmovl when VM_Version::supports_cmov(); otherwise emits a short branch
// on the negated condition around a plain movl.
3966void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
3967 if (VM_Version::supports_cmov()) {
3968 cmovl(cc, dst, src);
3969 } else {
3970 Label L;
3971 jccb(negate_condition(cc), L)jccb_0(negate_condition(cc), L, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3971)
;
3972 movl(dst, src);
3973 bind(L);
3974 }
3975}
3976
// 32-bit conditional move between registers: dst = src if condition cc holds.
// Uses cmovl when VM_Version::supports_cmov(); otherwise emits a short branch
// on the negated condition around a plain movl.
3977void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
3978 if (VM_Version::supports_cmov()) {
3979 cmovl(cc, dst, src);
3980 } else {
3981 Label L;
3982 jccb(negate_condition(cc), L)jccb_0(negate_condition(cc), L, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3982)
;
3983 movl(dst, src);
3984 bind(L);
3985 }
3986}
3987
// Emits a call to the oop-verification stub for the value held in 'reg'.
// Emits nothing when VerifyOops is off.
//   reg        register whose contents are to be verified
//   s          description text included in the failure message
//   file, line source location included in the failure message
// The message is formatted into a resource-area stream and then copied with
// code_string() so that it outlives the ResourceMark scope.
// Stack layout at the call, oldest push first: rscratch1 (under _LP64), rax,
// reg, message address.
3988void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
3989 if (!VerifyOops) return;
3990
3991 // Pass register number to verify_oop_subroutine
3992 const char* b = NULL__null;
3993 {
3994 ResourceMark rm;
3995 stringStream ss;
3996 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
3997 b = code_string(ss.as_string());
3998 }
3999 BLOCK_COMMENT("verify_oop {");
4000#ifdef _LP641
4001 push(rscratch1); // save r10, trashed by movptr()
4002#endif
4003 push(rax); // save rax,
4004 push(reg); // pass register argument
4005 ExternalAddress buffer((address) b);
4006 // avoid using pushptr, as it modifies scratch registers
4007 // and our contract is not to modify anything
4008 movptr(rax, buffer.addr());
4009 push(rax);
4010 // call indirectly to solve generation ordering problem
4011 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4012 call(rax);
4013 // Caller pops the arguments (oop, message) and restores rax, r10
// NOTE(review): nothing is popped here after the call, so the cleanup the
// line above describes is presumably performed by the called stub -- confirm.
4014 BLOCK_COMMENT("} verify_oop");
4015}
4016
// Emits an instruction that fills dst with all-one bits for the given vector
// length. With UseAVX > 2 and either a 512-bit vector or AVX512VL support it
// uses vpternlogd with immediate 0xFF; otherwise (UseAVX > 0 asserted) it
// compares dst with itself for byte equality via vpcmpeqb.
4017void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
4018 if (UseAVX > 2 && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
4019 vpternlogd(dst, 0xFF, dst, dst, vector_len);
4020 } else {
4021 assert(UseAVX > 0, "")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4021, "assert(" "UseAVX > 0" ") failed", ""); ::breakpoint
(); } } while (0)
;
4022 vpcmpeqb(dst, dst, dst, vector_len);
4023 }
4024}
4025
// Builds (without emitting code) an rsp-relative Address for an interpreter
// argument slot.
//   arg_slot           slot number, as a constant or in a register
//   extra_slot_offset  additional slot displacement applied via
//                      Interpreter::expr_offset_in_bytes()
// A constant slot is folded into the displacement; a register slot becomes
// the index register, scaled by Interpreter::stackElementSize. One extra
// wordSize is added to skip the return PC on the stack.
4026Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
4027 int extra_slot_offset) {
4028 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
4029 int stackElementSize = Interpreter::stackElementSize;
4030 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
// Debug-only sanity check that consecutive expression slots are exactly one
// stackElementSize apart.
4031#ifdef ASSERT1
4032 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
4033 assert(offset1 - offset == stackElementSize, "correct arithmetic")do { if (!(offset1 - offset == stackElementSize)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4033, "assert(" "offset1 - offset == stackElementSize" ") failed"
, "correct arithmetic"); ::breakpoint(); } } while (0)
;
4034#endif
4035 Register scale_reg = noreg;
4036 Address::ScaleFactor scale_factor = Address::no_scale;
4037 if (arg_slot.is_constant()) {
4038 offset += arg_slot.as_constant() * stackElementSize;
4039 } else {
4040 scale_reg = arg_slot.as_register();
4041 scale_factor = Address::times(stackElementSize);
4042 }
4043 offset += wordSize; // return PC is on stack
4044 return Address(rsp, scale_reg, scale_factor, offset);
4045}
4046
// Emits a call to the oop-verification stub for the value stored at 'addr'.
// Emits nothing when VerifyOops is off.
//   addr       memory location whose contents are to be verified
//   s          description text included in the failure message
//   file, line source location included in the failure message
// Stack layout at the call, oldest push first: rscratch1 (under _LP64), rax,
// the value loaded from addr, message address.
4047void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
4048 if (!VerifyOops) return;
4049
4050 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
4051 // Pass register number to verify_oop_subroutine
4052 const char* b = NULL__null;
4053 {
4054 ResourceMark rm;
4055 stringStream ss;
4056 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
4057 b = code_string(ss.as_string());
4058 }
4059#ifdef _LP641
4060 push(rscratch1); // save r10, trashed by movptr()
4061#endif
4062 push(rax); // save rax,
4063 // addr may contain rsp so we will have to adjust it based on the push
4064 // we just did (and on 64 bit we do two pushes)
4065 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
4066 // stores rax into addr which is backwards of what was intended.
// For an rsp-based addr the effective address is computed after the pushes,
// so the pushed bytes (one word, or two on LP64) are added back as a
// displacement before the value is pushed.
4067 if (addr.uses(rsp)) {
4068 lea(rax, addr);
4069 pushptr(Address(rax, LP64_ONLY(2 *)2 * BytesPerWord));
4070 } else {
4071 pushptr(addr);
4072 }
4073
4074 ExternalAddress buffer((address) b);
4075 // pass msg argument
4076 // avoid using pushptr, as it modifies scratch registers
4077 // and our contract is not to modify anything
4078 movptr(rax, buffer.addr());
4079 push(rax);
4080
4081 // call indirectly to solve generation ordering problem
4082 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4083 call(rax);
4084 // Caller pops the arguments (addr, message) and restores rax, r10.
// NOTE(review): nothing is popped here after the call, so the cleanup the
// line above describes is presumably performed by the called stub -- confirm.
4085}
4086
4087void MacroAssembler::verify_tlab() {
4088#ifdef ASSERT1
4089 if (UseTLAB && VerifyOops) {
4090 Label next, ok;
4091 Register t1 = rsi;
4092 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread)r15_thread;
4093
4094 push(t1);
4095 NOT_LP64(push(thread_reg));
4096 NOT_LP64(get_thread(thread_reg));
4097
4098 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
4099 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
4100 jcc(Assembler::aboveEqual, next);
4101 STOP("assert(top >= start)")block_comment("assert(top >= start)"); stop("assert(top >= start)"
)
;
4102 should_not_reach_here();
4103
4104 bind(next);
4105 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
4106 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
4107 jcc(Assembler::aboveEqual, ok);
4108 STOP("assert(top <= end)")block_comment("assert(top <= end)"); stop("assert(top <= end)"
)
;
4109 should_not_reach_here();
4110
4111 bind(ok);
4112 NOT_LP64(pop(thread_reg));
4113 pop(t1);
4114 }
4115#endif
4116}
4117
// Decoded view of an x87 FPU control word held in the low 16 bits of _value.
// The single-bit accessors report bits 0..5 (printed as "masks" by print());
// rounding_control() and precision_control() return two-bit selectors.
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const  { return  (_value >> 10) & 3      ; }
  int  precision_control() const { return  (_value >>  8) & 3      ; }
  bool precision() const         { return ((_value >>  5) & 1) != 0; }
  bool underflow() const         { return ((_value >>  4) & 1) != 0; }
  bool overflow() const          { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const       { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const      { return ((_value >>  1) & 1) != 0; }
  bool invalid() const           { return ((_value >>  0) & 1) != 0; }

  // Prints the raw word, the six mask bits (upper case = set, lower case =
  // clear) and the names of the rounding and precision selectors to stdout.
  void print() const {
    // Both selectors are masked to two bits by their accessors, so every
    // possible value is a valid index into these tables; no fallback case
    // for an "unknown" selector can ever be reached.
    static const char* const rounding_names[4] = {
      "round near", "round down", "round up ", "chop "
    };
    static const char* const precision_names[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char* rc = rounding_names[rounding_control()];
    const char* pc = precision_names[precision_control()];
    // flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};
4170
// Decoded view of an x87 FPU status word held in the low 16 bits of _value.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const         { return flag(15); }
  bool C3() const           { return flag(14); }
  bool C2() const           { return flag(10); }
  bool C1() const           { return flag( 9); }
  bool C0() const           { return flag( 8); }
  // Three-bit top-of-stack field (bits 11..13).
  int  top() const          { return (_value >> 11) & 7; }
  bool error_status() const { return flag( 7); }
  bool stack_fault() const  { return flag( 6); }
  bool precision() const    { return flag( 5); }
  bool underflow() const    { return flag( 4); }
  bool overflow() const     { return flag( 3); }
  bool zero_divide() const  { return flag( 2); }
  bool denormalized() const { return flag( 1); }
  bool invalid() const      { return flag( 0); }

  // Prints the raw word, the eight flag bits, the four condition codes and
  // the top-of-stack index to stdout; a clear bit is shown as '-'.
  void print() const {
    // condition codes
    const char c[5] = {
      C3() ? '3' : '-',
      C2() ? '2' : '-',
      C1() ? '1' : '-',
      C0() ? '0' : '-',
      '\x0'
    };
    // flags
    const char f[9] = {
      error_status() ? 'E' : '-',
      stack_fault () ? 'S' : '-',
      precision   () ? 'P' : '-',
      underflow   () ? 'U' : '-',
      overflow    () ? 'O' : '-',
      zero_divide () ? 'Z' : '-',
      denormalized() ? 'D' : '-',
      invalid     () ? 'I' : '-',
      '\x0'
    };
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

 private:
  // True when the given bit of _value is set.
  bool flag(int bit) const  { return ((_value >> bit) & 1) != 0; }
};
4214
// x87 FPU tag word: eight two-bit tags packed into the low 16 bits of _value.
class TagWord {
 public:
  int32_t _value;

  // Returns the two-bit tag stored at index i (tag i occupies bits 2i, 2i+1).
  int tag_at(int i) const {
    const int shift = i * 2;
    return (_value >> shift) & 3;
  }

  // Prints the low 16 bits as four hex digits to stdout.
  void print() const {
    const int low16 = _value & 0xFFFF;
    printf("%04x", low16);
  }

};
4226
// Raw image of one x87 data register, split into three fields.
// NOTE(review): _m0/_m1 look like the low/high mantissa halves and _ex like
// sign+exponent; confirm against the hardware save format.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the exact bit pattern _ex == -1, _m1 == 0xC0000000, _m0 == 0.
  bool is_indefinite() const {
    const bool ex_all_ones = (_ex == -1);
    const bool m1_matches  = (_m1 == (int32_t)0xC0000000);
    const bool m0_is_zero  = (_m0 == 0);
    return ex_all_ones && m1_matches && m0_is_zero;
  }

  // Prints sign, the three fields in hex, and "NaN" when _ex is 0x7FFF or -1.
  void print() const {
    const bool negative     = (_ex < 0);
    const bool max_exponent = (_ex == 0x7FFF) || (_ex == (int16_t)-1);
    const char  sign = negative ? '-' : '+';
    const char* kind = max_exponent ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  }

};
4244
4245class FPU_State {
4246 public:
4247 enum {
4248 register_size = 10,
4249 number_of_registers = 8,
4250 register_mask = 7
4251 };
4252
4253 ControlWord _control_word;
4254 StatusWord _status_word;
4255 TagWord _tag_word;
4256 int32_t _error_offset;
4257 int32_t _error_selector;
4258 int32_t _data_offset;
4259 int32_t _data_selector;
4260 int8_t _register[register_size * number_of_registers];
4261
4262 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
4263 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
4264
4265 const char* tag_as_string(int tag) const {
4266 switch (tag) {
4267 case 0: return "valid";
4268 case 1: return "zero";
4269 case 2: return "special";
4270 case 3: return "empty";
4271 }
4272 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4272); ::breakpoint(); } while (0)
;
4273 return NULL__null;
4274 }
4275
4276 void print() const {
4277 // print computation registers
4278 { int t = _status_word.top();
4279 for (int i = 0; i < number_of_registers; i++) {
4280 int j = (i - t) & register_mask;
4281 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
4282 st(j)->print();
4283 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
4284 }
4285 }
4286 printf("\n");
4287 // print control registers
4288 printf("ctrl = "); _control_word.print(); printf("\n");
4289 printf("stat = "); _status_word .print(); printf("\n");
4290 printf("tags = "); _tag_word .print(); printf("\n");
4291 }
4292
4293};
4294
// Decoded view of a saved CPU flags register value.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const        { return flag(11); }
  bool direction() const       { return flag(10); }
  bool sign() const            { return flag( 7); }
  bool zero() const            { return flag( 6); }
  bool auxiliary_carry() const { return flag( 4); }
  bool parity() const          { return flag( 2); }
  bool carry() const           { return flag( 0); }

  // Prints the raw value in hex followed by one letter per set flag
  // ('-' for a clear flag) to stdout.
  void print() const {
    // flags
    const char f[8] = {
      overflow       () ? 'O' : '-',
      direction      () ? 'D' : '-',
      sign           () ? 'S' : '-',
      zero           () ? 'Z' : '-',
      auxiliary_carry() ? 'A' : '-',
      parity         () ? 'P' : '-',
      carry          () ? 'C' : '-',
      '\x0'
    };
    // output
    printf("%08x flags = %s", _value, f);
  }

 private:
  // True when the given bit of _value is set.
  bool flag(int bit) const     { return ((_value >> bit) & 1) != 0; }
};
4323
// Saved 32-bit value of one integer register.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value to stdout twice: as 8 hex digits, then as a decimal
  // number right-aligned in 11 columns.
  void print() const {
    const int32_t v = _value;
    printf("%08x %11d", v, v);
  }

};
4333
4334class IU_State {
4335 public:
4336 Flag_Register _eflags;
4337 IU_Register _rdi;
4338 IU_Register _rsi;
4339 IU_Register _rbp;
4340 IU_Register _rsp;
4341 IU_Register _rbx;
4342 IU_Register _rdx;
4343 IU_Register _rcx;
4344 IU_Register _rax;
4345
4346 void print() const {
4347 // computation registers
4348 printf("rax, = "); _rax.print(); printf("\n");
4349 printf("rbx, = "); _rbx.print(); printf("\n");
4350 printf("rcx = "); _rcx.print(); printf("\n");
4351 printf("rdx = "); _rdx.print(); printf("\n");
4352 printf("rdi = "); _rdi.print(); printf("\n");
4353 printf("rsi = "); _rsi.print(); printf("\n");
4354 printf("rbp, = "); _rbp.print(); printf("\n");
4355 printf("rsp = "); _rsp.print(); printf("\n");
4356 printf("\n");
4357 // control registers
4358 printf("flgs = "); _eflags.print(); printf("\n");
4359 }
4360};
4361
4362
4363class CPU_State {
4364 public:
4365 FPU_State _fpu_state;
4366 IU_State _iu_state;
4367
4368 void print() const {
4369 printf("--------------------------------------------------\n");
4370 _iu_state .print();
4371 printf("\n");
4372 _fpu_state.print();
4373 printf("--------------------------------------------------\n");
4374 }
4375
4376};
4377
4378
4379static void _print_CPU_state(CPU_State* state) {
4380 state->print();
4381};
4382
4383
4384void MacroAssembler::print_CPU_state() {
4385 push_CPU_state();
4386 push(rsp); // pass CPU state
4387 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)((address)((address_word)(_print_CPU_state)))));
4388 addptr(rsp, wordSize); // discard argument
4389 pop_CPU_state();
4390}
4391
4392
4393#ifndef _LP641
4394static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
4395 static int counter = 0;
4396 FPU_State* fs = &state->_fpu_state;
4397 counter++;
4398 // For leaf calls, only verify that the top few elements remain empty.
4399 // We only need 1 empty at the top for C2 code.
4400 if( stack_depth < 0 ) {
4401 if( fs->tag_for_st(7) != 3 ) {
4402 printf("FPR7 not empty\n");
4403 state->print();
4404 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4404, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4405 return false;
4406 }
4407 return true; // All other stack states do not matter
4408 }
4409
4410 assert((fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std(),do { if (!((fs->_control_word._value & 0xffff) == StubRoutines
::x86::fpu_cntrl_wrd_std())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4411, "assert(" "(fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std()"
") failed", "bad FPU control word"); ::breakpoint(); } } while
(0)
4411 "bad FPU control word")do { if (!((fs->_control_word._value & 0xffff) == StubRoutines
::x86::fpu_cntrl_wrd_std())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4411, "assert(" "(fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std()"
") failed", "bad FPU control word"); ::breakpoint(); } } while
(0)
;
4412
4413 // compute stack depth
4414 int i = 0;
4415 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
4416 int d = i;
4417 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
4418 // verify findings
4419 if (i != FPU_State::number_of_registers) {
4420 // stack not contiguous
4421 printf("%s: stack not contiguous at ST%d\n", s, i);
4422 state->print();
4423 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4423, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4424 return false;
4425 }
4426 // check if computed stack depth corresponds to expected stack depth
4427 if (stack_depth < 0) {
4428 // expected stack depth is -stack_depth or less
4429 if (d > -stack_depth) {
4430 // too many elements on the stack
4431 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
4432 state->print();
4433 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4433, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4434 return false;
4435 }
4436 } else {
4437 // expected stack depth is stack_depth
4438 if (d != stack_depth) {
4439 // wrong stack depth
4440 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
4441 state->print();
4442 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4442, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4443 return false;
4444 }
4445 }
4446 // everything is cool
4447 return true;
4448}
4449
4450void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
4451 if (!VerifyFPU) return;
4452 push_CPU_state();
4453 push(rsp); // pass CPU state
4454 ExternalAddress msg((address) s);
4455 // pass message string s
4456 pushptr(msg.addr());
4457 push(stack_depth); // pass stack depth
4458 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)((address)((address_word)(_verify_FPU)))));
4459 addptr(rsp, 3 * wordSize); // discard arguments
4460 // check for error
4461 { Label L;
4462 testl(rax, rax);
4463 jcc(Assembler::notZero, L);
4464 int3(); // break if error condition
4465 bind(L);
4466 }
4467 pop_CPU_state();
4468}
4469#endif // _LP64
4470
4471void MacroAssembler::restore_cpu_control_state_after_jni() {
4472 // Either restore the MXCSR register after returning from the JNI Call
4473 // or verify that it wasn't changed (with -Xcheck:jni flag).
4474 if (VM_Version::supports_sse()) {
4475 if (RestoreMXCSROnJNICalls) {
4476 ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()));
4477 } else if (CheckJNICalls) {
4478 call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
4479 }
4480 }
4481 // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
4482 vzeroupper();
4483 // Reset k1 to 0xffff.
4484
4485#ifdef COMPILER21
4486 if (PostLoopMultiversioning && VM_Version::supports_evex()) {
4487 push(rcx);
4488 movl(rcx, 0xffff);
4489 kmovwl(k1, rcx);
4490 pop(rcx);
4491 }
4492#endif // COMPILER2
4493
4494#ifndef _LP641
4495 // Either restore the x87 floating pointer control word after returning
4496 // from the JNI call or verify that it wasn't changed.
4497 if (CheckJNICalls) {
4498 call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry()));
4499 }
4500#endif // _LP64
4501}
4502
4503// ((OopHandle)result).resolve();
4504void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
4505 assert_different_registers(result, tmp);
4506
4507 // Only 64 bit platforms support GCs that require a tmp register
4508 // Only IN_HEAP loads require a thread_tmp register
4509 // OopHandle::resolve is an indirection like jobject.
4510 access_load_at(T_OBJECT, IN_NATIVE,
4511 result, Address(result, 0), tmp, /*tmp_thread*/noreg);
4512}
4513
4514// ((WeakHandle)result).resolve();
4515void MacroAssembler::resolve_weak_handle(Register rresult, Register rtmp) {
4516 assert_different_registers(rresult, rtmp);
4517 Label resolved;
4518
4519 // A null weak handle resolves to null.
4520 cmpptr(rresult, 0);
4521 jcc(Assembler::equal, resolved);
4522
4523 // Only 64 bit platforms support GCs that require a tmp register
4524 // Only IN_HEAP loads require a thread_tmp register
4525 // WeakHandle::resolve is an indirection like jweak.
4526 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
4527 rresult, Address(rresult, 0), rtmp, /*tmp_thread*/noreg);
4528 bind(resolved);
4529}
4530
4531void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
4532 // get mirror
4533 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
4534 load_method_holder(mirror, method);
4535 movptr(mirror, Address(mirror, mirror_offset));
4536 resolve_oop_handle(mirror, tmp);
4537}
4538
4539void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4540 load_method_holder(rresult, rmethod);
4541 movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4542}
4543
4544void MacroAssembler::load_method_holder(Register holder, Register method) {
4545 movptr(holder, Address(method, Method::const_offset())); // ConstMethod*
4546 movptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
4547 movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
4548}
4549
4550void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
4551 assert_different_registers(src, tmp);
4552 assert_different_registers(dst, tmp);
4553#ifdef _LP641
4554 if (UseCompressedClassPointers) {
4555 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4556 decode_klass_not_null(dst, tmp);
4557 } else
4558#endif
4559 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4560}
4561
4562void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
4563 assert_different_registers(src, tmp);
4564 assert_different_registers(dst, tmp);
4565#ifdef _LP641
4566 if (UseCompressedClassPointers) {
4567 encode_klass_not_null(src, tmp);
4568 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4569 } else
4570#endif
4571 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4572}
4573
4574void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
4575 Register tmp1, Register thread_tmp) {
4576 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4577 decorators = AccessInternal::decorator_fixup(decorators);
4578 bool as_raw = (decorators & AS_RAW) != 0;
4579 if (as_raw) {
4580 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4581 } else {
4582 bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4583 }
4584}
4585
4586void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
4587 Register tmp1, Register tmp2) {
4588 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4589 decorators = AccessInternal::decorator_fixup(decorators);
4590 bool as_raw = (decorators & AS_RAW) != 0;
4591 if (as_raw) {
4592 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2);
4593 } else {
4594 bs->store_at(this, decorators, type, dst, src, tmp1, tmp2);
4595 }
4596}
4597
4598void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
4599 Register thread_tmp, DecoratorSet decorators) {
4600 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
4601}
4602
4603// Doesn't do verfication, generates fixed size code
4604void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
4605 Register thread_tmp, DecoratorSet decorators) {
4606 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
4607}
4608
4609void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
4610 Register tmp2, DecoratorSet decorators) {
4611 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
4612}
4613
4614// Used for storing NULLs.
4615void MacroAssembler::store_heap_oop_null(Address dst) {
4616 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
4617}
4618
4619#ifdef _LP641
4620void MacroAssembler::store_klass_gap(Register dst, Register src) {
4621 if (UseCompressedClassPointers) {
4622 // Store to klass gap in destination
4623 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
4624 }
4625}
4626
4627#ifdef ASSERT1
4628void MacroAssembler::verify_heapbase(const char* msg) {
4629 assert (UseCompressedOops, "should be compressed")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4629, "assert(" "UseCompressedOops" ") failed", "should be compressed"
); ::breakpoint(); } } while (0)
;
4630 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4630, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4631 if (CheckCompressedOops) {
4632 Label ok;
4633 push(rscratch1); // cmpptr trashes rscratch1
4634 cmpptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
4635 jcc(Assembler::equal, ok);
4636 STOP(msg)block_comment(msg); stop(msg);
4637 bind(ok);
4638 pop(rscratch1);
4639 }
4640}
4641#endif
4642
4643// Algorithm must match oop.inline.hpp encode_heap_oop.
4644void MacroAssembler::encode_heap_oop(Register r) {
4645#ifdef ASSERT1
4646 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
4647#endif
4648 verify_oop_msg(r, "broken oop in encode_heap_oop")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in encode_heap_oop\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4648)
;
4649 if (CompressedOops::base() == NULL__null) {
4650 if (CompressedOops::shift() != 0) {
4651 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4651, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4652 shrq(r, LogMinObjAlignmentInBytes);
4653 }
4654 return;
4655 }
4656 testq(r, r);
4657 cmovq(Assembler::equal, r, r12_heapbase);
4658 subq(r, r12_heapbase);
4659 shrq(r, LogMinObjAlignmentInBytes);
4660}
4661
4662void MacroAssembler::encode_heap_oop_not_null(Register r) {
4663#ifdef ASSERT1
4664 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
4665 if (CheckCompressedOops) {
4666 Label ok;
4667 testq(r, r);
4668 jcc(Assembler::notEqual, ok);
4669 STOP("null oop passed to encode_heap_oop_not_null")block_comment("null oop passed to encode_heap_oop_not_null");
stop("null oop passed to encode_heap_oop_not_null")
;
4670 bind(ok);
4671 }
4672#endif
4673 verify_oop_msg(r, "broken oop in encode_heap_oop_not_null")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in encode_heap_oop_not_null\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4673)
;
4674 if (CompressedOops::base() != NULL__null) {
4675 subq(r, r12_heapbase);
4676 }
4677 if (CompressedOops::shift() != 0) {
4678 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4678, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4679 shrq(r, LogMinObjAlignmentInBytes);
4680 }
4681}
4682
4683void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
4684#ifdef ASSERT1
4685 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
4686 if (CheckCompressedOops) {
4687 Label ok;
4688 testq(src, src);
4689 jcc(Assembler::notEqual, ok);
4690 STOP("null oop passed to encode_heap_oop_not_null2")block_comment("null oop passed to encode_heap_oop_not_null2")
; stop("null oop passed to encode_heap_oop_not_null2")
;
4691 bind(ok);
4692 }
4693#endif
4694 verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2")_verify_oop_checked(src, "broken oop " "src" ", " "\"broken oop in encode_heap_oop_not_null2\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4694)
;
4695 if (dst != src) {
4696 movq(dst, src);
4697 }
4698 if (CompressedOops::base() != NULL__null) {
4699 subq(dst, r12_heapbase);
4700 }
4701 if (CompressedOops::shift() != 0) {
4702 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4702, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4703 shrq(dst, LogMinObjAlignmentInBytes);
4704 }
4705}
4706
4707void MacroAssembler::decode_heap_oop(Register r) {
4708#ifdef ASSERT1
4709 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
4710#endif
4711 if (CompressedOops::base() == NULL__null) {
4712 if (CompressedOops::shift() != 0) {
4713 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4713, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4714 shlq(r, LogMinObjAlignmentInBytes);
4715 }
4716 } else {
4717 Label done;
4718 shlq(r, LogMinObjAlignmentInBytes);
4719 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4719)
;
4720 addq(r, r12_heapbase);
4721 bind(done);
4722 }
4723 verify_oop_msg(r, "broken oop in decode_heap_oop")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in decode_heap_oop\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4723)
;
4724}
4725
4726void MacroAssembler::decode_heap_oop_not_null(Register r) {
4727 // Note: it will change flags
4728 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4728, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4729 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4729, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4730 // Cannot assert, unverified entry point counts instructions (see .ad file)
4731 // vtableStubs also counts instructions in pd_code_size_limit.
4732 // Also do not verify_oop as this is called by verify_oop.
4733 if (CompressedOops::shift() != 0) {
4734 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4734, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4735 shlq(r, LogMinObjAlignmentInBytes);
4736 if (CompressedOops::base() != NULL__null) {
4737 addq(r, r12_heapbase);
4738 }
4739 } else {
4740 assert (CompressedOops::base() == NULL, "sanity")do { if (!(CompressedOops::base() == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4740, "assert(" "CompressedOops::base() == __null" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
4741 }
4742}
4743
4744void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
4745 // Note: it will change flags
4746 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4746, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4747 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4747, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4748 // Cannot assert, unverified entry point counts instructions (see .ad file)
4749 // vtableStubs also counts instructions in pd_code_size_limit.
4750 // Also do not verify_oop as this is called by verify_oop.
4751 if (CompressedOops::shift() != 0) {
4752 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4752, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4753 if (LogMinObjAlignmentInBytes == Address::times_8) {
4754 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
4755 } else {
4756 if (dst != src) {
4757 movq(dst, src);
4758 }
4759 shlq(dst, LogMinObjAlignmentInBytes);
4760 if (CompressedOops::base() != NULL__null) {
4761 addq(dst, r12_heapbase);
4762 }
4763 }
4764 } else {
4765 assert (CompressedOops::base() == NULL, "sanity")do { if (!(CompressedOops::base() == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4765, "assert(" "CompressedOops::base() == __null" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
4766 if (dst != src) {
4767 movq(dst, src);
4768 }
4769 }
4770}
4771
// Emits code that compresses the klass pointer held in r, in place.
//   - If CompressedKlassPointers::base() is non-NULL, the base is placed in tmp
//     and subtracted from r.
//   - If CompressedKlassPointers::shift() is non-zero, r is then shifted right by
//     LogKlassAlignmentInBytes (asserted to equal the configured shift).
// r and tmp must be different registers; tmp is written only when a base is set.
4772void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
4773 assert_different_registers(r, tmp);
4774 if (CompressedKlassPointers::base() != NULL__null) {
     // r -= base, using tmp to hold the 64-bit base constant.
4775 mov64(tmp, (int64_t)CompressedKlassPointers::base());
4776 subq(r, tmp);
4777 }
4778 if (CompressedKlassPointers::shift() != 0) {
4779 assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4779, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
     // r >>= LogKlassAlignmentInBytes
4780 shrq(r, LogKlassAlignmentInBytes);
4781 }
4782}
4783
// Emits code that compresses the klass pointer in src and leaves the result in
// dst; src itself is not written by any instruction emitted here.
// No scratch register is needed: when a base is configured, dst is first loaded
// with the negated base and src is then added to it (dst = src - base).
// src and dst must be different registers.
4784void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) {
4785 assert_different_registers(src, dst);
4786 if (CompressedKlassPointers::base() != NULL__null) {
     // dst = -base; dst += src
4787 mov64(dst, -(int64_t)CompressedKlassPointers::base());
4788 addq(dst, src);
4789 } else {
     // No base to remove: plain copy.
4790 movptr(dst, src);
4791 }
4792 if (CompressedKlassPointers::shift() != 0) {
4793 assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4793, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
     // dst >>= LogKlassAlignmentInBytes
4794 shrq(dst, LogKlassAlignmentInBytes);
4795 }
4796}
4797
// Emits code that expands the compressed klass pointer held in r, in place.
// This is the inverse order of encode_klass_not_null: shift left first (when the
// configured shift is non-zero), then add the base (when it is non-NULL).
// tmp holds the base constant and is written only when a base is set.
// r and tmp must be different registers. Requires UseCompressedClassPointers.
4798void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
4799 assert_different_registers(r, tmp);
4800 // Note: it will change flags
4801 assert(UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4801, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4802 // Cannot assert, unverified entry point counts instructions (see .ad file)
4803 // vtableStubs also counts instructions in pd_code_size_limit.
4804 // Also do not verify_oop as this is called by verify_oop.
4805 if (CompressedKlassPointers::shift() != 0) {
4806 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4806, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
     // r <<= LogKlassAlignmentInBytes
4807 shlq(r, LogKlassAlignmentInBytes);
4808 }
4809 if (CompressedKlassPointers::base() != NULL__null) {
     // r += base, using tmp to hold the 64-bit base constant.
4810 mov64(tmp, (int64_t)CompressedKlassPointers::base());
4811 addq(r, tmp);
4812 }
4813}
4814
// Emits code that expands the compressed klass pointer in src and leaves the
// result in dst, without using a scratch register.
// Three shapes are generated, depending on the CompressedKlassPointers settings:
//   - no base and no shift: a single movl(dst, src);
//   - shift != 0:           dst = (base or 0); leaq dst, [dst + src*8];
//   - shift == 0, base set: dst = base; dst += src.
// src and dst must be different registers. Requires UseCompressedClassPointers.
4815void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) {
4816 assert_different_registers(src, dst);
4817 // Note: it will change flags
4818 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4818, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4819 // Cannot assert, unverified entry point counts instructions (see .ad file)
4820 // vtableStubs also counts instructions in pd_code_size_limit.
4821 // Also do not verify_oop as this is called by verify_oop.
4822
4823 if (CompressedKlassPointers::base() == NULL__null &&
4824 CompressedKlassPointers::shift() == 0) {
4825 // The best case scenario is that there is no base or shift. Then it is already
4826 // a pointer that needs nothing but a register rename.
4827 movl(dst, src);
4828 } else {
     // Seed dst with the base, or with zero when there is no base.
4829 if (CompressedKlassPointers::base() != NULL__null) {
4830 mov64(dst, (int64_t)CompressedKlassPointers::base());
4831 } else {
4832 xorq(dst, dst);
4833 }
4834 if (CompressedKlassPointers::shift() != 0) {
4835 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4835, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4836 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?")do { if (!(LogKlassAlignmentInBytes == Address::times_8)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4836, "assert(" "LogKlassAlignmentInBytes == Address::times_8"
") failed", "klass not aligned on 64bits?"); ::breakpoint();
} } while (0)
;
     // The shift is folded into the address scale (times_8), which is why the
     // assert above requires LogKlassAlignmentInBytes == Address::times_8.
4837 leaq(dst, Address(dst, src, Address::times_8, 0));
4838 } else {
     // No shift: dst = base + src.
4839 addq(dst, src);
4840 }
4841 }
4842}
4843
// Emits a narrow-oop move into register dst for the object referenced by obj.
// The object is looked up in this assembler's OopRecorder; the resulting index
// and an oop relocation built from that index are handed to mov_narrow_oop.
// Requires UseCompressedOops, an initialized Java heap and a non-NULL
// oop_recorder() (all asserted).
4844void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
4845 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4845, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4846 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4846, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4847 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4847, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
     // Note: the recorder index, not an encoded oop value, is what is passed on
     // together with the relocation spec.
4848 int oop_index = oop_recorder()->find_index(obj);
4849 RelocationHolder rspec = oop_Relocation::spec(oop_index);
4850 mov_narrow_oop(dst, oop_index, rspec);
4851}
4852
// Emits a narrow-oop move into the memory operand dst for the object referenced
// by obj. Same steps as the Register overload: look obj up in the OopRecorder,
// build an oop relocation from the index, and hand both to mov_narrow_oop.
// Requires UseCompressedOops, an initialized Java heap and a non-NULL
// oop_recorder() (all asserted).
4853void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
4854 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4854, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4855 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4855, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4856 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4856, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4857 int oop_index = oop_recorder()->find_index(obj);
4858 RelocationHolder rspec = oop_Relocation::spec(oop_index);
4859 mov_narrow_oop(dst, oop_index, rspec);
4860}
4861
// Emits a narrow-klass move into register dst for klass k.
// k is registered with the OopRecorder and a metadata relocation is built from
// the returned index. Unlike set_narrow_oop, the value handed to mov_narrow_oop
// is the already-encoded klass (CompressedKlassPointers::encode(k)), not the
// recorder index.
// Requires UseCompressedClassPointers and a non-NULL oop_recorder() (asserted).
4862void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
4863 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4863, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4864 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4864, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4865 int klass_index = oop_recorder()->find_index(k);
4866 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4867 mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4868}
4869
// Emits a narrow-klass move into the memory operand dst for klass k.
// Same steps as the Register overload: register k with the OopRecorder, build a
// metadata relocation from the index, and pass the encoded klass value
// (CompressedKlassPointers::encode(k)) plus the relocation to mov_narrow_oop.
// Requires UseCompressedClassPointers and a non-NULL oop_recorder() (asserted).
4870void MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
4871 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4871, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4872 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4872, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4873 int klass_index = oop_recorder()->find_index(k);
4874 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4875 mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4876}
4877
// Emits a narrow-oop compare of register dst against the object referenced by
// obj. The object is looked up in the OopRecorder; the index and an oop
// relocation built from it are forwarded to Assembler::cmp_narrow_oop.
// Requires UseCompressedOops, an initialized Java heap and a non-NULL
// oop_recorder() (all asserted).
4878void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
4879 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4879, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4880 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4880, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4881 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4881, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4882 int oop_index = oop_recorder()->find_index(obj);
4883 RelocationHolder rspec = oop_Relocation::spec(oop_index);
     // Explicitly qualified: calls the base Assembler's three-argument form.
4884 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
4885}
4886
// Emits a narrow-oop compare of the memory operand dst against the object
// referenced by obj. Same steps as the Register overload: OopRecorder lookup,
// oop relocation from the index, then Assembler::cmp_narrow_oop.
// Requires UseCompressedOops, an initialized Java heap and a non-NULL
// oop_recorder() (all asserted).
4887void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
4888 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4888, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4889 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4889, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4890 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4890, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4891 int oop_index = oop_recorder()->find_index(obj);
4892 RelocationHolder rspec = oop_Relocation::spec(oop_index);
4893 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
4894}
4895
// Emits a narrow-klass compare of register dst against klass k.
// k is registered with the OopRecorder and a metadata relocation is built from
// the returned index; the value compared against is the encoded klass
// (CompressedKlassPointers::encode(k)), passed to Assembler::cmp_narrow_oop.
// Requires UseCompressedClassPointers and a non-NULL oop_recorder() (asserted).
4896void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
4897 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4897, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4898 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4898, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4899 int klass_index = oop_recorder()->find_index(k);
4900 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4901 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4902}
4903
// Emits a narrow-klass compare of the memory operand dst against klass k.
// Same steps as the Register overload: register k with the OopRecorder, build a
// metadata relocation from the index, and pass the encoded klass value plus the
// relocation to Assembler::cmp_narrow_oop.
// Requires UseCompressedClassPointers and a non-NULL oop_recorder() (asserted).
4904void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
4905 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4905, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4906 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4906, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4907 int klass_index = oop_recorder()->find_index(k);
4908 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4909 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4910}
4911
// Emits code that (re)loads r12_heapbase. Emits nothing unless UseCompressedOops.
//   - Heap already created (Universe::heap() != NULL) at code-generation time:
//       base == NULL -> r12_heapbase is cleared with xor;
//       otherwise    -> CompressedOops::ptrs_base() is embedded as a 64-bit constant.
//   - Heap not yet created: a load through CompressedOops::ptrs_base_addr() is
//     emitted instead of a constant.
//     NOTE(review): presumably so the value is picked up at run time once the
//     heap exists -- confirm against the callers that run this early.
4912void MacroAssembler::reinit_heapbase() {
4913 if (UseCompressedOops) {
4914 if (Universe::heap() != NULL__null) {
4915 if (CompressedOops::base() == NULL__null) {
4916 MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
4917 } else {
4918 mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
4919 }
4920 } else {
4921 movptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
4922 }
4923 }
4924}
4925
4926#endif // _LP64
4927
4928// C2 compiled method's prolog code.
//
// Parameters:
//   framesize       - frame size in bytes including the return-address word; must
//                     be a multiple of StackAlignmentInBytes (asserted). It is
//                     decremented locally as the return address, the saved rbp
//                     and the optional stack-depth cookie are accounted for.
//   stack_bang_size - when > 0 (after removing the return-address word) a stack
//                     overflow check is generated and rbp is pushed; otherwise the
//                     frame is allocated with a forced 32-bit immediate and rbp is
//                     stored into the frame. Must be >= framesize or <= 0 (asserted).
//   fp_mode_24b     - consulted only inside the `#ifndef _LP64` section below.
//   is_stub         - when false, the barrier set's nmethod entry barrier is
//                     emitted at the end of the prolog.
4929void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
4930
4931 // WARNING: Initial instruction MUST be 5 bytes or longer so that
4932 // NativeJump::patch_verified_entry will be able to patch out the entry
4933 // code safely. The push to verify stack depth is ok at 5 bytes,
4934 // the frame allocation can be either 3 or 6 bytes. So if we don't do
4935 // stack bang then we must use the 6 byte frame allocation even if
4936 // we have no frame. :-(
4937 assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect")do { if (!(stack_bang_size >= framesize || stack_bang_size
<= 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4937, "assert(" "stack_bang_size >= framesize || stack_bang_size <= 0"
") failed", "stack bang size incorrect"); ::breakpoint(); } }
while (0)
;
4938
4939 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned")do { if (!((framesize & (StackAlignmentInBytes-1)) == 0))
{ (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4939, "assert(" "(framesize & (StackAlignmentInBytes-1)) == 0"
") failed", "frame size not aligned"); ::breakpoint(); } } while
(0)
;
4940 // Remove word for return addr
4941 framesize -= wordSize;
4942 stack_bang_size -= wordSize;
4943
4944 // Calls to C2R adapters often do not accept exceptional returns.
4945 // We require that their callers must bang for them. But be careful, because
4946 // some VM calls (such as call site linkage) can use several kilobytes of
4947 // stack. But the stack safety zone should account for that.
4948 // See bugs 4446381, 4468289, 4497237.
4949 if (stack_bang_size > 0) {
4950 generate_stack_overflow_check(stack_bang_size);
4951
4952 // We always push rbp so that, on return to the interpreter, rbp will be
4953 // restored correctly and we can correct the stack.
4954 push(rbp);
4955 // Save caller's stack pointer into RBP if the frame pointer is preserved.
4956 if (PreserveFramePointer) {
4957 mov(rbp, rsp);
4958 }
4959 // Remove word for rbp
4960 framesize -= wordSize;
4961
4962 // Create frame
4963 if (framesize) {
4964 subptr(rsp, framesize);
4965 }
4966 } else {
4967 // Create frame (force generation of a 4 byte immediate value)
4968 subptr_imm32(rsp, framesize);
4969
4970 // Save RBP register now.
     // The slot used is the topmost word of the frame just allocated.
4971 framesize -= wordSize;
4972 movptr(Address(rsp, framesize), rbp);
4973 // Save caller's stack pointer into RBP if the frame pointer is preserved.
4974 if (PreserveFramePointer) {
     // rbp = rsp + framesize, i.e. the address of the slot rbp was stored to.
4975 movptr(rbp, rsp);
4976 if (framesize > 0) {
4977 addptr(rbp, framesize);
4978 }
4979 }
4980 }
4981
4982 if (VerifyStackAtCalls) { // Magic cookie to verify stack depth
     // Stored in the word directly below the saved rbp.
4983 framesize -= wordSize;
4984 movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4985 }
4986
4987#ifndef _LP641
4988 // If method sets FPU control word do it now
4989 if (fp_mode_24b) {
4990 fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
4991 }
4992 if (UseSSE >= 2 && VerifyFPU) {
4993 verify_FPU(0, "FPU stack must be clean on entry");
4994 }
4995#endif
4996
4997#ifdef ASSERT1
     // Debug-only check emitted into the prolog: with VerifyStackAtCalls, stop
     // unless (rsp & (StackAlignmentInBytes-1)) == StackAlignmentInBytes-wordSize
     // as measured after the temporary push of rax.
4998 if (VerifyStackAtCalls) {
4999 Label L;
5000 push(rax);
5001 mov(rax, rsp);
5002 andptr(rax, StackAlignmentInBytes-1);
5003 cmpptr(rax, StackAlignmentInBytes-wordSize);
5004 pop(rax);
5005 jcc(Assembler::equal, L);
5006 STOP("Stack is not properly aligned!")block_comment("Stack is not properly aligned!"); stop("Stack is not properly aligned!"
)
;
5007 bind(L);
5008 }
5009#endif
5010
5011 if (!is_stub) {
     // NOTE(review): neither barrier_set() nor barrier_set_assembler() is
     // null-checked here; presumably both are always set up before any
     // compiled-method prolog is generated -- confirm.
5012 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5013 bs->nmethod_entry_barrier(this);
5014 }
5015}
5016
5017#if COMPILER2_OR_JVMCI1
5018
5019// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
//
// Emitted code shape:
//   1. xtmp is zeroed at the widest width selected by MaxVectorSize / avx3_threshold.
//   2. A loop stores 64 bytes per iteration while at least 8 qwords remain.
//   3. The remainder (0..7 qwords) is handled either by one masked 64-byte store
//      (64-byte-vector configuration) or by an optional 32-byte store followed by
//      a masked 32-byte store / a qword-at-a-time loop.
// base and cnt are modified by the emitted code. rtmp and mask are only forwarded
// to fill64_masked / fill32_masked.
5020void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5021 // cnt - number of qwords (8-byte words).
5022 // base - start address, qword aligned.
5023 Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
5024 bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
5025 if (use64byteVector) {
5026 vpxor(xtmp, xtmp, xtmp, AVX_512bit);
5027 } else if (MaxVectorSize >= 32) {
5028 vpxor(xtmp, xtmp, xtmp, AVX_256bit);
5029 } else {
5030 pxor(xtmp, xtmp);
5031 }
     // Enter the loop at its condition check so that cnt < 8 stores nothing here.
5032 jmp(L_zero_64_bytes);
5033
5034 BIND(L_loop);
5035 if (MaxVectorSize >= 32) {
5036 fill64(base, 0, xtmp, use64byteVector);
5037 } else {
     // 4 x 16-byte stores = 64 bytes
5038 movdqu(Address(base, 0), xtmp);
5039 movdqu(Address(base, 16), xtmp);
5040 movdqu(Address(base, 32), xtmp);
5041 movdqu(Address(base, 48), xtmp);
5042 }
5043 addptr(base, 64);
5044
5045 BIND(L_zero_64_bytes);
     // cnt -= 8; keep looping while the result is still >= 0.
5046 subptr(cnt, 8);
5047 jccb(Assembler::greaterEqual, L_loop)jccb_0(Assembler::greaterEqual, L_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5047)
;
5048
5049 // Clear the trailing remainder (fewer than 64 bytes)
5050 if (use64byteVector) {
     // Undo the last subtraction: cnt is now the number of qwords left (0..7).
5051 addptr(cnt, 8);
5052 jccb(Assembler::equal, L_end)jccb_0(Assembler::equal, L_end, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5052)
;
5053 fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
5054 jmp(L_end);
5055 } else {
     // cnt + 4 < 0 means fewer than 4 qwords are left: skip the 32-byte store.
5056 addptr(cnt, 4);
5057 jccb(Assembler::less, L_tail)jccb_0(Assembler::less, L_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5057)
;
5058 if (MaxVectorSize >= 32) {
5059 vmovdqu(Address(base, 0), xtmp);
5060 } else {
5061 movdqu(Address(base, 0), xtmp);
5062 movdqu(Address(base, 16), xtmp);
5063 }
5064 }
     // NOTE(review): everything from here to L_end is emitted regardless of
     // use64byteVector. In the 64-byte-vector configuration it follows the
     // unconditional jmp(L_end) above and nothing jumps to L_tail, so it looks
     // unreachable there -- confirm whether it is intentionally left in.
5065 addptr(base, 32);
5066 subptr(cnt, 4);
5067
5068 BIND(L_tail);
     // Restore cnt to the number of qwords left (0..3); nothing to do if <= 0.
5069 addptr(cnt, 4);
5070 jccb(Assembler::lessEqual, L_end)jccb_0(Assembler::lessEqual, L_end, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5070)
;
5071 if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
5072 fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
5073 } else {
     // Qword-at-a-time loop; cnt is pre-decremented so the loop runs while cnt >= 0.
5074 decrement(cnt);
5075
5076 BIND(L_sloop);
5077 movq(Address(base, 0), xtmp);
5078 addptr(base, 8);
5079 decrement(cnt);
5080 jccb(Assembler::greaterEqual, L_sloop)jccb_0(Assembler::greaterEqual, L_sloop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5080)
;
5081 }
5082 BIND(L_end);
5083}
5084
5085// Clearing constant sized memory using YMM/ZMM registers.
//
// cnt is a compile-time count in qwords (8 qwords per 64-byte chunk, see
// vector64_count below). The code is fully unrolled: one fill64 per whole
// 64-byte chunk, followed by at most one tail sequence for the remaining
// 1..7 qwords. base is not modified; all stores use constant displacements.
// rtmp and mask are written only by tail cases that need a lane mask.
// Requires UseAVX > 2 and AVX512VL+BW support (asserted).
// NOTE(review): assumes cnt >= 0 -- confirm against callers.
5086void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5087 assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "")do { if (!(UseAVX > 2 && VM_Version::supports_avx512vlbw
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5087, "assert(" "UseAVX > 2 && VM_Version::supports_avx512vlbw()"
") failed", ""); ::breakpoint(); } } while (0)
;
5088 bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
5089
     // Split cnt into whole 64-byte chunks and a 0..7 qword remainder.
5090 int vector64_count = (cnt & (~0x7)) >> 3;
5091 cnt = cnt & 0x7;
5092
5093 // 64 byte initialization loop.
5094 vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
5095 for (int i = 0; i < vector64_count; i++) {
5096 fill64(base, i * 64, xtmp, use64byteVector);
5097 }
5098
5099 // Clear the remaining tail (fewer than 64 bytes).
5100 int disp = vector64_count * 64;
5101 if (cnt) {
     // In the masked cases the low N bits of the mask are set for N qword
     // (T_LONG) lanes: 0x7 -> 3 lanes, 0x1F -> 5, 0x3F -> 6, 0x7F -> 7.
     // NOTE(review): k0 presumably selects an unmasked store -- confirm against evmovdqu.
5102 switch (cnt) {
5103 case 1:
5104 movq(Address(base, disp), xtmp);
5105 break;
5106 case 2:
5107 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_128bit);
5108 break;
5109 case 3:
5110 movl(rtmp, 0x7);
5111 kmovwl(mask, rtmp);
5112 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_256bit);
5113 break;
5114 case 4:
5115 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5116 break;
5117 case 5:
5118 if (use64byteVector) {
5119 movl(rtmp, 0x1F);
5120 kmovwl(mask, rtmp);
5121 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5122 } else {
     // 4 qwords + 1 qword
5123 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5124 movq(Address(base, disp + 32), xtmp);
5125 }
5126 break;
5127 case 6:
5128 if (use64byteVector) {
5129 movl(rtmp, 0x3F);
5130 kmovwl(mask, rtmp);
5131 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5132 } else {
     // 4 qwords + 2 qwords
5133 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5134 evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, Assembler::AVX_128bit);
5135 }
5136 break;
5137 case 7:
5138 if (use64byteVector) {
5139 movl(rtmp, 0x7F);
5140 kmovwl(mask, rtmp);
5141 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5142 } else {
     // 4 qwords + 3 masked qwords
5143 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5144 movl(rtmp, 0x7);
5145 kmovwl(mask, rtmp);
5146 evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
5147 }
5148 break;
5149 default:
     // cnt was masked with 0x7 and is non-zero here, so only 1..7 are possible.
5150 fatal("Unexpected length : %d\n",cnt)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5150, "Unexpected length : %d\n",cnt); ::breakpoint(); } while
(0)
;
5151 break;
5152 }
5153 }
5154}
5155
// Emits code that zeroes cnt qwords starting at base, with cnt known only at
// run time.
//   - Unless is_large, counts up to InitArrayShortSize/BytesPerLong are cleared
//     by a loop of pointer-sized stores indexed downwards by cnt.
//   - Larger counts use, in order of preference: rep stosb (UseFastStosb),
//     xmm_clear_mem (UseXMMForObjInit), or rep stos.
// Register constraints (asserted): base == rdi, tmp == rax, cnt == rcx.
// tmp is zeroed up front except when is_large && UseXMMForObjInit.
// xtmp and mask are only forwarded to xmm_clear_mem.
5156void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
5157 bool is_large, KRegister mask) {
5158 // cnt - number of qwords (8-byte words).
5159 // base - start address, qword aligned.
5160 // is_large - if optimizers know cnt is larger than InitArrayShortSize
5161 assert(base==rdi, "base register must be edi for rep stos")do { if (!(base==rdi)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5161, "assert(" "base==rdi" ") failed", "base register must be edi for rep stos"
); ::breakpoint(); } } while (0)
;
5162 assert(tmp==rax, "tmp register must be eax for rep stos")do { if (!(tmp==rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5162, "assert(" "tmp==rax" ") failed", "tmp register must be eax for rep stos"
); ::breakpoint(); } } while (0)
;
5163 assert(cnt==rcx, "cnt register must be ecx for rep stos")do { if (!(cnt==rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5163, "assert(" "cnt==rcx" ") failed", "cnt register must be ecx for rep stos"
); ::breakpoint(); } } while (0)
;
5164 assert(InitArrayShortSize % BytesPerLong == 0,do { if (!(InitArrayShortSize % BytesPerLong == 0)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5165, "assert(" "InitArrayShortSize % BytesPerLong == 0" ") failed"
, "InitArrayShortSize should be the multiple of BytesPerLong"
); ::breakpoint(); } } while (0)
5165 "InitArrayShortSize should be the multiple of BytesPerLong")do { if (!(InitArrayShortSize % BytesPerLong == 0)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5165, "assert(" "InitArrayShortSize % BytesPerLong == 0" ") failed"
, "InitArrayShortSize should be the multiple of BytesPerLong"
); ::breakpoint(); } } while (0)
;
5166
5167 Label DONE;
     // tmp (rax) is the zero source for the store loop and the rep-stos paths; it
     // is left untouched only when the short loop is not emitted and
     // UseXMMForObjInit is set.
5168 if (!is_large || !UseXMMForObjInit) {
5169 xorptr(tmp, tmp);
5170 }
5171
5172 if (!is_large) {
5173 Label LOOP, LONG;
     // Counts above the short-array threshold go to the bulk path at LONG.
5174 cmpptr(cnt, InitArrayShortSize/BytesPerLong);
5175 jccb(Assembler::greater, LONG)jccb_0(Assembler::greater, LONG, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5175)
;
5176
5177 NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5178
5179 decrement(cnt);
5180 jccb(Assembler::negative, DONE)jccb_0(Assembler::negative, DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5180)
; // Zero length
5181
5182 // Use individual pointer-sized stores for small counts:
     // cnt is used as a descending index: stores at base[cnt], ..., base[0].
5183 BIND(LOOP);
5184 movptr(Address(base, cnt, Address::times_ptr), tmp);
5185 decrement(cnt);
5186 jccb(Assembler::greaterEqual, LOOP)jccb_0(Assembler::greaterEqual, LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5186)
;
5187 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5187)
;
5188
5189 BIND(LONG);
5190 }
5191
5192 // Use longer rep-prefixed ops for non-small counts:
5193 if (UseFastStosb) {
5194 shlptr(cnt, 3); // convert to number of bytes
5195 rep_stosb();
5196 } else if (UseXMMForObjInit) {
     // tmp is passed as the scratch register (rtmp) of xmm_clear_mem.
5197 xmm_clear_mem(base, cnt, tmp, xtmp, mask);
5198 } else {
5199 NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5200 rep_stos();
5201 }
5202
5203 BIND(DONE);
5204}
5205
5206#endif //COMPILER2_OR_JVMCI
5207
5208
5209void MacroAssembler::generate_fill(BasicType t, bool aligned,
5210 Register to, Register value, Register count,
5211 Register rtmp, XMMRegister xtmp) {
5212 ShortBranchVerifier sbv(this);
5213 assert_different_registers(to, value, count, rtmp);
5214 Label L_exit;
5215 Label L_fill_2_bytes, L_fill_4_bytes;
5216
5217#if defined(COMPILER21) && defined(_LP641)
5218 if(MaxVectorSize >=32 &&
5219 VM_Version::supports_avx512vlbw() &&
5220 VM_Version::supports_bmi2()) {
5221 generate_fill_avx3(t, to, value, count, rtmp, xtmp);
5222 return;
5223 }
5224#endif
5225
5226 int shift = -1;
5227 switch (t) {
5228 case T_BYTE:
5229 shift = 2;
5230 break;
5231 case T_SHORT:
5232 shift = 1;
5233 break;
5234 case T_INT:
5235 shift = 0;
5236 break;
5237 default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5237); ::breakpoint(); } while (0)
;
5238 }
5239
5240 if (t == T_BYTE) {
5241 andl(value, 0xff);
5242 movl(rtmp, value);
5243 shll(rtmp, 8);
5244 orl(value, rtmp);
5245 }
5246 if (t == T_SHORT) {
5247 andl(value, 0xffff);
5248 }
5249 if (t == T_BYTE || t == T_SHORT) {
5250 movl(rtmp, value);
5251 shll(rtmp, 16);
5252 orl(value, rtmp);
5253 }
5254
5255 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
5256 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
5257 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
5258 Label L_skip_align2;
5259 // align source address at 4 bytes address boundary
5260 if (t == T_BYTE) {
5261 Label L_skip_align1;
5262 // One byte misalignment happens only for byte arrays
5263 testptr(to, 1);
5264 jccb(Assembler::zero, L_skip_align1)jccb_0(Assembler::zero, L_skip_align1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5264)
;
5265 movb(Address(to, 0), value);
5266 increment(to);
5267 decrement(count);
5268 BIND(L_skip_align1);
5269 }
5270 // Two bytes misalignment happens only for byte and short (char) arrays
5271 testptr(to, 2);
5272 jccb(Assembler::zero, L_skip_align2)jccb_0(Assembler::zero, L_skip_align2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5272)
;
5273 movw(Address(to, 0), value);
5274 addptr(to, 2);
5275 subl(count, 1<<(shift-1));
5276 BIND(L_skip_align2);
5277 }
5278 if (UseSSE < 2) {
5279 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
5280 // Fill 32-byte chunks
5281 subl(count, 8 << shift);
5282 jcc(Assembler::less, L_check_fill_8_bytes);
5283 align(16);
5284
5285 BIND(L_fill_32_bytes_loop);
5286
5287 for (int i = 0; i < 32; i += 4) {
5288 movl(Address(to, i), value);
5289 }
5290
5291 addptr(to, 32);
5292 subl(count, 8 << shift);
5293 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
5294 BIND(L_check_fill_8_bytes);
5295 addl(count, 8 << shift);
5296 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5296)
;
5297 jmpb(L_fill_8_bytes)jmpb_0(L_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5297)
;
5298
5299 //
5300 // length is too short, just fill qwords
5301 //
5302 BIND(L_fill_8_bytes_loop);
5303 movl(Address(to, 0), value);
5304 movl(Address(to, 4), value);
5305 addptr(to, 8);
5306 BIND(L_fill_8_bytes);
5307 subl(count, 1 << (shift + 1));
5308 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
5309 // fall through to fill 4 bytes
5310 } else {
5311 Label L_fill_32_bytes;
5312 if (!UseUnalignedLoadStores) {
5313 // align to 8 bytes, we know we are 4 byte aligned to start
5314 testptr(to, 4);
5315 jccb(Assembler::zero, L_fill_32_bytes)jccb_0(Assembler::zero, L_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5315)
;
5316 movl(Address(to, 0), value);
5317 addptr(to, 4);
5318 subl(count, 1<<shift);
5319 }
5320 BIND(L_fill_32_bytes);
5321 {
5322 assert( UseSSE >= 2, "supported cpu only" )do { if (!(UseSSE >= 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5322, "assert(" "UseSSE >= 2" ") failed", "supported cpu only"
); ::breakpoint(); } } while (0)
;
5323 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
5324 movdl(xtmp, value);
5325 if (UseAVX >= 2 && UseUnalignedLoadStores) {
5326 Label L_check_fill_32_bytes;
5327 if (UseAVX > 2) {
5328 // Fill 64-byte chunks
5329 Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2;
5330
5331 // If number of bytes to fill < VM_Version::avx3_threshold(), perform fill using AVX2
5332 cmpl(count, VM_Version::avx3_threshold());
5333 jccb(Assembler::below, L_check_fill_64_bytes_avx2)jccb_0(Assembler::below, L_check_fill_64_bytes_avx2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5333)
;
5334
5335 vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
5336
5337 subl(count, 16 << shift);
5338 jccb(Assembler::less, L_check_fill_32_bytes)jccb_0(Assembler::less, L_check_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5338)
;
5339 align(16);
5340
5341 BIND(L_fill_64_bytes_loop_avx3);
5342 evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit);
5343 addptr(to, 64);
5344 subl(count, 16 << shift);
5345 jcc(Assembler::greaterEqual, L_fill_64_bytes_loop_avx3);
5346 jmpb(L_check_fill_32_bytes)jmpb_0(L_check_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5346)
;
5347
5348 BIND(L_check_fill_64_bytes_avx2);
5349 }
5350 // Fill 64-byte chunks
5351 Label L_fill_64_bytes_loop;
5352 vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit);
5353
5354 subl(count, 16 << shift);
5355 jcc(Assembler::less, L_check_fill_32_bytes);
5356 align(16);
5357
5358 BIND(L_fill_64_bytes_loop);
5359 vmovdqu(Address(to, 0), xtmp);
5360 vmovdqu(Address(to, 32), xtmp);
5361 addptr(to, 64);
5362 subl(count, 16 << shift);
5363 jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
5364
5365 BIND(L_check_fill_32_bytes);
5366 addl(count, 8 << shift);
5367 jccb(Assembler::less, L_check_fill_8_bytes)jccb_0(Assembler::less, L_check_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5367)
;
5368 vmovdqu(Address(to, 0), xtmp);
5369 addptr(to, 32);
5370 subl(count, 8 << shift);
5371
5372 BIND(L_check_fill_8_bytes);
5373 // clean upper bits of YMM registers
5374 movdl(xtmp, value);
5375 pshufd(xtmp, xtmp, 0);
5376 } else {
5377 // Fill 32-byte chunks
5378 pshufd(xtmp, xtmp, 0);
5379
5380 subl(count, 8 << shift);
5381 jcc(Assembler::less, L_check_fill_8_bytes);
5382 align(16);
5383
5384 BIND(L_fill_32_bytes_loop);
5385
5386 if (UseUnalignedLoadStores) {
5387 movdqu(Address(to, 0), xtmp);
5388 movdqu(Address(to, 16), xtmp);
5389 } else {
5390 movq(Address(to, 0), xtmp);
5391 movq(Address(to, 8), xtmp);
5392 movq(Address(to, 16), xtmp);
5393 movq(Address(to, 24), xtmp);
5394 }
5395
5396 addptr(to, 32);
5397 subl(count, 8 << shift);
5398 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
5399
5400 BIND(L_check_fill_8_bytes);
5401 }
5402 addl(count, 8 << shift);
5403 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5403)
;
5404 jmpb(L_fill_8_bytes)jmpb_0(L_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5404)
;
5405
5406 //
5407 // length is too short, just fill qwords
5408 //
5409 BIND(L_fill_8_bytes_loop);
5410 movq(Address(to, 0), xtmp);
5411 addptr(to, 8);
5412 BIND(L_fill_8_bytes);
5413 subl(count, 1 << (shift + 1));
5414 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
5415 }
5416 }
5417 // fill trailing 4 bytes
5418 BIND(L_fill_4_bytes);
5419 testl(count, 1<<shift);
5420 jccb(Assembler::zero, L_fill_2_bytes)jccb_0(Assembler::zero, L_fill_2_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5420)
;
5421 movl(Address(to, 0), value);
5422 if (t == T_BYTE || t == T_SHORT) {
5423 Label L_fill_byte;
5424 addptr(to, 4);
5425 BIND(L_fill_2_bytes);
5426 // fill trailing 2 bytes
5427 testl(count, 1<<(shift-1));
5428 jccb(Assembler::zero, L_fill_byte)jccb_0(Assembler::zero, L_fill_byte, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5428)
;
5429 movw(Address(to, 0), value);
5430 if (t == T_BYTE) {
5431 addptr(to, 2);
5432 BIND(L_fill_byte);
5433 // fill trailing byte
5434 testl(count, 1);
5435 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5435)
;
5436 movb(Address(to, 0), value);
5437 } else {
5438 BIND(L_fill_byte);
5439 }
5440 } else {
5441 BIND(L_fill_2_bytes);
5442 }
5443 BIND(L_exit);
5444}
5445
// Selects among evpbroadcastb/w/d/q according to the element width implied by
// `type` (byte/boolean, short/char, int/float, long/double), forwarding dst,
// src and vector_len unchanged. Any type not listed in the switch is reported
// through fatal().
5446void MacroAssembler::evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len) {
5447 switch(type) {
5448 case T_BYTE:
5449 case T_BOOLEAN:
5450 evpbroadcastb(dst, src, vector_len);
5451 break;
5452 case T_SHORT:
5453 case T_CHAR:
5454 evpbroadcastw(dst, src, vector_len);
5455 break;
5456 case T_INT:
5457 case T_FLOAT:
5458 evpbroadcastd(dst, src, vector_len);
5459 break;
5460 case T_LONG:
5461 case T_DOUBLE:
5462 evpbroadcastq(dst, src, vector_len);
5463 break;
5464 default:
5465 fatal("Unhandled type : %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5465, "Unhandled type : %s", type2name(type)); ::breakpoint
(); } while (0)
;
5466 break;
5467 }
5468}
5469
5470// encode char[] to byte[] in ISO_8859_1 or ASCII
5471 //@IntrinsicCandidate
5472 //private static int implEncodeISOArray(byte[] sa, int sp,
5473 //byte[] da, int dp, int len) {
5474 // int i = 0;
5475 // for (; i < len; i++) {
5476 // char c = StringUTF16.getChar(sa, sp++);
5477 // if (c > '\u00FF')
5478 // break;
5479 // da[dp++] = (byte)c;
5480 // }
5481 // return i;
5482 //}
5483 //
5484 //@IntrinsicCandidate
5485 //private static int implEncodeAsciiArray(char[] sa, int sp,
5486 // byte[] da, int dp, int len) {
5487 // int i = 0;
5488 // for (; i < len; i++) {
5489 // char c = sa[sp++];
5490 // if (c >= '\u0080')
5491 // break;
5492 // da[dp++] = (byte)c;
5493 // }
5494 // return i;
5495 //}
// Register contract as used by the code below:
//   src     - read as 16-bit elements (scaled by times_2); advanced past the end
//   dst     - written as bytes (scaled by times_1); advanced past the end
//   len     - element count on entry; negated and counted up towards zero
//   result  - 0 when len is 0; otherwise the original len plus the (negative)
//             count of elements left unprocessed when the copy stopped
//   tmp1Reg - holds the broadcast `mask` when a vector path is emitted
//   tmp2Reg..tmp4Reg, tmp5 - scratch
//   ascii   - selects the 0x80-based masks instead of the 0x100-based ones
5496void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
5497 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
5498 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
5499 Register tmp5, Register result, bool ascii) {
5500
5501 // rsi: src
5502 // rdi: dst
5503 // rdx: len
5504 // rcx: tmp5
5505 // rax: result
5506 ShortBranchVerifier sbv(this);
5507 assert_different_registers(src, dst, len, tmp5, result);
5508 Label L_done, L_copy_1_char, L_copy_1_char_exit;
5509
// Bits that must all be clear for an element to be copied: `mask` carries the
// pattern for two adjacent 16-bit elements, `short_mask` for a single one.
5510 int mask = ascii ? 0xff80ff80 : 0xff00ff00;
5511 int short_mask = ascii ? 0xff80 : 0xff00;
5512
5513 // set result
5514 xorl(result, result);
5515 // check for zero length
5516 testl(len, len);
5517 jcc(Assembler::zero, L_done);
5518
5519 movl(result, len);
5520
// Point src/dst just past the last element and negate len, so that
// Address(base, len, scale) walks forward while len counts up to zero.
5521 // Setup pointers
5522 lea(src, Address(src, len, Address::times_2)); // char[]
5523 lea(dst, Address(dst, len, Address::times_1)); // byte[]
5524 negptr(len);
5525
5526 if (UseSSE42Intrinsics || UseAVX >= 2) {
5527 Label L_copy_8_chars, L_copy_8_chars_exit;
5528 Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
5529
5530 if (UseAVX >= 2) {
5531 Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
5532 movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector
5533 movdl(tmp1Reg, tmp5);
5534 vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit);
5535 jmp(L_chars_32_check);
5536
// 32 elements per iteration: two 32-byte loads from src, one 32-byte store to dst.
5537 bind(L_copy_32_chars);
5538 vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
5539 vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
5540 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
5541 vptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector
5542 jccb(Assembler::notZero, L_copy_32_chars_exit)jccb_0(Assembler::notZero, L_copy_32_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5542)
;
5543 vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
// NOTE(review): the 0xD8 permute presumably restores element order after the
// 256-bit pack - confirm against the instruction reference.
5544 vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
5545 vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
5546
5547 bind(L_chars_32_check);
5548 addptr(len, 32);
5549 jcc(Assembler::lessEqual, L_copy_32_chars);
5550
5551 bind(L_copy_32_chars_exit);
5552 subptr(len, 16);
5553 jccb(Assembler::greater, L_copy_16_chars_exit)jccb_0(Assembler::greater, L_copy_16_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5553)
;
5554
5555 } else if (UseSSE42Intrinsics) {
5556 movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector
5557 movdl(tmp1Reg, tmp5);
5558 pshufd(tmp1Reg, tmp1Reg, 0);
5559 jmpb(L_chars_16_check)jmpb_0(L_chars_16_check, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5559)
;
5560 }
5561
// 16 elements per iteration; the result to store ends up in tmp3Reg on every path.
5562 bind(L_copy_16_chars);
5563 if (UseAVX >= 2) {
5564 vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
5565 vptest(tmp2Reg, tmp1Reg);
5566 jcc(Assembler::notZero, L_copy_16_chars_exit);
5567 vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector_len */ 1);
5568 vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector_len */ 1);
5569 } else {
5570 if (UseAVX > 0) {
5571 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
5572 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
5573 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 0);
5574 } else {
5575 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
// NOTE(review): por accumulates into tmp2Reg without clearing it first, so
// whatever tmp2Reg held before is part of the tested value - confirm intended.
5576 por(tmp2Reg, tmp3Reg);
5577 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
5578 por(tmp2Reg, tmp4Reg);
5579 }
5580 ptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector
5581 jccb(Assembler::notZero, L_copy_16_chars_exit)jccb_0(Assembler::notZero, L_copy_16_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5581)
;
5582 packuswb(tmp3Reg, tmp4Reg);
5583 }
5584 movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);
5585
5586 bind(L_chars_16_check);
5587 addptr(len, 16);
5588 jcc(Assembler::lessEqual, L_copy_16_chars);
5589
5590 bind(L_copy_16_chars_exit);
5591 if (UseAVX >= 2) {
5592 // clean upper bits of YMM registers
5593 vpxor(tmp2Reg, tmp2Reg);
5594 vpxor(tmp3Reg, tmp3Reg);
5595 vpxor(tmp4Reg, tmp4Reg);
// Rebuild the mask in tmp1Reg from tmp5, which still holds `mask`.
5596 movdl(tmp1Reg, tmp5);
5597 pshufd(tmp1Reg, tmp1Reg, 0);
5598 }
5599 subptr(len, 8);
5600 jccb(Assembler::greater, L_copy_8_chars_exit)jccb_0(Assembler::greater, L_copy_8_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5600)
;
5601
// 8 elements per iteration: one 16-byte load, one 8-byte store.
5602 bind(L_copy_8_chars);
5603 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
5604 ptest(tmp3Reg, tmp1Reg);
5605 jccb(Assembler::notZero, L_copy_8_chars_exit)jccb_0(Assembler::notZero, L_copy_8_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5605)
;
5606 packuswb(tmp3Reg, tmp1Reg);
5607 movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
5608 addptr(len, 8);
5609 jccb(Assembler::lessEqual, L_copy_8_chars)jccb_0(Assembler::lessEqual, L_copy_8_chars, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5609)
;
5610
5611 bind(L_copy_8_chars_exit);
5612 subptr(len, 8);
5613 jccb(Assembler::zero, L_done)jccb_0(Assembler::zero, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5613)
;
5614 }
5615
// Scalar tail: one element per iteration until len reaches zero or an element
// with any `short_mask` bit set is found.
5616 bind(L_copy_1_char);
5617 load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
5618 testl(tmp5, short_mask); // check if Unicode or non-ASCII char
5619 jccb(Assembler::notZero, L_copy_1_char_exit)jccb_0(Assembler::notZero, L_copy_1_char_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5619)
;
5620 movb(Address(dst, len, Address::times_1, 0), tmp5);
5621 addptr(len, 1);
5622 jccb(Assembler::less, L_copy_1_char)jccb_0(Assembler::less, L_copy_1_char, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5622)
;
5623
5624 bind(L_copy_1_char_exit);
5625 addptr(result, len); // len is negative count of not processed elements
5626
5627 bind(L_done);
5628}
5629
5630#ifdef _LP641
5631/**
5632 * Helper for multiply_to_len().
 *
 * Adds src1 and then src2 into dest_lo; after each add, adcq(dest_hi, 0)
 * propagates the resulting carry into dest_hi. src1 and src2 are only read.
5633 */
5634void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
5635 addq(dest_lo, src1);
5636 adcq(dest_hi, 0);
5637 addq(dest_lo, src2);
5638 adcq(dest_hi, 0);
5639}
5640
5641/**
5642 * Multiply 64 bit by 64 bit first loop.
 *
 * xstart and idx are 32-bit-word indices that are decremented in place; kdx is
 * decremented by 2 per iteration. Each 64-bit product is written to z as two
 * 32-bit stores. On exit `carry` holds the high half of the last product.
 * `product` is expected to be rax (see the mulq comment below), and rdx is
 * clobbered.
5643 */
5644void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
5645 Register y, Register y_idx, Register z,
5646 Register carry, Register product,
5647 Register idx, Register kdx) {
5648 //
5649 // jlong carry, x[], y[], z[];
5650 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5651 // huge_128 product = y[idx] * x[xstart] + carry;
5652 // z[kdx] = (jlong)product;
5653 // carry = (jlong)(product >>> 64);
5654 // }
5655 // z[xstart] = carry;
5656 //
5657
5658 Label L_first_loop, L_first_loop_exit;
5659 Label L_one_x, L_one_y, L_multiply;
5660
// If this decrement goes negative only one 32-bit word of x is available:
// L_one_x loads it with a 32-bit move instead of the 64-bit load below.
5661 decrementl(xstart);
5662 jcc(Assembler::negative, L_one_x);
5663
5664 movq(x_xstart, Address(x, xstart, Address::times_4, 0));
5665 rorq(x_xstart, 32); // convert big-endian to little-endian
5666
5667 bind(L_first_loop);
// idx is decremented twice per iteration (two 32-bit words = one 64-bit digit);
// negative after the first means done, negative after the second means a
// single 32-bit word of y is left (L_one_y).
5668 decrementl(idx);
5669 jcc(Assembler::negative, L_first_loop_exit);
5670 decrementl(idx);
5671 jcc(Assembler::negative, L_one_y);
5672 movq(y_idx, Address(y, idx, Address::times_4, 0));
5673 rorq(y_idx, 32); // convert big-endian to little-endian
5674 bind(L_multiply);
5675 movq(product, x_xstart);
5676 mulq(y_idx); // product(rax) * y_idx -> rdx:rax
5677 addq(product, carry);
5678 adcq(rdx, 0);
5679 subl(kdx, 2);
// Store the low 32 bits at z[kdx+1] and the next 32 bits at z[kdx].
5680 movl(Address(z, kdx, Address::times_4, 4), product);
5681 shrq(product, 32);
5682 movl(Address(z, kdx, Address::times_4, 0), product);
5683 movq(carry, rdx);
5684 jmp(L_first_loop);
5685
5686 bind(L_one_y);
5687 movl(y_idx, Address(y, 0));
5688 jmp(L_multiply);
5689
5690 bind(L_one_x);
5691 movl(x_xstart, Address(x, 0));
5692 jmp(L_first_loop);
5693
5694 bind(L_first_loop_exit);
5695}
5696
5697/**
5698 * Multiply 64 bit by 64 bit and add 128 bit.
 *
 * Reads 64 bits from y and from z at Address(base, idx, times_4, offset),
 * multiplies the y value by x_xstart and adds `carry` plus the z value.
 * The low 64 bits of the sum are written back to z as two 32-bit stores;
 * the high 64 bits are left in rdx for the caller. yz_idx is scratch and
 * `product` is expected to be rax (see the mulq comment below).
5699 */
5700void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, Register z,
5701 Register yz_idx, Register idx,
5702 Register carry, Register product, int offset) {
5703 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
5704 // z[kdx] = (jlong)product;
5705
5706 movq(yz_idx, Address(y, idx, Address::times_4, offset));
5707 rorq(yz_idx, 32); // convert big-endian to little-endian
5708 movq(product, x_xstart);
5709 mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax)
5710 movq(yz_idx, Address(z, idx, Address::times_4, offset));
5711 rorq(yz_idx, 32); // convert big-endian to little-endian
5712
5713 add2_with_carry(rdx, product, carry, yz_idx);
5714
// Low 32 bits go to the higher address (offset+4), the next 32 bits to `offset`.
5715 movl(Address(z, idx, Address::times_4, offset+4), product);
5716 shrq(product, 32);
5717 movl(Address(z, idx, Address::times_4, offset), product);
5718
5719}
5720
5721/**
5722 * Multiply 128 bit by 128 bit. Unrolled inner loop.
 *
 * idx counts 32-bit words and is consumed; jdx and carry2 are scratch.
 * The main loop handles four words per iteration via two calls to
 * multiply_add_128_x_128(); the remaining 0-3 words are handled afterwards
 * as one optional 64-bit step and one optional 32-bit step. `carry` is
 * both input and output.
5723 */
5724void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
5725 Register yz_idx, Register idx, Register jdx,
5726 Register carry, Register product,
5727 Register carry2) {
5728 // jlong carry, x[], y[], z[];
5729 // int kdx = ystart+1;
5730 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
5731 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
5732 // z[kdx+idx+1] = (jlong)product;
5733 // jlong carry2 = (jlong)(product >>> 64);
5734 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
5735 // z[kdx+idx] = (jlong)product;
5736 // carry = (jlong)(product >>> 64);
5737 // }
5738 // idx += 2;
5739 // if (idx > 0) {
5740 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
5741 // z[kdx+idx] = (jlong)product;
5742 // carry = (jlong)(product >>> 64);
5743 // }
5744 //
5745
5746 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
5747
// jdx = idx / 4: number of full four-word iterations.
5748 movl(jdx, idx);
5749 andl(jdx, 0xFFFFFFFC);
5750 shrl(jdx, 2);
5751
5752 bind(L_third_loop);
5753 subl(jdx, 1);
5754 jcc(Assembler::negative, L_third_loop_exit);
5755 subl(idx, 4);
5756
// Upper pair of words first (offset 8), its high half feeds the lower pair.
5757 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
5758 movq(carry2, rdx);
5759
5760 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
5761 movq(carry, rdx);
5762 jmp(L_third_loop);
5763
5764 bind (L_third_loop_exit);
5765
// Leftover words: idx & 3.
5766 andl (idx, 0x3);
5767 jcc(Assembler::zero, L_post_third_loop_done);
5768
5769 Label L_check_1;
5770 subl(idx, 2);
5771 jcc(Assembler::negative, L_check_1);
5772
5773 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
5774 movq(carry, rdx);
5775
5776 bind (L_check_1);
// Undo the subtraction above and test the low bit: is a single word left?
5777 addl (idx, 0x2);
5778 andl (idx, 0x1);
5779 subl(idx, 1);
5780 jcc(Assembler::negative, L_post_third_loop_done);
5781
5782 movl(yz_idx, Address(y, idx, Address::times_4, 0));
5783 movq(product, x_xstart);
5784 mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax)
5785 movl(yz_idx, Address(z, idx, Address::times_4, 0));
5786
5787 add2_with_carry(rdx, product, yz_idx, carry);
5788
5789 movl(Address(z, idx, Address::times_4, 0), product);
5790 shrq(product, 32);
5791
// Only 32 bits were stored, so the new carry is the 96-bit sum shifted right
// by 32: (rdx << 32) | (product >> 32).
5792 shlq(rdx, 32);
5793 orq(product, rdx);
5794 movq(carry, product);
5795
5796 bind(L_post_third_loop_done);
5797}
5798
5799/**
5800 * Multiply 128 bit by 128 bit using BMI2. Unrolled inner loop.
5801 *
 * Same structure as multiply_128_x_128_loop(), but built on rorxq/mulxq; the
 * multiplicand is taken from rdx (see the mulxq comments below). idx counts
 * 32-bit words and is consumed. jdx, carry2, yz_idx1, yz_idx2, tmp, tmp3 and
 * tmp4 are scratch. `carry` is both input and output.
5802 */
5803void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z,
5804 Register carry, Register carry2,
5805 Register idx, Register jdx,
5806 Register yz_idx1, Register yz_idx2,
5807 Register tmp, Register tmp3, Register tmp4) {
5808 assert(UseBMI2Instructions, "should be used only when BMI2 is available")do { if (!(UseBMI2Instructions)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5808, "assert(" "UseBMI2Instructions" ") failed", "should be used only when BMI2 is available"
); ::breakpoint(); } } while (0)
;
5809
5810 // jlong carry, x[], y[], z[];
5811 // int kdx = ystart+1;
5812 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
5813 // huge_128 tmp3 = (y[idx+1] * rdx) + z[kdx+idx+1] + carry;
5814 // jlong carry2 = (jlong)(tmp3 >>> 64);
5815 // huge_128 tmp4 = (y[idx] * rdx) + z[kdx+idx] + carry2;
5816 // carry = (jlong)(tmp4 >>> 64);
5817 // z[kdx+idx+1] = (jlong)tmp3;
5818 // z[kdx+idx] = (jlong)tmp4;
5819 // }
5820 // idx += 2;
5821 // if (idx > 0) {
5822 // yz_idx1 = (y[idx] * rdx) + z[kdx+idx] + carry;
5823 // z[kdx+idx] = (jlong)yz_idx1;
5824 // carry = (jlong)(yz_idx1 >>> 64);
5825 // }
5826 //
5827
5828 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
5829
// jdx = idx / 4: number of full four-word iterations.
5830 movl(jdx, idx);
5831 andl(jdx, 0xFFFFFFFC);
5832 shrl(jdx, 2);
5833
5834 bind(L_third_loop);
5835 subl(jdx, 1);
5836 jcc(Assembler::negative, L_third_loop_exit);
5837 subl(idx, 4);
5838
5839 movq(yz_idx1, Address(y, idx, Address::times_4, 8));
5840 rorxq(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
5841 movq(yz_idx2, Address(y, idx, Address::times_4, 0));
5842 rorxq(yz_idx2, yz_idx2, 32);
5843
5844 mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3
5845 mulxq(carry2, tmp, yz_idx2); // yz_idx2 * rdx -> carry2:tmp
5846
// yz_idx1/yz_idx2 are reused for the matching z values.
5847 movq(yz_idx1, Address(z, idx, Address::times_4, 8));
5848 rorxq(yz_idx1, yz_idx1, 32);
5849 movq(yz_idx2, Address(z, idx, Address::times_4, 0));
5850 rorxq(yz_idx2, yz_idx2, 32);
5851
// Both branches compute tmp3 = low product + carry + z (upper pair),
// tmp4 = high product + low product of the lower pair + z (lower pair),
// with carry2 collecting the overflow.
5852 if (VM_Version::supports_adx()) {
5853 adcxq(tmp3, carry);
5854 adoxq(tmp3, yz_idx1);
5855
5856 adcxq(tmp4, tmp);
5857 adoxq(tmp4, yz_idx2);
5858
5859 movl(carry, 0); // does not affect flags
5860 adcxq(carry2, carry);
5861 adoxq(carry2, carry);
5862 } else {
5863 add2_with_carry(tmp4, tmp3, carry, yz_idx1);
5864 add2_with_carry(carry2, tmp4, tmp, yz_idx2);
5865 }
5866 movq(carry, carry2);
5867
// Write both 64-bit results back to z as four 32-bit words.
5868 movl(Address(z, idx, Address::times_4, 12), tmp3);
5869 shrq(tmp3, 32);
5870 movl(Address(z, idx, Address::times_4, 8), tmp3);
5871
5872 movl(Address(z, idx, Address::times_4, 4), tmp4);
5873 shrq(tmp4, 32);
5874 movl(Address(z, idx, Address::times_4, 0), tmp4);
5875
5876 jmp(L_third_loop);
5877
5878 bind (L_third_loop_exit);
5879
// Leftover words: idx & 3.
5880 andl (idx, 0x3);
5881 jcc(Assembler::zero, L_post_third_loop_done);
5882
5883 Label L_check_1;
5884 subl(idx, 2);
5885 jcc(Assembler::negative, L_check_1);
5886
5887 movq(yz_idx1, Address(y, idx, Address::times_4, 0));
5888 rorxq(yz_idx1, yz_idx1, 32);
5889 mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3
5890 movq(yz_idx2, Address(z, idx, Address::times_4, 0));
5891 rorxq(yz_idx2, yz_idx2, 32);
5892
5893 add2_with_carry(tmp4, tmp3, carry, yz_idx2);
5894
5895 movl(Address(z, idx, Address::times_4, 4), tmp3);
5896 shrq(tmp3, 32);
5897 movl(Address(z, idx, Address::times_4, 0), tmp3);
5898 movq(carry, tmp4);
5899
5900 bind (L_check_1);
// Undo the subtraction above and test the low bit: is a single word left?
5901 addl (idx, 0x2);
5902 andl (idx, 0x1);
5903 subl(idx, 1);
5904 jcc(Assembler::negative, L_post_third_loop_done);
5905 movl(tmp4, Address(y, idx, Address::times_4, 0));
5906 mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3
5907 movl(tmp4, Address(z, idx, Address::times_4, 0));
5908
5909 add2_with_carry(carry2, tmp3, tmp4, carry);
5910
5911 movl(Address(z, idx, Address::times_4, 0), tmp3);
5912 shrq(tmp3, 32);
5913
// Only 32 bits were stored, so the new carry is (carry2 << 32) | (tmp3 >> 32).
5914 shlq(carry2, 32);
5915 orq(tmp3, carry2);
5916 movq(carry, tmp3);
5917
5918 bind(L_post_third_loop_done);
5919}
5920
5921/**
5922 * Code for BigInteger::multiplyToLen() intrinsic.
5923 *
5924 * rdi: x
5925 * rax: xlen
5926 * rsi: y
5927 * rcx: ylen
5928 * r8: z
5929 * r11: zlen
5930 * r12: tmp1
5931 * r13: tmp2
5932 * r14: tmp3
5933 * r15: tmp4
5934 * rbx: tmp5
5935 *
 * tmp1..tmp5, xlen and zlen are pushed on entry and popped in reverse order
 * at L_done. rdx must be distinct from every argument register (asserted
 * below) and is used by the multiply helpers.
5936 */
5937void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
5938 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
5939 ShortBranchVerifier sbv(this);
5940 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx);
5941
5942 push(tmp1);
5943 push(tmp2);
5944 push(tmp3);
5945 push(tmp4);
5946 push(tmp5);
5947
5948 push(xlen);
5949 push(zlen);
5950
// Register aliases: xlen and zlen are reused as `product` and `x_xstart`
// once their values have been copied into xstart and kdx below.
5951 const Register idx = tmp1;
5952 const Register kdx = tmp2;
5953 const Register xstart = tmp3;
5954
5955 const Register y_idx = tmp4;
5956 const Register carry = tmp5;
5957 const Register product = xlen;
5958 const Register x_xstart = zlen; // reuse register
5959
5960 // First Loop.
5961 //
5962 // final static long LONG_MASK = 0xffffffffL;
5963 // int xstart = xlen - 1;
5964 // int ystart = ylen - 1;
5965 // long carry = 0;
5966 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5967 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
5968 // z[kdx] = (int)product;
5969 // carry = product >>> 32;
5970 // }
5971 // z[xstart] = (int)carry;
5972 //
5973
5974 movl(idx, ylen); // idx = ylen;
5975 movl(kdx, zlen); // kdx = xlen+ylen;
5976 xorq(carry, carry); // carry = 0;
5977
5978 Label L_done;
5979
5980 movl(xstart, xlen);
5981 decrementl(xstart);
5982 jcc(Assembler::negative, L_done);
5983
5984 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
5985
// Store the carry left by the first loop: nothing if kdx is 0, one 32-bit
// word if kdx is 1, otherwise two words (low word first, then carry >> 32).
5986 Label L_second_loop;
5987 testl(kdx, kdx);
5988 jcc(Assembler::zero, L_second_loop);
5989
5990 Label L_carry;
5991 subl(kdx, 1);
5992 jcc(Assembler::zero, L_carry);
5993
5994 movl(Address(z, kdx, Address::times_4, 0), carry);
5995 shrq(carry, 32);
5996 subl(kdx, 1);
5997
5998 bind(L_carry);
5999 movl(Address(z, kdx, Address::times_4, 0), carry);
6000
6001 // Second and third (nested) loops.
6002 //
6003 // for (int i = xstart-1; i >= 0; i--) { // Second loop
6004 // carry = 0;
6005 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
6006 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
6007 // (z[k] & LONG_MASK) + carry;
6008 // z[k] = (int)product;
6009 // carry = product >>> 32;
6010 // }
6011 // z[i] = (int)carry;
6012 // }
6013 //
6014 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx
6015
6016 const Register jdx = tmp1;
6017
6018 bind(L_second_loop);
6019 xorl(carry, carry); // carry = 0;
6020 movl(jdx, ylen); // j = ystart+1
6021
6022 subl(xstart, 1); // i = xstart-1;
6023 jcc(Assembler::negative, L_done);
6024
// z is temporarily rebased for the inner loop; the matching pop(z) follows it.
6025 push (z);
6026
6027 Label L_last_x;
6028 lea(z, Address(z, xstart, Address::times_4, 4)); // z = z + k - j
6029 subl(xstart, 1); // i = xstart-1;
6030 jcc(Assembler::negative, L_last_x);
6031
// Load the next 64-bit digit of x: into rdx for the BMI2 helper, into
// x_xstart for the plain helper.
6032 if (UseBMI2Instructions) {
6033 movq(rdx, Address(x, xstart, Address::times_4, 0));
6034 rorxq(rdx, rdx, 32); // convert big-endian to little-endian
6035 } else {
6036 movq(x_xstart, Address(x, xstart, Address::times_4, 0));
6037 rorq(x_xstart, 32); // convert big-endian to little-endian
6038 }
6039
6040 Label L_third_loop_prologue;
6041 bind(L_third_loop_prologue);
6042
// x and ylen (and tmp3/xstart in the BMI2 variant) are handed to the inner
// loop as working registers, so save them first.
6043 push (x);
6044 push (xstart);
6045 push (ylen);
6046
6047
6048 if (UseBMI2Instructions) {
6049 multiply_128_x_128_bmi2_loop(y, z, carry, x, jdx, ylen, product, tmp2, x_xstart, tmp3, tmp4);
6050 } else { // !UseBMI2Instructions
6051 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);
6052 }
6053
// Note: the value pushed from xstart is popped into xlen (which is aliased as
// `product`), and copied back into tmp3 (xstart) just below.
6054 pop(ylen);
6055 pop(xlen);
6056 pop(x);
6057 pop(z);
6058
6059 movl(tmp3, xlen);
6060 addl(tmp3, 1);
6061 movl(Address(z, tmp3, Address::times_4, 0), carry);
6062 subl(tmp3, 1);
6063 jccb(Assembler::negative, L_done)jccb_0(Assembler::negative, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6063)
;
6064
6065 shrq(carry, 32);
6066 movl(Address(z, tmp3, Address::times_4, 0), carry);
6067 jmp(L_second_loop);
6068
6069 // Next infrequent code is moved outside loops.
6070 bind(L_last_x);
// Only one 32-bit word of x remains: load it with a 32-bit move.
6071 if (UseBMI2Instructions) {
6072 movl(rdx, Address(x, 0));
6073 } else {
6074 movl(x_xstart, Address(x, 0));
6075 }
6076 jmp(L_third_loop_prologue);
6077
6078 bind(L_done);
6079
6080 pop(zlen);
6081 pop(xlen);
6082
6083 pop(tmp5);
6084 pop(tmp4);
6085 pop(tmp3);
6086 pop(tmp2);
6087 pop(tmp1);
6088}
6089
6090void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
6091 Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){
6092 assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6092, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 must be enabled."
); ::breakpoint(); } } while (0)
;
6093 Label VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
6094 Label VECTOR8_TAIL, VECTOR4_TAIL;
6095 Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
6096 Label SAME_TILL_END, DONE;
6097 Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL;
6098
6099 //scale is in rcx in both Win64 and Unix
6100 ShortBranchVerifier sbv(this);
6101
6102 shlq(length);
6103 xorq(result, result);
6104
6105 if ((AVX3Threshold == 0) && (UseAVX > 2) &&
6106 VM_Version::supports_avx512vlbw()) {
6107 Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
6108
6109 cmpq(length, 64);
6110 jcc(Assembler::less, VECTOR32_TAIL);
6111
6112 movq(tmp1, length);
6113 andq(tmp1, 0x3F); // tail count
6114 andq(length, ~(0x3F)); //vector count
6115
6116 bind(VECTOR64_LOOP);
6117 // AVX512 code to compare 64 byte vectors.
6118 evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
6119 evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
6120 kortestql(k7, k7);
6121 jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
6122 addq(result, 64);
6123 subq(length, 64);
6124 jccb(Assembler::notZero, VECTOR64_LOOP)jccb_0(Assembler::notZero, VECTOR64_LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6124)
;
6125
6126 //bind(VECTOR64_TAIL);
6127 testq(tmp1, tmp1);
6128 jcc(Assembler::zero, SAME_TILL_END);
6129
6130 //bind(VECTOR64_TAIL);
6131 // AVX512 code to compare upto 63 byte vectors.
6132 mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
6133 shlxq(tmp2, tmp2, tmp1);
6134 notq(tmp2);
6135 kmovql(k3, tmp2);
6136
6137 evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
6138 evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
6139
6140 ktestql(k7, k3);
6141 jcc(Assembler::below, SAME_TILL_END); // not mismatch
6142
6143 bind(VECTOR64_NOT_EQUAL);
6144 kmovql(tmp1, k7);
6145 notq(tmp1);
6146 tzcntq(tmp1, tmp1);
6147 addq(result, tmp1);
6148 shrq(result);
6149 jmp(DONE);
6150 bind(VECTOR32_TAIL);
6151 }
6152
6153 cmpq(length, 8);
6154 jcc(Assembler::equal, VECTOR8_LOOP);
6155 jcc(Assembler::less, VECTOR4_TAIL);
6156
6157 if (UseAVX >= 2) {
6158 Label VECTOR16_TAIL, VECTOR32_LOOP;
6159
6160 cmpq(length, 16);
6161 jcc(Assembler::equal, VECTOR16_LOOP);
6162 jcc(Assembler::less, VECTOR8_LOOP);
6163
6164 cmpq(length, 32);
6165 jccb(Assembler::less, VECTOR16_TAIL)jccb_0(Assembler::less, VECTOR16_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6165)
;
6166
6167 subq(length, 32);
6168 bind(VECTOR32_LOOP);
6169 vmovdqu(rymm0, Address(obja, result));
6170 vmovdqu(rymm1, Address(objb, result));
6171 vpxor(rymm2, rymm0, rymm1, Assembler::AVX_256bit);
6172 vptest(rymm2, rymm2);
6173 jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found
6174 addq(result, 32);
6175 subq(length, 32);
6176 jcc(Assembler::greaterEqual, VECTOR32_LOOP);
6177 addq(length, 32);
6178 jcc(Assembler::equal, SAME_TILL_END);
6179 //falling through if less than 32 bytes left //close the branch here.
6180
6181 bind(VECTOR16_TAIL);
6182 cmpq(length, 16);
6183 jccb(Assembler::less, VECTOR8_TAIL)jccb_0(Assembler::less, VECTOR8_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6183)
;
6184 bind(VECTOR16_LOOP);
6185 movdqu(rymm0, Address(obja, result));
6186 movdqu(rymm1, Address(objb, result));
6187 vpxor(rymm2, rymm0, rymm1, Assembler::AVX_128bit);
6188 ptest(rymm2, rymm2);
6189 jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
6190 addq(result, 16);
6191 subq(length, 16);
6192 jcc(Assembler::equal, SAME_TILL_END);
6193 //falling through if less than 16 bytes left
6194 } else {//regular intrinsics
6195
6196 cmpq(length, 16);
6197 jccb(Assembler::less, VECTOR8_TAIL)jccb_0(Assembler::less, VECTOR8_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6197)
;
6198
6199 subq(length, 16);
6200 bind(VECTOR16_LOOP);
6201 movdqu(rymm0, Address(obja, result));
6202 movdqu(rymm1, Address(objb, result));
6203 pxor(rymm0, rymm1);
6204 ptest(rymm0, rymm0);
6205 jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
6206 addq(result, 16);
6207 subq(length, 16);
6208 jccb(Assembler::greaterEqual, VECTOR16_LOOP)jccb_0(Assembler::greaterEqual, VECTOR16_LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6208)
;
6209 addq(length, 16);
6210 jcc(Assembler::equal, SAME_TILL_END);
6211 //falling through if less than 16 bytes left
6212 }
6213
6214 bind(VECTOR8_TAIL);
6215 cmpq(length, 8);
6216 jccb(Assembler::less, VECTOR4_TAIL)jccb_0(Assembler::less, VECTOR4_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6216)
;
6217 bind(VECTOR8_LOOP);
6218 movq(tmp1, Address(obja, result));
6219 movq(tmp2, Address(objb, result));
6220 xorq(tmp1, tmp2);
6221 testq(tmp1, tmp1);
6222 jcc(Assembler::notZero, VECTOR8_NOT_EQUAL);//mismatch found
6223 addq(result, 8);
6224 subq(length, 8);
6225 jcc(Assembler::equal, SAME_TILL_END);
6226 //falling through if less than 8 bytes left
6227
6228 bind(VECTOR4_TAIL);
6229 cmpq(length, 4);
6230 jccb(Assembler::less, BYTES_TAIL)jccb_0(Assembler::less, BYTES_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6230)
;
6231 bind(VECTOR4_LOOP);
6232 movl(tmp1, Address(obja, result));
6233 xorl(tmp1, Address(objb, result));
6234 testl(tmp1, tmp1);
6235 jcc(Assembler::notZero, VECTOR4_NOT_EQUAL);//mismatch found
6236 addq(result, 4);
6237 subq(length, 4);
6238 jcc(Assembler::equal, SAME_TILL_END);
6239 //falling through if less than 4 bytes left
6240
6241 bind(BYTES_TAIL);
6242 bind(BYTES_LOOP);
6243 load_unsigned_byte(tmp1, Address(obja, result));
6244 load_unsigned_byte(tmp2, Address(objb, result));
6245 xorl(tmp1, tmp2);
6246 testl(tmp1, tmp1);
6247 jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
6248 decq(length);
6249 jcc(Assembler::zero, SAME_TILL_END);
6250 incq(result);
6251 load_unsigned_byte(tmp1, Address(obja, result));
6252 load_unsigned_byte(tmp2, Address(objb, result));
6253 xorl(tmp1, tmp2);
6254 testl(tmp1, tmp1);
6255 jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
6256 decq(length);
6257 jcc(Assembler::zero, SAME_TILL_END);
6258 incq(result);
6259 load_unsigned_byte(tmp1, Address(obja, result));
6260 load_unsigned_byte(tmp2, Address(objb, result));
6261 xorl(tmp1, tmp2);
6262 testl(tmp1, tmp1);
6263 jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
6264 jmp(SAME_TILL_END);
6265
6266 if (UseAVX >= 2) {
6267 bind(VECTOR32_NOT_EQUAL);
6268 vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit);
6269 vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit);
6270 vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit);
6271 vpmovmskb(tmp1, rymm0);
6272 bsfq(tmp1, tmp1);
6273 addq(result, tmp1);
6274 shrq(result);
6275 jmp(DONE);
6276 }
6277
6278 bind(VECTOR16_NOT_EQUAL);
6279 if (UseAVX >= 2) {
6280 vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit);
6281 vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit);
6282 pxor(rymm0, rymm2);
6283 } else {
6284 pcmpeqb(rymm2, rymm2);
6285 pxor(rymm0, rymm1);
6286 pcmpeqb(rymm0, rymm1);
6287 pxor(rymm0, rymm2);
6288 }
6289 pmovmskb(tmp1, rymm0);
6290 bsfq(tmp1, tmp1);
6291 addq(result, tmp1);
6292 shrq(result);
6293 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6293)
;
6294
6295 bind(VECTOR8_NOT_EQUAL);
6296 bind(VECTOR4_NOT_EQUAL);
6297 bsfq(tmp1, tmp1);
6298 shrq(tmp1, 3);
6299 addq(result, tmp1);
6300 bind(BYTES_NOT_EQUAL);
6301 shrq(result);
6302 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6302)
;
6303
6304 bind(SAME_TILL_END);
6305 mov64(result, -1);
6306
6307 bind(DONE);
6308}
6309
6310//Helper functions for square_to_len()
6311
6312/**
6313 * Store the squares of x[], right shifted one bit (divided by 2) into z[]
6314 * Preserves x and z and modifies rest of the registers.
6315 */
6316void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6317 // Perform square and right shift by 1
6318 // Handle odd xlen case first, then for even xlen do the following
6319 // jlong carry = 0;
6320 // for (int j=0, i=0; j < xlen; j+=2, i+=4) {
6321 // huge_128 product = x[j:j+1] * x[j:j+1];
6322 // z[i:i+1] = (carry << 63) | (jlong)(product >>> 65);
6323 // z[i+2:i+3] = (jlong)(product >>> 1);
6324 // carry = (jlong)product;
6325 // }
6326
6327 xorq(tmp5, tmp5); // carry
6328 xorq(rdxReg, rdxReg);
6329 xorl(tmp1, tmp1); // index for x
6330 xorl(tmp4, tmp4); // index for z
6331
6332 Label L_first_loop, L_first_loop_exit;
6333
6334 testl(xlen, 1);
6335 jccb(Assembler::zero, L_first_loop)jccb_0(Assembler::zero, L_first_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6335)
; //jump if xlen is even
6336
6337 // Square and right shift by 1 the odd element using 32 bit multiply
6338 movl(raxReg, Address(x, tmp1, Address::times_4, 0));
6339 imulq(raxReg, raxReg);
6340 shrq(raxReg, 1);
6341 adcq(tmp5, 0);
6342 movq(Address(z, tmp4, Address::times_4, 0), raxReg);
6343 incrementl(tmp1);
6344 addl(tmp4, 2);
6345
6346 // Square and right shift by 1 the rest using 64 bit multiply
6347 bind(L_first_loop);
6348 cmpptr(tmp1, xlen);
6349 jccb(Assembler::equal, L_first_loop_exit)jccb_0(Assembler::equal, L_first_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6349)
;
6350
6351 // Square
6352 movq(raxReg, Address(x, tmp1, Address::times_4, 0));
6353 rorq(raxReg, 32); // convert big-endian to little-endian
6354 mulq(raxReg); // 64-bit multiply rax * rax -> rdx:rax
6355
6356 // Right shift by 1 and save carry
6357 shrq(tmp5, 1); // rdx:rax:tmp5 = (tmp5:rdx:rax) >>> 1
6358 rcrq(rdxReg, 1);
6359 rcrq(raxReg, 1);
6360 adcq(tmp5, 0);
6361
6362 // Store result in z
6363 movq(Address(z, tmp4, Address::times_4, 0), rdxReg);
6364 movq(Address(z, tmp4, Address::times_4, 8), raxReg);
6365
6366 // Update indices for x and z
6367 addl(tmp1, 2);
6368 addl(tmp4, 4);
6369 jmp(L_first_loop);
6370
6371 bind(L_first_loop_exit);
6372}
6373
6374
6375/**
6376 * Perform the following multiply add operation using BMI2 instructions
6377 * carry:sum = sum + op1*op2 + carry
6378 * op2 should be in rdx
6379 * op2 is preserved, all other registers are modified
6380 */
6381void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) {
6382 // assert op2 is rdx
6383 mulxq(tmp2, op1, op1); // op1 * op2 -> tmp2:op1
6384 addq(sum, carry);
6385 adcq(tmp2, 0);
6386 addq(sum, op1);
6387 adcq(tmp2, 0);
6388 movq(carry, tmp2);
6389}
6390
6391/**
6392 * Perform the following multiply add operation:
6393 * carry:sum = sum + op1*op2 + carry
6394 * Preserves op1, op2 and modifies rest of registers
6395 */
6396void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) {
6397 // rdx:rax = op1 * op2
6398 movq(raxReg, op2);
6399 mulq(op1);
6400
6401 // rdx:rax = sum + carry + rdx:rax
6402 addq(sum, carry);
6403 adcq(rdxReg, 0);
6404 addq(sum, raxReg);
6405 adcq(rdxReg, 0);
6406
6407 // carry:sum = rdx:sum
6408 movq(carry, rdxReg);
6409}
6410
6411/**
6412 * Add 64 bit long carry into z[] with carry propagation.
6413 * Preserves z and carry register values and modifies rest of registers.
6414 *
6415 */
6416void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) {
6417 Label L_fourth_loop, L_fourth_loop_exit;
6418
6419 movl(tmp1, 1);
6420 subl(zlen, 2);
6421 addq(Address(z, zlen, Address::times_4, 0), carry);
6422
6423 bind(L_fourth_loop);
6424 jccb(Assembler::carryClear, L_fourth_loop_exit)jccb_0(Assembler::carryClear, L_fourth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6424)
;
6425 subl(zlen, 2);
6426 jccb(Assembler::negative, L_fourth_loop_exit)jccb_0(Assembler::negative, L_fourth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6426)
;
6427 addq(Address(z, zlen, Address::times_4, 0), tmp1);
6428 jmp(L_fourth_loop);
6429 bind(L_fourth_loop_exit);
6430}
6431
6432/**
6433 * Shift z[] left by 1 bit.
6434 * Preserves x, len, z and zlen registers and modifies rest of the registers.
6435 *
6436 */
6437void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
6438
6439 Label L_fifth_loop, L_fifth_loop_exit;
6440
6441 // Fifth loop
6442 // Perform primitiveLeftShift(z, zlen, 1)
6443
6444 const Register prev_carry = tmp1;
6445 const Register new_carry = tmp4;
6446 const Register value = tmp2;
6447 const Register zidx = tmp3;
6448
6449 // int zidx, carry;
6450 // long value;
6451 // carry = 0;
6452 // for (zidx = zlen-2; zidx >=0; zidx -= 2) {
6453 // (carry:value) = (z[i] << 1) | carry ;
6454 // z[i] = value;
6455 // }
6456
6457 movl(zidx, zlen);
6458 xorl(prev_carry, prev_carry); // clear carry flag and prev_carry register
6459
6460 bind(L_fifth_loop);
6461 decl(zidx); // Use decl to preserve carry flag
6462 decl(zidx);
6463 jccb(Assembler::negative, L_fifth_loop_exit)jccb_0(Assembler::negative, L_fifth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6463)
;
6464
6465 if (UseBMI2Instructions) {
6466 movq(value, Address(z, zidx, Address::times_4, 0));
6467 rclq(value, 1);
6468 rorxq(value, value, 32);
6469 movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form
6470 }
6471 else {
6472 // clear new_carry
6473 xorl(new_carry, new_carry);
6474
6475 // Shift z[i] by 1, or in previous carry and save new carry
6476 movq(value, Address(z, zidx, Address::times_4, 0));
6477 shlq(value, 1);
6478 adcl(new_carry, 0);
6479
6480 orq(value, prev_carry);
6481 rorq(value, 0x20);
6482 movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form
6483
6484 // Set previous carry = new carry
6485 movl(prev_carry, new_carry);
6486 }
6487 jmp(L_fifth_loop);
6488
6489 bind(L_fifth_loop_exit);
6490}
6491
6492
6493/**
6494 * Code for BigInteger::squareToLen() intrinsic
6495 *
6496 * rdi: x
6497 * rsi: len
6498 * r8: z
6499 * rcx: zlen
6500 * r12: tmp1
6501 * r13: tmp2
6502 * r14: tmp3
6503 * r15: tmp4
6504 * rbx: tmp5
6505 *
6506 */
6507void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6508
6509 Label L_second_loop, L_second_loop_exit, L_third_loop, L_third_loop_exit, L_last_x, L_multiply;
6510 push(tmp1);
6511 push(tmp2);
6512 push(tmp3);
6513 push(tmp4);
6514 push(tmp5);
6515
6516 // First loop
6517 // Store the squares, right shifted one bit (i.e., divided by 2).
6518 square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg);
6519
6520 // Add in off-diagonal sums.
6521 //
6522 // Second, third (nested) and fourth loops.
6523 // zlen +=2;
6524 // for (int xidx=len-2,zidx=zlen-4; xidx > 0; xidx-=2,zidx-=4) {
6525 // carry = 0;
6526 // long op2 = x[xidx:xidx+1];
6527 // for (int j=xidx-2,k=zidx; j >= 0; j-=2) {
6528 // k -= 2;
6529 // long op1 = x[j:j+1];
6530 // long sum = z[k:k+1];
6531 // carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs);
6532 // z[k:k+1] = sum;
6533 // }
6534 // add_one_64(z, k, carry, tmp_regs);
6535 // }
6536
6537 const Register carry = tmp5;
6538 const Register sum = tmp3;
6539 const Register op1 = tmp4;
6540 Register op2 = tmp2;
6541
6542 push(zlen);
6543 push(len);
6544 addl(zlen,2);
6545 bind(L_second_loop);
6546 xorq(carry, carry);
6547 subl(zlen, 4);
6548 subl(len, 2);
6549 push(zlen);
6550 push(len);
6551 cmpl(len, 0);
6552 jccb(Assembler::lessEqual, L_second_loop_exit)jccb_0(Assembler::lessEqual, L_second_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6552)
;
6553
6554 // Multiply an array by one 64 bit long.
6555 if (UseBMI2Instructions) {
6556 op2 = rdxReg;
6557 movq(op2, Address(x, len, Address::times_4, 0));
6558 rorxq(op2, op2, 32);
6559 }
6560 else {
6561 movq(op2, Address(x, len, Address::times_4, 0));
6562 rorq(op2, 32);
6563 }
6564
6565 bind(L_third_loop);
6566 decrementl(len);
6567 jccb(Assembler::negative, L_third_loop_exit)jccb_0(Assembler::negative, L_third_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6567)
;
6568 decrementl(len);
6569 jccb(Assembler::negative, L_last_x)jccb_0(Assembler::negative, L_last_x, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6569)
;
6570
6571 movq(op1, Address(x, len, Address::times_4, 0));
6572 rorq(op1, 32);
6573
6574 bind(L_multiply);
6575 subl(zlen, 2);
6576 movq(sum, Address(z, zlen, Address::times_4, 0));
6577
6578 // Multiply 64 bit by 64 bit and add 64 bits lower half and upper 64 bits as carry.
6579 if (UseBMI2Instructions) {
6580 multiply_add_64_bmi2(sum, op1, op2, carry, tmp2);
6581 }
6582 else {
6583 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6584 }
6585
6586 movq(Address(z, zlen, Address::times_4, 0), sum);
6587
6588 jmp(L_third_loop);
6589 bind(L_third_loop_exit);
6590
6591 // Fourth loop
6592 // Add 64 bit long carry into z with carry propogation.
6593 // Uses offsetted zlen.
6594 add_one_64(z, zlen, carry, tmp1);
6595
6596 pop(len);
6597 pop(zlen);
6598 jmp(L_second_loop);
6599
6600 // Next infrequent code is moved outside loops.
6601 bind(L_last_x);
6602 movl(op1, Address(x, 0));
6603 jmp(L_multiply);
6604
6605 bind(L_second_loop_exit);
6606 pop(len);
6607 pop(zlen);
6608 pop(len);
6609 pop(zlen);
6610
6611 // Fifth loop
6612 // Shift z left 1 bit.
6613 lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4);
6614
6615 // z[zlen-1] |= x[len-1] & 1;
6616 movl(tmp3, Address(x, len, Address::times_4, -4));
6617 andl(tmp3, 1);
6618 orl(Address(z, zlen, Address::times_4, -4), tmp3);
6619
6620 pop(tmp5);
6621 pop(tmp4);
6622 pop(tmp3);
6623 pop(tmp2);
6624 pop(tmp1);
6625}
6626
6627/**
6628 * Helper function for mul_add()
6629 * Multiply the in[] by int k and add to out[] starting at offset offs using
6630 * 128 bit by 32 bit multiply and return the carry in tmp5.
6631 * Only quad int aligned length of in[] is operated on in this function.
6632 * k is in rdxReg for BMI2Instructions, for others it is in tmp2.
6633 * This function preserves out, in and k registers.
6634 * len and offset point to the appropriate index in "in" & "out" correspondingly
6635 * tmp5 has the carry.
6636 * other registers are temporary and are modified.
6637 *
6638 */
6639void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in,
6640 Register offset, Register len, Register tmp1, Register tmp2, Register tmp3,
6641 Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6642
6643 Label L_first_loop, L_first_loop_exit;
6644
6645 movl(tmp1, len);
6646 shrl(tmp1, 2);
6647
6648 bind(L_first_loop);
6649 subl(tmp1, 1);
6650 jccb(Assembler::negative, L_first_loop_exit)jccb_0(Assembler::negative, L_first_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6650)
;
6651
6652 subl(len, 4);
6653 subl(offset, 4);
6654
6655 Register op2 = tmp2;
6656 const Register sum = tmp3;
6657 const Register op1 = tmp4;
6658 const Register carry = tmp5;
6659
6660 if (UseBMI2Instructions) {
6661 op2 = rdxReg;
6662 }
6663
6664 movq(op1, Address(in, len, Address::times_4, 8));
6665 rorq(op1, 32);
6666 movq(sum, Address(out, offset, Address::times_4, 8));
6667 rorq(sum, 32);
6668 if (UseBMI2Instructions) {
6669 multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
6670 }
6671 else {
6672 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6673 }
6674 // Store back in big endian from little endian
6675 rorq(sum, 0x20);
6676 movq(Address(out, offset, Address::times_4, 8), sum);
6677
6678 movq(op1, Address(in, len, Address::times_4, 0));
6679 rorq(op1, 32);
6680 movq(sum, Address(out, offset, Address::times_4, 0));
6681 rorq(sum, 32);
6682 if (UseBMI2Instructions) {
6683 multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
6684 }
6685 else {
6686 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6687 }
6688 // Store back in big endian from little endian
6689 rorq(sum, 0x20);
6690 movq(Address(out, offset, Address::times_4, 0), sum);
6691
6692 jmp(L_first_loop);
6693 bind(L_first_loop_exit);
6694}
6695
6696/**
6697 * Code for BigInteger::mulAdd() intrinsic
6698 *
6699 * rdi: out
6700 * rsi: in
6701 * r11: offs (out.length - offset)
6702 * rcx: len
6703 * r8: k
6704 * r12: tmp1
6705 * r13: tmp2
6706 * r14: tmp3
6707 * r15: tmp4
6708 * rbx: tmp5
6709 * Multiply the in[] by word k and add to out[], return the carry in rax
6710 */
6711void MacroAssembler::mul_add(Register out, Register in, Register offs,
6712 Register len, Register k, Register tmp1, Register tmp2, Register tmp3,
6713 Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6714
6715 Label L_carry, L_last_in, L_done;
6716
6717// carry = 0;
6718// for (int j=len-1; j >= 0; j--) {
6719// long product = (in[j] & LONG_MASK) * kLong +
6720// (out[offs] & LONG_MASK) + carry;
6721// out[offs--] = (int)product;
6722// carry = product >>> 32;
6723// }
6724//
6725 push(tmp1);
6726 push(tmp2);
6727 push(tmp3);
6728 push(tmp4);
6729 push(tmp5);
6730
6731 Register op2 = tmp2;
6732 const Register sum = tmp3;
6733 const Register op1 = tmp4;
6734 const Register carry = tmp5;
6735
6736 if (UseBMI2Instructions) {
6737 op2 = rdxReg;
6738 movl(op2, k);
6739 }
6740 else {
6741 movl(op2, k);
6742 }
6743
6744 xorq(carry, carry);
6745
6746 //First loop
6747
6748 //Multiply in[] by k in a 4 way unrolled loop using 128 bit by 32 bit multiply
6749 //The carry is in tmp5
6750 mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg);
6751
6752 //Multiply the trailing in[] entry using 64 bit by 32 bit, if any
6753 decrementl(len);
6754 jccb(Assembler::negative, L_carry)jccb_0(Assembler::negative, L_carry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6754)
;
6755 decrementl(len);
6756 jccb(Assembler::negative, L_last_in)jccb_0(Assembler::negative, L_last_in, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6756)
;
6757
6758 movq(op1, Address(in, len, Address::times_4, 0));
6759 rorq(op1, 32);
6760
6761 subl(offs, 2);
6762 movq(sum, Address(out, offs, Address::times_4, 0));
6763 rorq(sum, 32);
6764
6765 if (UseBMI2Instructions) {
6766 multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
6767 }
6768 else {
6769 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6770 }
6771
6772 // Store back in big endian from little endian
6773 rorq(sum, 0x20);
6774 movq(Address(out, offs, Address::times_4, 0), sum);
6775
6776 testl(len, len);
6777 jccb(Assembler::zero, L_carry)jccb_0(Assembler::zero, L_carry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6777)
;
6778
6779 //Multiply the last in[] entry, if any
6780 bind(L_last_in);
6781 movl(op1, Address(in, 0));
6782 movl(sum, Address(out, offs, Address::times_4, -4));
6783
6784 movl(raxReg, k);
6785 mull(op1); //tmp4 * eax -> edx:eax
6786 addl(sum, carry);
6787 adcl(rdxReg, 0);
6788 addl(sum, raxReg);
6789 adcl(rdxReg, 0);
6790 movl(carry, rdxReg);
6791
6792 movl(Address(out, offs, Address::times_4, -4), sum);
6793
6794 bind(L_carry);
6795 //return tmp5/carry as carry in rax
6796 movl(rax, carry);
6797
6798 bind(L_done);
6799 pop(tmp5);
6800 pop(tmp4);
6801 pop(tmp3);
6802 pop(tmp2);
6803 pop(tmp1);
6804}
6805#endif
6806
6807/**
6808 * Emits code to update CRC-32 with a byte value according to constants in table
6809 *
6810 * @param [in,out]crc Register containing the crc.
6811 * @param [in]val Register containing the byte to fold into the CRC.
6812 * @param [in]table Register containing the table of crc constants.
6813 *
6814 * uint32_t crc;
6815 * val = crc_table[(val ^ crc) & 0xFF];
6816 * crc = val ^ (crc >> 8);
6817 *
6818 */
6819void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
6820 xorl(val, crc);
6821 andl(val, 0xFF);
6822 shrl(crc, 8); // unsigned shift
6823 xorl(crc, Address(table, val, Address::times_4, 0));
6824}
6825
6826/**
6827 * Fold 128-bit data chunk
6828 */
6829void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
6830 if (UseAVX > 0) {
6831 vpclmulhdq(xtmp, xK, xcrc); // [123:64]
6832 vpclmulldq(xcrc, xK, xcrc); // [63:0]
6833 vpxor(xcrc, xcrc, Address(buf, offset), 0 /* vector_len */);
6834 pxor(xcrc, xtmp);
6835 } else {
6836 movdqa(xtmp, xcrc);
6837 pclmulhdq(xtmp, xK); // [123:64]
6838 pclmulldq(xcrc, xK); // [63:0]
6839 pxor(xcrc, xtmp);
6840 movdqu(xtmp, Address(buf, offset));
6841 pxor(xcrc, xtmp);
6842 }
6843}
6844
6845void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
6846 if (UseAVX > 0) {
6847 vpclmulhdq(xtmp, xK, xcrc);
6848 vpclmulldq(xcrc, xK, xcrc);
6849 pxor(xcrc, xbuf);
6850 pxor(xcrc, xtmp);
6851 } else {
6852 movdqa(xtmp, xcrc);
6853 pclmulhdq(xtmp, xK);
6854 pclmulldq(xcrc, xK);
6855 pxor(xcrc, xbuf);
6856 pxor(xcrc, xtmp);
6857 }
6858}
6859
6860/**
6861 * 8-bit folds to compute 32-bit CRC
6862 *
6863 * uint64_t xcrc;
6864 * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
6865 */
6866void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
6867 movdl(tmp, xcrc);
6868 andl(tmp, 0xFF);
6869 movdl(xtmp, Address(table, tmp, Address::times_4, 0));
6870 psrldq(xcrc, 1); // unsigned shift one byte
6871 pxor(xcrc, xtmp);
6872}
6873
6874/**
6875 * uint32_t crc;
6876 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
6877 */
6878void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
6879 movl(tmp, crc);
6880 andl(tmp, 0xFF);
6881 shrl(crc, 8);
6882 xorl(crc, Address(table, tmp, Address::times_4, 0));
6883}
6884
6885/**
6886 * @param crc register containing existing CRC (32-bit)
6887 * @param buf register pointing to input byte buffer (byte*)
6888 * @param len register containing number of bytes
6889 * @param table register that will contain address of CRC table
6890 * @param tmp scratch register
6891 */
6892void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
6893 assert_different_registers(crc, buf, len, table, tmp, rax);
6894
6895 Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
6896 Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
6897
6898 // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
6899 // context for the registers used, where all instructions below are using 128-bit mode
6900 // On EVEX without VL and BW, these instructions will all be AVX.
6901 lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
6902 notl(crc); // ~crc
6903 cmpl(len, 16);
6904 jcc(Assembler::less, L_tail);
6905
6906 // Align buffer to 16 bytes
6907 movl(tmp, buf);
6908 andl(tmp, 0xF);
6909 jccb(Assembler::zero, L_aligned)jccb_0(Assembler::zero, L_aligned, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6909)
;
6910 subl(tmp, 16);
6911 addl(len, tmp);
6912
6913 align(4);
6914 BIND(L_align_loop);
6915 movsbl(rax, Address(buf, 0)); // load byte with sign extension
6916 update_byte_crc32(crc, rax, table);
6917 increment(buf);
6918 incrementl(tmp);
6919 jccb(Assembler::less, L_align_loop)jccb_0(Assembler::less, L_align_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6919)
;
6920
6921 BIND(L_aligned);
6922 movl(tmp, len); // save
6923 shrl(len, 4);
6924 jcc(Assembler::zero, L_tail_restore);
6925
6926 // Fold crc into first bytes of vector
6927 movdqa(xmm1, Address(buf, 0));
6928 movdl(rax, xmm1);
6929 xorl(crc, rax);
6930 if (VM_Version::supports_sse4_1()) {
6931 pinsrd(xmm1, crc, 0);
6932 } else {
6933 pinsrw(xmm1, crc, 0);
6934 shrl(crc, 16);
6935 pinsrw(xmm1, crc, 1);
6936 }
6937 addptr(buf, 16);
6938 subl(len, 4); // len > 0
6939 jcc(Assembler::less, L_fold_tail);
6940
6941 movdqa(xmm2, Address(buf, 0));
6942 movdqa(xmm3, Address(buf, 16));
6943 movdqa(xmm4, Address(buf, 32));
6944 addptr(buf, 48);
6945 subl(len, 3);
6946 jcc(Assembler::lessEqual, L_fold_512b);
6947
6948 // Fold total 512 bits of polynomial on each iteration,
6949 // 128 bits per each of 4 parallel streams.
6950 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));
6951
6952 align32();
6953 BIND(L_fold_512b_loop);
6954 fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
6955 fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
6956 fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
6957 fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
6958 addptr(buf, 64);
6959 subl(len, 4);
6960 jcc(Assembler::greater, L_fold_512b_loop);
6961
6962 // Fold 512 bits to 128 bits.
6963 BIND(L_fold_512b);
6964 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
6965 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
6966 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
6967 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);
6968
6969 // Fold the rest of 128 bits data chunks
6970 BIND(L_fold_tail);
6971 addl(len, 3);
6972 jccb(Assembler::lessEqual, L_fold_128b)jccb_0(Assembler::lessEqual, L_fold_128b, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6972)
;
6973 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
6974
6975 BIND(L_fold_tail_loop);
6976 fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
6977 addptr(buf, 16);
6978 decrementl(len);
6979 jccb(Assembler::greater, L_fold_tail_loop)jccb_0(Assembler::greater, L_fold_tail_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6979)
;
6980
6981 // Fold 128 bits in xmm1 down into 32 bits in crc register.
6982 BIND(L_fold_128b);
6983 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
6984 if (UseAVX > 0) {
6985 vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
6986 vpand(xmm3, xmm0, xmm2, 0 /* vector_len */);
6987 vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
6988 } else {
6989 movdqa(xmm2, xmm0);
6990 pclmulqdq(xmm2, xmm1, 0x1);
6991 movdqa(xmm3, xmm0);
6992 pand(xmm3, xmm2);
6993 pclmulqdq(xmm0, xmm3, 0x1);
6994 }
6995 psrldq(xmm1, 8);
6996 psrldq(xmm2, 4);
6997 pxor(xmm0, xmm1);
6998 pxor(xmm0, xmm2);
6999
7000 // 8 8-bit folds to compute 32-bit CRC.
7001 for (int j = 0; j < 4; j++) {
7002 fold_8bit_crc32(xmm0, table, xmm1, rax);
7003 }
7004 movdl(crc, xmm0); // mov 32 bits to general register
7005 for (int j = 0; j < 4; j++) {
7006 fold_8bit_crc32(crc, table, rax);
7007 }
7008
7009 BIND(L_tail_restore);
7010 movl(len, tmp); // restore
7011 BIND(L_tail);
7012 andl(len, 0xf);
7013 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7013)
;
7014
7015 // Fold the rest of bytes
7016 align(4);
7017 BIND(L_tail_loop);
7018 movsbl(rax, Address(buf, 0)); // load byte with sign extension
7019 update_byte_crc32(crc, rax, table);
7020 increment(buf);
7021 decrementl(len);
7022 jccb(Assembler::greater, L_tail_loop)jccb_0(Assembler::greater, L_tail_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7022)
;
7023
7024 BIND(L_exit);
7025 notl(crc); // ~c
7026}
7027
7028#ifdef _LP641
7029// Helper function for AVX 512 CRC32
7030// Fold 512-bit data chunks
7031void MacroAssembler::fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf,
7032 Register pos, int offset) {
7033 evmovdquq(xmm3, Address(buf, pos, Address::times_1, offset), Assembler::AVX_512bit);
7034 evpclmulqdq(xtmp, xcrc, xK, 0x10, Assembler::AVX_512bit); // [123:64]
7035 evpclmulqdq(xmm2, xcrc, xK, 0x01, Assembler::AVX_512bit); // [63:0]
7036 evpxorq(xcrc, xtmp, xmm2, Assembler::AVX_512bit /* vector_len */);
7037 evpxorq(xcrc, xcrc, xmm3, Assembler::AVX_512bit /* vector_len */);
7038}
7039
7040// Helper function for AVX 512 CRC32
7041// Compute CRC32 for < 256B buffers
7042void MacroAssembler::kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register table, Register pos,
7043 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
7044 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup) {
7045
7046 Label L_less_than_32, L_exact_16_left, L_less_than_16_left;
7047 Label L_less_than_8_left, L_less_than_4_left, L_less_than_2_left, L_zero_left;
7048 Label L_only_less_than_4, L_only_less_than_3, L_only_less_than_2;
7049
7050 // check if there is enough buffer to be able to fold 16B at a time
7051 cmpl(len, 32);
7052 jcc(Assembler::less, L_less_than_32);
7053
7054 // if there is, load the constants
7055 movdqu(xmm10, Address(table, 1 * 16)); //rk1 and rk2 in xmm10
7056 movdl(xmm0, crc); // get the initial crc value
7057 movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext
7058 pxor(xmm7, xmm0);
7059
7060 // update the buffer pointer
7061 addl(pos, 16);
7062 //update the counter.subtract 32 instead of 16 to save one instruction from the loop
7063 subl(len, 32);
7064 jmp(L_16B_reduction_loop);
7065
7066 bind(L_less_than_32);
7067 //mov initial crc to the return value. this is necessary for zero - length buffers.
7068 movl(rax, crc);
7069 testl(len, len);
7070 jcc(Assembler::equal, L_cleanup);
7071
7072 movdl(xmm0, crc); //get the initial crc value
7073
7074 cmpl(len, 16);
7075 jcc(Assembler::equal, L_exact_16_left);
7076 jcc(Assembler::less, L_less_than_16_left);
7077
7078 movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext
7079 pxor(xmm7, xmm0); //xor the initial crc value
7080 addl(pos, 16);
7081 subl(len, 16);
7082 movdqu(xmm10, Address(table, 1 * 16)); // rk1 and rk2 in xmm10
7083 jmp(L_get_last_two_xmms);
7084
7085 bind(L_less_than_16_left);
7086 //use stack space to load data less than 16 bytes, zero - out the 16B in memory first.
7087 pxor(xmm1, xmm1);
7088 movptr(tmp1, rsp);
7089 movdqu(Address(tmp1, 0 * 16), xmm1);
7090
7091 cmpl(len, 4);
7092 jcc(Assembler::less, L_only_less_than_4);
7093
7094 //backup the counter value
7095 movl(tmp2, len);
7096 cmpl(len, 8);
7097 jcc(Assembler::less, L_less_than_8_left);
7098
7099 //load 8 Bytes
7100 movq(rax, Address(buf, pos, Address::times_1, 0 * 16));
7101 movq(Address(tmp1, 0 * 16), rax);
7102 addptr(tmp1, 8);
7103 subl(len, 8);
7104 addl(pos, 8);
7105
7106 bind(L_less_than_8_left);
7107 cmpl(len, 4);
7108 jcc(Assembler::less, L_less_than_4_left);
7109
7110 //load 4 Bytes
7111 movl(rax, Address(buf, pos, Address::times_1, 0));
7112 movl(Address(tmp1, 0 * 16), rax);
7113 addptr(tmp1, 4);
7114 subl(len, 4);
7115 addl(pos, 4);
7116
7117 bind(L_less_than_4_left);
7118 cmpl(len, 2);
7119 jcc(Assembler::less, L_less_than_2_left);
7120
7121 // load 2 Bytes
7122 movw(rax, Address(buf, pos, Address::times_1, 0));
7123 movl(Address(tmp1, 0 * 16), rax);
7124 addptr(tmp1, 2);
7125 subl(len, 2);
7126 addl(pos, 2);
7127
7128 bind(L_less_than_2_left);
7129 cmpl(len, 1);
7130 jcc(Assembler::less, L_zero_left);
7131
7132 // load 1 Byte
7133 movb(rax, Address(buf, pos, Address::times_1, 0));
7134 movb(Address(tmp1, 0 * 16), rax);
7135
7136 bind(L_zero_left);
7137 movdqu(xmm7, Address(rsp, 0));
7138 pxor(xmm7, xmm0); //xor the initial crc value
7139
7140 lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr()));
7141 movdqu(xmm0, Address(rax, tmp2));
7142 pshufb(xmm7, xmm0);
7143 jmp(L_128_done);
7144
7145 bind(L_exact_16_left);
7146 movdqu(xmm7, Address(buf, pos, Address::times_1, 0));
7147 pxor(xmm7, xmm0); //xor the initial crc value
7148 jmp(L_128_done);
7149
7150 bind(L_only_less_than_4);
7151 cmpl(len, 3);
7152 jcc(Assembler::less, L_only_less_than_3);
7153
7154 // load 3 Bytes
7155 movb(rax, Address(buf, pos, Address::times_1, 0));
7156 movb(Address(tmp1, 0), rax);
7157
7158 movb(rax, Address(buf, pos, Address::times_1, 1));
7159 movb(Address(tmp1, 1), rax);
7160
7161 movb(rax, Address(buf, pos, Address::times_1, 2));
7162 movb(Address(tmp1, 2), rax);
7163
7164 movdqu(xmm7, Address(rsp, 0));
7165 pxor(xmm7, xmm0); //xor the initial crc value
7166
7167 pslldq(xmm7, 0x5);
7168 jmp(L_barrett);
7169 bind(L_only_less_than_3);
7170 cmpl(len, 2);
7171 jcc(Assembler::less, L_only_less_than_2);
7172
7173 // load 2 Bytes
7174 movb(rax, Address(buf, pos, Address::times_1, 0));
7175 movb(Address(tmp1, 0), rax);
7176
7177 movb(rax, Address(buf, pos, Address::times_1, 1));
7178 movb(Address(tmp1, 1), rax);
7179
7180 movdqu(xmm7, Address(rsp, 0));
7181 pxor(xmm7, xmm0); //xor the initial crc value
7182
7183 pslldq(xmm7, 0x6);
7184 jmp(L_barrett);
7185
7186 bind(L_only_less_than_2);
7187 //load 1 Byte
7188 movb(rax, Address(buf, pos, Address::times_1, 0));
7189 movb(Address(tmp1, 0), rax);
7190
7191 movdqu(xmm7, Address(rsp, 0));
7192 pxor(xmm7, xmm0); //xor the initial crc value
7193
7194 pslldq(xmm7, 0x7);
7195}
7196
7197/**
7198* Compute CRC32 using AVX512 instructions
7199* param crc register containing existing CRC (32-bit)
7200* param buf register pointing to input byte buffer (byte*)
7201* param len register containing number of bytes
7202* param table address of crc or crc32c table
7203* param tmp1 scratch register
7204* param tmp2 scratch register
7205* return rax result register
7206*
7207* This routine is identical for crc32c with the exception of the precomputed constant
7208* table which will be passed as the table argument. The calculation steps are
7209* the same for both variants.
7210*/
7211void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2) {
7212 assert_different_registers(crc, buf, len, table, tmp1, tmp2, rax, r12);
7213
7214 Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
7215 Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
7216 Label L_less_than_256, L_fold_128_B_loop, L_fold_256_B_loop;
7217 Label L_fold_128_B_register, L_final_reduction_for_128, L_16B_reduction_loop;
7218 Label L_128_done, L_get_last_two_xmms, L_barrett, L_cleanup;
7219
7220 const Register pos = r12;
7221 push(r12);
7222 subptr(rsp, 16 * 2 + 8);
7223
7224 // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
7225 // context for the registers used, where all instructions below are using 128-bit mode
7226 // On EVEX without VL and BW, these instructions will all be AVX.
7227 movl(pos, 0);
7228
7229 // check if smaller than 256B
7230 cmpl(len, 256);
7231 jcc(Assembler::less, L_less_than_256);
7232
7233 // load the initial crc value
7234 movdl(xmm10, crc);
7235
7236 // receive the initial 64B data, xor the initial crc value
7237 evmovdquq(xmm0, Address(buf, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
7238 evmovdquq(xmm4, Address(buf, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
7239 evpxorq(xmm0, xmm0, xmm10, Assembler::AVX_512bit);
7240 evbroadcasti32x4(xmm10, Address(table, 2 * 16), Assembler::AVX_512bit); //zmm10 has rk3 and rk4
7241
7242 subl(len, 256);
7243 cmpl(len, 256);
7244 jcc(Assembler::less, L_fold_128_B_loop);
7245
7246 evmovdquq(xmm7, Address(buf, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
7247 evmovdquq(xmm8, Address(buf, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
7248 evbroadcasti32x4(xmm16, Address(table, 0 * 16), Assembler::AVX_512bit); //zmm16 has rk-1 and rk-2
7249 subl(len, 256);
7250
7251 bind(L_fold_256_B_loop);
7252 addl(pos, 256);
7253 fold512bit_crc32_avx512(xmm0, xmm16, xmm1, buf, pos, 0 * 64);
7254 fold512bit_crc32_avx512(xmm4, xmm16, xmm1, buf, pos, 1 * 64);
7255 fold512bit_crc32_avx512(xmm7, xmm16, xmm1, buf, pos, 2 * 64);
7256 fold512bit_crc32_avx512(xmm8, xmm16, xmm1, buf, pos, 3 * 64);
7257
7258 subl(len, 256);
7259 jcc(Assembler::greaterEqual, L_fold_256_B_loop);
7260
7261 // Fold 256 into 128
7262 addl(pos, 256);
7263 evpclmulqdq(xmm1, xmm0, xmm10, 0x01, Assembler::AVX_512bit);
7264 evpclmulqdq(xmm2, xmm0, xmm10, 0x10, Assembler::AVX_512bit);
7265 vpternlogq(xmm7, 0x96, xmm1, xmm2, Assembler::AVX_512bit); // xor ABC
7266
7267 evpclmulqdq(xmm5, xmm4, xmm10, 0x01, Assembler::AVX_512bit);
7268 evpclmulqdq(xmm6, xmm4, xmm10, 0x10, Assembler::AVX_512bit);
7269 vpternlogq(xmm8, 0x96, xmm5, xmm6, Assembler::AVX_512bit); // xor ABC
7270
7271 evmovdquq(xmm0, xmm7, Assembler::AVX_512bit);
1
Passing null pointer value via 1st parameter 'dst'
2
Calling 'MacroAssembler::evmovdquq'
7272 evmovdquq(xmm4, xmm8, Assembler::AVX_512bit);
7273
7274 addl(len, 128);
7275 jmp(L_fold_128_B_register);
7276
7277 // at this section of the code, there is 128 * x + y(0 <= y<128) bytes of buffer.The fold_128_B_loop
7278 // loop will fold 128B at a time until we have 128 + y Bytes of buffer
7279
7280 // fold 128B at a time.This section of the code folds 8 xmm registers in parallel
7281 bind(L_fold_128_B_loop);
7282 addl(pos, 128);
7283 fold512bit_crc32_avx512(xmm0, xmm10, xmm1, buf, pos, 0 * 64);
7284 fold512bit_crc32_avx512(xmm4, xmm10, xmm1, buf, pos, 1 * 64);
7285
7286 subl(len, 128);
7287 jcc(Assembler::greaterEqual, L_fold_128_B_loop);
7288
7289 addl(pos, 128);
7290
7291 // at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
7292 // the 128B of folded data is in 8 of the xmm registers : xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
7293 bind(L_fold_128_B_register);
7294 evmovdquq(xmm16, Address(table, 5 * 16), Assembler::AVX_512bit); // multiply by rk9-rk16
7295 evmovdquq(xmm11, Address(table, 9 * 16), Assembler::AVX_512bit); // multiply by rk17-rk20, rk1,rk2, 0,0
7296 evpclmulqdq(xmm1, xmm0, xmm16, 0x01, Assembler::AVX_512bit);
7297 evpclmulqdq(xmm2, xmm0, xmm16, 0x10, Assembler::AVX_512bit);
7298 // save last that has no multiplicand
7299 vextracti64x2(xmm7, xmm4, 3);
7300
7301 evpclmulqdq(xmm5, xmm4, xmm11, 0x01, Assembler::AVX_512bit);
7302 evpclmulqdq(xmm6, xmm4, xmm11, 0x10, Assembler::AVX_512bit);
7303 // Needed later in reduction loop
7304 movdqu(xmm10, Address(table, 1 * 16));
7305 vpternlogq(xmm1, 0x96, xmm2, xmm5, Assembler::AVX_512bit); // xor ABC
7306 vpternlogq(xmm1, 0x96, xmm6, xmm7, Assembler::AVX_512bit); // xor ABC
7307
7308 // Swap 1,0,3,2 - 01 00 11 10
7309 evshufi64x2(xmm8, xmm1, xmm1, 0x4e, Assembler::AVX_512bit);
7310 evpxorq(xmm8, xmm8, xmm1, Assembler::AVX_256bit);
7311 vextracti128(xmm5, xmm8, 1);
7312 evpxorq(xmm7, xmm5, xmm8, Assembler::AVX_128bit);
7313
7314 // instead of 128, we add 128 - 16 to the loop counter to save 1 instruction from the loop
7315 // instead of a cmp instruction, we use the negative flag with the jl instruction
7316 addl(len, 128 - 16);
7317 jcc(Assembler::less, L_final_reduction_for_128);
7318
7319 bind(L_16B_reduction_loop);
7320 vpclmulqdq(xmm8, xmm7, xmm10, 0x01);
7321 vpclmulqdq(xmm7, xmm7, xmm10, 0x10);
7322 vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit);
7323 movdqu(xmm0, Address(buf, pos, Address::times_1, 0 * 16));
7324 vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7325 addl(pos, 16);
7326 subl(len, 16);
7327 jcc(Assembler::greaterEqual, L_16B_reduction_loop);
7328
7329 bind(L_final_reduction_for_128);
7330 addl(len, 16);
7331 jcc(Assembler::equal, L_128_done);
7332
7333 bind(L_get_last_two_xmms);
7334 movdqu(xmm2, xmm7);
7335 addl(pos, len);
7336 movdqu(xmm1, Address(buf, pos, Address::times_1, -16));
7337 subl(pos, len);
7338
7339 // get rid of the extra data that was loaded before
7340 // load the shift constant
7341 lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr()));
7342 movdqu(xmm0, Address(rax, len));
7343 addl(rax, len);
7344
7345 vpshufb(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7346 //Change mask to 512
7347 vpxor(xmm0, xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 2 * 16), Assembler::AVX_128bit, tmp2);
7348 vpshufb(xmm2, xmm2, xmm0, Assembler::AVX_128bit);
7349
7350 blendvpb(xmm2, xmm2, xmm1, xmm0, Assembler::AVX_128bit);
7351 vpclmulqdq(xmm8, xmm7, xmm10, 0x01);
7352 vpclmulqdq(xmm7, xmm7, xmm10, 0x10);
7353 vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit);
7354 vpxor(xmm7, xmm7, xmm2, Assembler::AVX_128bit);
7355
7356 bind(L_128_done);
7357 // compute crc of a 128-bit value
7358 movdqu(xmm10, Address(table, 3 * 16));
7359 movdqu(xmm0, xmm7);
7360
7361 // 64b fold
7362 vpclmulqdq(xmm7, xmm7, xmm10, 0x0);
7363 vpsrldq(xmm0, xmm0, 0x8, Assembler::AVX_128bit);
7364 vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7365
7366 // 32b fold
7367 movdqu(xmm0, xmm7);
7368 vpslldq(xmm7, xmm7, 0x4, Assembler::AVX_128bit);
7369 vpclmulqdq(xmm7, xmm7, xmm10, 0x10);
7370 vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7371 jmp(L_barrett);
7372
7373 bind(L_less_than_256);
7374 kernel_crc32_avx512_256B(crc, buf, len, table, pos, tmp1, tmp2, L_barrett, L_16B_reduction_loop, L_get_last_two_xmms, L_128_done, L_cleanup);
7375
7376 //barrett reduction
7377 bind(L_barrett);
7378 vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 1 * 16), Assembler::AVX_128bit, tmp2);
7379 movdqu(xmm1, xmm7);
7380 movdqu(xmm2, xmm7);
7381 movdqu(xmm10, Address(table, 4 * 16));
7382
7383 pclmulqdq(xmm7, xmm10, 0x0);
7384 pxor(xmm7, xmm2);
7385 vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr()), Assembler::AVX_128bit, tmp2);
7386 movdqu(xmm2, xmm7);
7387 pclmulqdq(xmm7, xmm10, 0x10);
7388 pxor(xmm7, xmm2);
7389 pxor(xmm7, xmm1);
7390 pextrd(crc, xmm7, 2);
7391
7392 bind(L_cleanup);
7393 addptr(rsp, 16 * 2 + 8);
7394 pop(r12);
7395}
7396
7397// S. Gueron / Information Processing Letters 112 (2012) 184
7398// Algorithm 4: Computing carry-less multiplication using a precomputed lookup table.
7399// Input: A 32 bit value B = [byte3, byte2, byte1, byte0].
7400// Output: the 64-bit carry-less product of B * CONST
7401void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n,
7402 Register tmp1, Register tmp2, Register tmp3) {
7403 lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
7404 if (n > 0) {
7405 addq(tmp3, n * 256 * 8);
7406 }
7407 // Q1 = TABLEExt[n][B & 0xFF];
7408 movl(tmp1, in);
7409 andl(tmp1, 0x000000FF);
7410 shll(tmp1, 3);
7411 addq(tmp1, tmp3);
7412 movq(tmp1, Address(tmp1, 0));
7413
7414 // Q2 = TABLEExt[n][B >> 8 & 0xFF];
7415 movl(tmp2, in);
7416 shrl(tmp2, 8);
7417 andl(tmp2, 0x000000FF);
7418 shll(tmp2, 3);
7419 addq(tmp2, tmp3);
7420 movq(tmp2, Address(tmp2, 0));
7421
7422 shlq(tmp2, 8);
7423 xorq(tmp1, tmp2);
7424
7425 // Q3 = TABLEExt[n][B >> 16 & 0xFF];
7426 movl(tmp2, in);
7427 shrl(tmp2, 16);
7428 andl(tmp2, 0x000000FF);
7429 shll(tmp2, 3);
7430 addq(tmp2, tmp3);
7431 movq(tmp2, Address(tmp2, 0));
7432
7433 shlq(tmp2, 16);
7434 xorq(tmp1, tmp2);
7435
7436 // Q4 = TABLEExt[n][B >> 24 & 0xFF];
7437 shrl(in, 24);
7438 andl(in, 0x000000FF);
7439 shll(in, 3);
7440 addq(in, tmp3);
7441 movq(in, Address(in, 0));
7442
7443 shlq(in, 24);
7444 xorq(in, tmp1);
7445 // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
7446}
7447
7448void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
7449 Register in_out,
7450 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
7451 XMMRegister w_xtmp2,
7452 Register tmp1,
7453 Register n_tmp2, Register n_tmp3) {
7454 if (is_pclmulqdq_supported) {
7455 movdl(w_xtmp1, in_out); // modified blindly
7456
7457 movl(tmp1, const_or_pre_comp_const_index);
7458 movdl(w_xtmp2, tmp1);
7459 pclmulqdq(w_xtmp1, w_xtmp2, 0);
7460
7461 movdq(in_out, w_xtmp1);
7462 } else {
7463 crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3);
7464 }
7465}
7466
7467// Recombination Alternative 2: No bit-reflections
7468// T1 = (CRC_A * U1) << 1
7469// T2 = (CRC_B * U2) << 1
7470// C1 = T1 >> 32
7471// C2 = T2 >> 32
7472// T1 = T1 & 0xFFFFFFFF
7473// T2 = T2 & 0xFFFFFFFF
7474// T1 = CRC32(0, T1)
7475// T2 = CRC32(0, T2)
7476// C1 = C1 ^ T1
7477// C2 = C2 ^ T2
7478// CRC = C1 ^ C2 ^ CRC_C
7479void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
7480 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7481 Register tmp1, Register tmp2,
7482 Register n_tmp3) {
7483 crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
7484 crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
7485 shlq(in_out, 1);
7486 movl(tmp1, in_out);
7487 shrq(in_out, 32);
7488 xorl(tmp2, tmp2);
7489 crc32(tmp2, tmp1, 4);
7490 xorl(in_out, tmp2); // we don't care about upper 32 bit contents here
7491 shlq(in1, 1);
7492 movl(tmp1, in1);
7493 shrq(in1, 32);
7494 xorl(tmp2, tmp2);
7495 crc32(tmp2, tmp1, 4);
7496 xorl(in1, tmp2);
7497 xorl(in_out, in1);
7498 xorl(in_out, in2);
7499}
7500
7501// Set N to predefined value
7502// Subtract from a lenght of a buffer
7503// execute in a loop:
7504// CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0
7505// for i = 1 to N do
7506// CRC_A = CRC32(CRC_A, A[i])
7507// CRC_B = CRC32(CRC_B, B[i])
7508// CRC_C = CRC32(CRC_C, C[i])
7509// end for
7510// Recombine
7511void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
7512 Register in_out1, Register in_out2, Register in_out3,
7513 Register tmp1, Register tmp2, Register tmp3,
7514 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7515 Register tmp4, Register tmp5,
7516 Register n_tmp6) {
7517 Label L_processPartitions;
7518 Label L_processPartition;
7519 Label L_exit;
7520
7521 bind(L_processPartitions);
7522 cmpl(in_out1, 3 * size);
7523 jcc(Assembler::less, L_exit);
7524 xorl(tmp1, tmp1);
7525 xorl(tmp2, tmp2);
7526 movq(tmp3, in_out2);
7527 addq(tmp3, size);
7528
7529 bind(L_processPartition);
7530 crc32(in_out3, Address(in_out2, 0), 8);
7531 crc32(tmp1, Address(in_out2, size), 8);
7532 crc32(tmp2, Address(in_out2, size * 2), 8);
7533 addq(in_out2, 8);
7534 cmpq(in_out2, tmp3);
7535 jcc(Assembler::less, L_processPartition);
7536 crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
7537 w_xtmp1, w_xtmp2, w_xtmp3,
7538 tmp4, tmp5,
7539 n_tmp6);
7540 addq(in_out2, 2 * size);
7541 subl(in_out1, 3 * size);
7542 jmp(L_processPartitions);
7543
7544 bind(L_exit);
7545}
7546#else
7547void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n,
7548 Register tmp1, Register tmp2, Register tmp3,
7549 XMMRegister xtmp1, XMMRegister xtmp2) {
7550 lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
7551 if (n > 0) {
7552 addl(tmp3, n * 256 * 8);
7553 }
7554 // Q1 = TABLEExt[n][B & 0xFF];
7555 movl(tmp1, in_out);
7556 andl(tmp1, 0x000000FF);
7557 shll(tmp1, 3);
7558 addl(tmp1, tmp3);
7559 movq(xtmp1, Address(tmp1, 0));
7560
7561 // Q2 = TABLEExt[n][B >> 8 & 0xFF];
7562 movl(tmp2, in_out);
7563 shrl(tmp2, 8);
7564 andl(tmp2, 0x000000FF);
7565 shll(tmp2, 3);
7566 addl(tmp2, tmp3);
7567 movq(xtmp2, Address(tmp2, 0));
7568
7569 psllq(xtmp2, 8);
7570 pxor(xtmp1, xtmp2);
7571
7572 // Q3 = TABLEExt[n][B >> 16 & 0xFF];
7573 movl(tmp2, in_out);
7574 shrl(tmp2, 16);
7575 andl(tmp2, 0x000000FF);
7576 shll(tmp2, 3);
7577 addl(tmp2, tmp3);
7578 movq(xtmp2, Address(tmp2, 0));
7579
7580 psllq(xtmp2, 16);
7581 pxor(xtmp1, xtmp2);
7582
7583 // Q4 = TABLEExt[n][B >> 24 & 0xFF];
7584 shrl(in_out, 24);
7585 andl(in_out, 0x000000FF);
7586 shll(in_out, 3);
7587 addl(in_out, tmp3);
7588 movq(xtmp2, Address(in_out, 0));
7589
7590 psllq(xtmp2, 24);
7591 pxor(xtmp1, xtmp2); // Result in CXMM
7592 // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
7593}
7594
7595void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
7596 Register in_out,
7597 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
7598 XMMRegister w_xtmp2,
7599 Register tmp1,
7600 Register n_tmp2, Register n_tmp3) {
7601 if (is_pclmulqdq_supported) {
7602 movdl(w_xtmp1, in_out);
7603
7604 movl(tmp1, const_or_pre_comp_const_index);
7605 movdl(w_xtmp2, tmp1);
7606 pclmulqdq(w_xtmp1, w_xtmp2, 0);
7607 // Keep result in XMM since GPR is 32 bit in length
7608 } else {
7609 crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2);
7610 }
7611}
7612
7613void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
7614 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7615 Register tmp1, Register tmp2,
7616 Register n_tmp3) {
7617 crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
7618 crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
7619
7620 psllq(w_xtmp1, 1);
7621 movdl(tmp1, w_xtmp1);
7622 psrlq(w_xtmp1, 32);
7623 movdl(in_out, w_xtmp1);
7624
7625 xorl(tmp2, tmp2);
7626 crc32(tmp2, tmp1, 4);
7627 xorl(in_out, tmp2);
7628
7629 psllq(w_xtmp2, 1);
7630 movdl(tmp1, w_xtmp2);
7631 psrlq(w_xtmp2, 32);
7632 movdl(in1, w_xtmp2);
7633
7634 xorl(tmp2, tmp2);
7635 crc32(tmp2, tmp1, 4);
7636 xorl(in1, tmp2);
7637 xorl(in_out, in1);
7638 xorl(in_out, in2);
7639}
7640
7641void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
7642 Register in_out1, Register in_out2, Register in_out3,
7643 Register tmp1, Register tmp2, Register tmp3,
7644 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7645 Register tmp4, Register tmp5,
7646 Register n_tmp6) {
7647 Label L_processPartitions;
7648 Label L_processPartition;
7649 Label L_exit;
7650
7651 bind(L_processPartitions);
7652 cmpl(in_out1, 3 * size);
7653 jcc(Assembler::less, L_exit);
7654 xorl(tmp1, tmp1);
7655 xorl(tmp2, tmp2);
7656 movl(tmp3, in_out2);
7657 addl(tmp3, size);
7658
7659 bind(L_processPartition);
7660 crc32(in_out3, Address(in_out2, 0), 4);
7661 crc32(tmp1, Address(in_out2, size), 4);
7662 crc32(tmp2, Address(in_out2, size*2), 4);
7663 crc32(in_out3, Address(in_out2, 0+4), 4);
7664 crc32(tmp1, Address(in_out2, size+4), 4);
7665 crc32(tmp2, Address(in_out2, size*2+4), 4);
7666 addl(in_out2, 8);
7667 cmpl(in_out2, tmp3);
7668 jcc(Assembler::less, L_processPartition);
7669
7670 push(tmp3);
7671 push(in_out1);
7672 push(in_out2);
7673 tmp4 = tmp3;
7674 tmp5 = in_out1;
7675 n_tmp6 = in_out2;
7676
7677 crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
7678 w_xtmp1, w_xtmp2, w_xtmp3,
7679 tmp4, tmp5,
7680 n_tmp6);
7681
7682 pop(in_out2);
7683 pop(in_out1);
7684 pop(tmp3);
7685
7686 addl(in_out2, 2 * size);
7687 subl(in_out1, 3 * size);
7688 jmp(L_processPartitions);
7689
7690 bind(L_exit);
7691}
7692#endif //LP64
7693
7694#ifdef _LP641
7695// Algorithm 2: Pipelined usage of the CRC32 instruction.
7696// Input: A buffer I of L bytes.
7697// Output: the CRC32C value of the buffer.
7698// Notations:
7699// Write L = 24N + r, with N = floor (L/24).
7700// r = L mod 24 (0 <= r < 24).
7701// Consider I as the concatenation of A|B|C|R, where A, B, C, each,
7702// N quadwords, and R consists of r bytes.
7703// A[j] = I [8j+7:8j], j= 0, 1, ..., N-1
7704// B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1
7705// C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1
7706// if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1
7707void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
7708 Register tmp1, Register tmp2, Register tmp3,
7709 Register tmp4, Register tmp5, Register tmp6,
7710 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7711 bool is_pclmulqdq_supported) {
7712 uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
7713 Label L_wordByWord;
7714 Label L_byteByByteProlog;
7715 Label L_byteByByte;
7716 Label L_exit;
7717
7718 if (is_pclmulqdq_supported ) {
7719 const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
7720 const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1);
7721
7722 const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
7723 const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
7724
7725 const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
7726 const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
7727 assert((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"")do { if (!((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7727, "assert(" "(CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5"
") failed", "Checking whether you declared all of the constants based on the number of \"chunks\""
); ::breakpoint(); } } while (0)
;
7728 } else {
7729 const_or_pre_comp_const_index[0] = 1;
7730 const_or_pre_comp_const_index[1] = 0;
7731
7732 const_or_pre_comp_const_index[2] = 3;
7733 const_or_pre_comp_const_index[3] = 2;
7734
7735 const_or_pre_comp_const_index[4] = 5;
7736 const_or_pre_comp_const_index[5] = 4;
7737 }
7738 crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
7739 in2, in1, in_out,
7740 tmp1, tmp2, tmp3,
7741 w_xtmp1, w_xtmp2, w_xtmp3,
7742 tmp4, tmp5,
7743 tmp6);
7744 crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
7745 in2, in1, in_out,
7746 tmp1, tmp2, tmp3,
7747 w_xtmp1, w_xtmp2, w_xtmp3,
7748 tmp4, tmp5,
7749 tmp6);
7750 crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
7751 in2, in1, in_out,
7752 tmp1, tmp2, tmp3,
7753 w_xtmp1, w_xtmp2, w_xtmp3,
7754 tmp4, tmp5,
7755 tmp6);
7756 movl(tmp1, in2);
7757 andl(tmp1, 0x00000007);
7758 negl(tmp1);
7759 addl(tmp1, in2);
7760 addq(tmp1, in1);
7761
7762 BIND(L_wordByWord);
7763 cmpq(in1, tmp1);
7764 jcc(Assembler::greaterEqual, L_byteByByteProlog);
7765 crc32(in_out, Address(in1, 0), 4);
7766 addq(in1, 4);
7767 jmp(L_wordByWord);
7768
7769 BIND(L_byteByByteProlog);
7770 andl(in2, 0x00000007);
7771 movl(tmp2, 1);
7772
7773 BIND(L_byteByByte);
7774 cmpl(tmp2, in2);
7775 jccb(Assembler::greater, L_exit)jccb_0(Assembler::greater, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7775)
;
7776 crc32(in_out, Address(in1, 0), 1);
7777 incq(in1);
7778 incl(tmp2);
7779 jmp(L_byteByByte);
7780
7781 BIND(L_exit);
7782}
7783#else
7784void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
7785 Register tmp1, Register tmp2, Register tmp3,
7786 Register tmp4, Register tmp5, Register tmp6,
7787 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7788 bool is_pclmulqdq_supported) {
7789 uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
7790 Label L_wordByWord;
7791 Label L_byteByByteProlog;
7792 Label L_byteByByte;
7793 Label L_exit;
7794
7795 if (is_pclmulqdq_supported) {
7796 const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
7797 const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1);
7798
7799 const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
7800 const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
7801
7802 const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
7803 const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
7804 } else {
7805 const_or_pre_comp_const_index[0] = 1;
7806 const_or_pre_comp_const_index[1] = 0;
7807
7808 const_or_pre_comp_const_index[2] = 3;
7809 const_or_pre_comp_const_index[3] = 2;
7810
7811 const_or_pre_comp_const_index[4] = 5;
7812 const_or_pre_comp_const_index[5] = 4;
7813 }
7814 crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
7815 in2, in1, in_out,
7816 tmp1, tmp2, tmp3,
7817 w_xtmp1, w_xtmp2, w_xtmp3,
7818 tmp4, tmp5,
7819 tmp6);
7820 crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
7821 in2, in1, in_out,
7822 tmp1, tmp2, tmp3,
7823 w_xtmp1, w_xtmp2, w_xtmp3,
7824 tmp4, tmp5,
7825 tmp6);
7826 crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
7827 in2, in1, in_out,
7828 tmp1, tmp2, tmp3,
7829 w_xtmp1, w_xtmp2, w_xtmp3,
7830 tmp4, tmp5,
7831 tmp6);
7832 movl(tmp1, in2);
7833 andl(tmp1, 0x00000007);
7834 negl(tmp1);
7835 addl(tmp1, in2);
7836 addl(tmp1, in1);
7837
7838 BIND(L_wordByWord);
7839 cmpl(in1, tmp1);
7840 jcc(Assembler::greaterEqual, L_byteByByteProlog);
7841 crc32(in_out, Address(in1,0), 4);
7842 addl(in1, 4);
7843 jmp(L_wordByWord);
7844
7845 BIND(L_byteByByteProlog);
7846 andl(in2, 0x00000007);
7847 movl(tmp2, 1);
7848
7849 BIND(L_byteByByte);
7850 cmpl(tmp2, in2);
7851 jccb(Assembler::greater, L_exit)jccb_0(Assembler::greater, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7851)
;
7852 movb(tmp1, Address(in1, 0));
7853 crc32(in_out, tmp1, 1);
7854 incl(in1);
7855 incl(tmp2);
7856 jmp(L_byteByByte);
7857
7858 BIND(L_exit);
7859}
7860#endif // LP64
7861#undef BIND
7862#undef BLOCK_COMMENT
7863
7864// Compress char[] array to byte[].
7865// ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java
7866// @IntrinsicCandidate
7867// private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
7868// for (int i = 0; i < len; i++) {
7869// int c = src[srcOff++];
7870// if (c >>> 8 != 0) {
7871// return 0;
7872// }
7873// dst[dstOff++] = (byte)c;
7874// }
7875// return len;
7876// }
7877void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
7878 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
7879 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
7880 Register tmp5, Register result, KRegister mask1, KRegister mask2) {
7881 Label copy_chars_loop, return_length, return_zero, done;
7882
7883 // rsi: src
7884 // rdi: dst
7885 // rdx: len
7886 // rcx: tmp5
7887 // rax: result
7888
7889 // rsi holds start addr of source char[] to be compressed
7890 // rdi holds start addr of destination byte[]
7891 // rdx holds length
7892
7893 assert(len != result, "")do { if (!(len != result)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7893, "assert(" "len != result" ") failed", ""); ::breakpoint
(); } } while (0)
;
7894
7895 // save length for return
7896 push(len);
7897
7898 if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
7899 VM_Version::supports_avx512vlbw() &&
7900 VM_Version::supports_bmi2()) {
7901
7902 Label copy_32_loop, copy_loop_tail, below_threshold;
7903
7904 // alignment
7905 Label post_alignment;
7906
7907 // if length of the string is less than 16, handle it in an old fashioned way
7908 testl(len, -32);
7909 jcc(Assembler::zero, below_threshold);
7910
7911 // First check whether a character is compressable ( <= 0xFF).
7912 // Create mask to test for Unicode chars inside zmm vector
7913 movl(result, 0x00FF);
7914 evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7915
7916 testl(len, -64);
7917 jcc(Assembler::zero, post_alignment);
7918
7919 movl(tmp5, dst);
7920 andl(tmp5, (32 - 1));
7921 negl(tmp5);
7922 andl(tmp5, (32 - 1));
7923
7924 // bail out when there is nothing to be done
7925 testl(tmp5, 0xFFFFFFFF);
7926 jcc(Assembler::zero, post_alignment);
7927
7928 // ~(~0 << len), where len is the # of remaining elements to process
7929 movl(result, 0xFFFFFFFF);
7930 shlxl(result, result, tmp5);
7931 notl(result);
7932 kmovdl(mask2, result);
7933
7934 evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7935 evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
7936 ktestd(mask1, mask2);
7937 jcc(Assembler::carryClear, return_zero);
7938
7939 evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
7940
7941 addptr(src, tmp5);
7942 addptr(src, tmp5);
7943 addptr(dst, tmp5);
7944 subl(len, tmp5);
7945
7946 bind(post_alignment);
7947 // end of alignment
7948
7949 movl(tmp5, len);
7950 andl(tmp5, (32 - 1)); // tail count (in chars)
7951 andl(len, ~(32 - 1)); // vector count (in chars)
7952 jcc(Assembler::zero, copy_loop_tail);
7953
7954 lea(src, Address(src, len, Address::times_2));
7955 lea(dst, Address(dst, len, Address::times_1));
7956 negptr(len);
7957
7958 bind(copy_32_loop);
7959 evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
7960 evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7961 kortestdl(mask1, mask1);
7962 jcc(Assembler::carryClear, return_zero);
7963
7964 // All elements in current processed chunk are valid candidates for
7965 // compression. Write a truncated byte elements to the memory.
7966 evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7967 addptr(len, 32);
7968 jcc(Assembler::notZero, copy_32_loop);
7969
7970 bind(copy_loop_tail);
7971 // bail out when there is nothing to be done
7972 testl(tmp5, 0xFFFFFFFF);
7973 jcc(Assembler::zero, return_length);
7974
7975 movl(len, tmp5);
7976
7977 // ~(~0 << len), where len is the # of remaining elements to process
7978 movl(result, 0xFFFFFFFF);
7979 shlxl(result, result, len);
7980 notl(result);
7981
7982 kmovdl(mask2, result);
7983
7984 evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7985 evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
7986 ktestd(mask1, mask2);
7987 jcc(Assembler::carryClear, return_zero);
7988
7989 evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
7990 jmp(return_length);
7991
7992 bind(below_threshold);
7993 }
7994
7995 if (UseSSE42Intrinsics) {
7996 Label copy_32_loop, copy_16, copy_tail;
7997
7998 movl(result, len);
7999
8000 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
8001
8002 // vectored compression
8003 andl(len, 0xfffffff0); // vector count (in chars)
8004 andl(result, 0x0000000f); // tail count (in chars)
8005 testl(len, len);
8006 jcc(Assembler::zero, copy_16);
8007
8008 // compress 16 chars per iter
8009 movdl(tmp1Reg, tmp5);
8010 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
8011 pxor(tmp4Reg, tmp4Reg);
8012
8013 lea(src, Address(src, len, Address::times_2));
8014 lea(dst, Address(dst, len, Address::times_1));
8015 negptr(len);
8016
8017 bind(copy_32_loop);
8018 movdqu(tmp2Reg, Address(src, len, Address::times_2)); // load 1st 8 characters
8019 por(tmp4Reg, tmp2Reg);
8020 movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
8021 por(tmp4Reg, tmp3Reg);
8022 ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
8023 jcc(Assembler::notZero, return_zero);
8024 packuswb(tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte
8025 movdqu(Address(dst, len, Address::times_1), tmp2Reg);
8026 addptr(len, 16);
8027 jcc(Assembler::notZero, copy_32_loop);
8028
8029 // compress next vector of 8 chars (if any)
8030 bind(copy_16);
8031 movl(len, result);
8032 andl(len, 0xfffffff8); // vector count (in chars)
8033 andl(result, 0x00000007); // tail count (in chars)
8034 testl(len, len);
8035 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8035)
;
8036
8037 movdl(tmp1Reg, tmp5);
8038 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
8039 pxor(tmp3Reg, tmp3Reg);
8040
8041 movdqu(tmp2Reg, Address(src, 0));
8042 ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
8043 jccb(Assembler::notZero, return_zero)jccb_0(Assembler::notZero, return_zero, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8043)
;
8044 packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
8045 movq(Address(dst, 0), tmp2Reg);
8046 addptr(src, 16);
8047 addptr(dst, 8);
8048
8049 bind(copy_tail);
8050 movl(len, result);
8051 }
8052 // compress 1 char per iter
8053 testl(len, len);
8054 jccb(Assembler::zero, return_length)jccb_0(Assembler::zero, return_length, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8054)
;
8055 lea(src, Address(src, len, Address::times_2));
8056 lea(dst, Address(dst, len, Address::times_1));
8057 negptr(len);
8058
8059 bind(copy_chars_loop);
8060 load_unsigned_short(result, Address(src, len, Address::times_2));
8061 testl(result, 0xff00); // check if Unicode char
8062 jccb(Assembler::notZero, return_zero)jccb_0(Assembler::notZero, return_zero, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8062)
;
8063 movb(Address(dst, len, Address::times_1), result); // ASCII char; compress to 1 byte
8064 increment(len);
8065 jcc(Assembler::notZero, copy_chars_loop);
8066
8067 // if compression succeeded, return length
8068 bind(return_length);
8069 pop(result);
8070 jmpb(done)jmpb_0(done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8070)
;
8071
8072 // if compression failed, return 0
8073 bind(return_zero);
8074 xorl(result, result);
8075 addptr(rsp, wordSize);
8076
8077 bind(done);
8078}
8079
8080// Inflate byte[] array to char[].
8081// ..\jdk\src\java.base\share\classes\java\lang\StringLatin1.java
8082// @IntrinsicCandidate
8083// private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
8084// for (int i = 0; i < len; i++) {
8085// dst[dstOff++] = (char)(src[srcOff++] & 0xff);
8086// }
8087// }
// Emits code that copies `len` bytes from `src` to `dst`, widening every byte
// to a 16-bit element (vpmovzxbw / pmovzxbw, or load_unsigned_byte + movw).
// Which code is emitted is decided at code-generation time from UseAVX,
// UseSSE42Intrinsics and the VM_Version feature checks below:
//   - a 512-bit loop (32 elements per iteration) with a masked tail,
//   - a 256-bit / 128-bit loop (16 or 8 elements per iteration),
//   - a loop handling one element per iteration.
// The emitted code overwrites src, dst, len, tmp2 and tmp1; `mask` is written
// only inside the AVX-512 block.
8088void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
8089 XMMRegister tmp1, Register tmp2, KRegister mask) {
8090 Label copy_chars_loop, done, below_threshold, avx3_threshold;
8091 // rsi: src
8092 // rdi: dst
8093 // rdx: len
8094 // rcx: tmp2
8095
8096 // rsi holds start addr of source byte[] to be inflated
8097 // rdi holds start addr of destination char[]
8098 // rdx holds length
8099 assert_different_registers(src, dst, len, tmp2);
// tmp2 keeps a copy of the full length; the vector paths later reduce it to
// the tail count.
8100 movl(tmp2, len);
8101 if ((UseAVX > 2) && // AVX512
8102 VM_Version::supports_avx512vlbw() &&
8103 VM_Version::supports_bmi2()) {
8104
8105 Label copy_32_loop, copy_tail;
// len is reused as a scratch register once the main loop has finished.
8106 Register tmp3_aliased = len;
8107
8108 // if length of the string is less than 16, handle it in an old fashioned way
8109 testl(len, -16);
8110 jcc(Assembler::zero, below_threshold);
8111
// NOTE(review): presumably AVX3Threshold is a power of two, so a zero result
// means len < AVX3Threshold and the narrower vector code is used — confirm.
8112 testl(len, -1 * AVX3Threshold);
8113 jcc(Assembler::zero, avx3_threshold);
8114
8115 // In order to use only one arithmetic operation for the main loop we use
8116 // this pre-calculation
8117 andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
8118 andl(len, -32); // vector count
8119 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8119)
;
8120
// Point src/dst just past the vectorized region and negate len, so the loop
// index counts up towards zero and addptr's flags terminate the loop.
8121 lea(src, Address(src, len, Address::times_1));
8122 lea(dst, Address(dst, len, Address::times_2));
8123 negptr(len);
8124
8125
8126 // inflate 32 chars per iter
8127 bind(copy_32_loop);
8128 vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
8129 evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
8130 addptr(len, 32);
8131 jcc(Assembler::notZero, copy_32_loop);
8132
8133 bind(copy_tail);
8134 // bail out when there is nothing to be done
8135 testl(tmp2, -1); // we don't destroy the contents of tmp2 here
8136 jcc(Assembler::zero, done);
8137
8138 // ~(~0 << length), where length is the # of remaining elements to process
8139 movl(tmp3_aliased, -1);
8140 shlxl(tmp3_aliased, tmp3_aliased, tmp2);
8141 notl(tmp3_aliased);
8142 kmovdl(mask, tmp3_aliased);
// Masked load/store of the remaining (< 32) elements.
8143 evpmovzxbw(tmp1, mask, Address(src, 0), Assembler::AVX_512bit);
8144 evmovdquw(Address(dst, 0), mask, tmp1, /*merge*/ true, Assembler::AVX_512bit);
8145
8146 jmp(done);
// Lengths routed to avx3_threshold continue with the code emitted below.
8147 bind(avx3_threshold);
8148 }
8149 if (UseSSE42Intrinsics) {
8150 Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
8151
// With UseAVX > 1 the main loop handles 16 elements per iteration,
// otherwise 8. tmp2 receives the tail count, len the vectorized count.
8152 if (UseAVX > 1) {
8153 andl(tmp2, (16 - 1));
8154 andl(len, -16);
8155 jccb(Assembler::zero, copy_new_tail)jccb_0(Assembler::zero, copy_new_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8155)
;
8156 } else {
8157 andl(tmp2, 0x00000007); // tail count (in chars)
8158 andl(len, 0xfffffff8); // vector count (in chars)
8159 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8159)
;
8160 }
8161
8162 // vectored inflation
8163 lea(src, Address(src, len, Address::times_1));
8164 lea(dst, Address(dst, len, Address::times_2));
8165 negptr(len);
8166
8167 if (UseAVX > 1) {
8168 bind(copy_16_loop);
8169 vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_256bit);
8170 vmovdqu(Address(dst, len, Address::times_2), tmp1);
8171 addptr(len, 16);
8172 jcc(Assembler::notZero, copy_16_loop);
8173
// below_threshold is the target of the short-length branch in the AVX-512
// block above; tmp2 still holds the full (< 16) length on that path.
8174 bind(below_threshold);
8175 bind(copy_new_tail);
// Split the remaining count (< 16) into one optional group of 8 (len) and a
// tail of fewer than 8 (tmp2).
8176 movl(len, tmp2);
8177 andl(tmp2, 0x00000007);
8178 andl(len, 0xFFFFFFF8);
8179 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8179)
;
8180
8181 pmovzxbw(tmp1, Address(src, 0));
8182 movdqu(Address(dst, 0), tmp1);
8183 addptr(src, 8);
8184 addptr(dst, 2 * 8);
8185
// NOTE(review): the second argument presumably allows a short jump
// encoding — confirm against the jmp(Label&, bool) declaration.
8186 jmp(copy_tail, true);
8187 }
8188
8189 // inflate 8 chars per iter
8190 bind(copy_8_loop);
8191 pmovzxbw(tmp1, Address(src, len, Address::times_1)); // unpack to 8 words
8192 movdqu(Address(dst, len, Address::times_2), tmp1);
8193 addptr(len, 8);
8194 jcc(Assembler::notZero, copy_8_loop);
8195
8196 bind(copy_tail);
// len = number of elements still to process (tail count kept in tmp2).
8197 movl(len, tmp2);
8198
// Handle a group of 4 elements at once when at least 4 remain.
8199 cmpl(len, 4);
8200 jccb(Assembler::less, copy_bytes)jccb_0(Assembler::less, copy_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8200)
;
8201
8202 movdl(tmp1, Address(src, 0)); // load 4 byte chars
8203 pmovzxbw(tmp1, tmp1);
8204 movq(Address(dst, 0), tmp1);
8205 subptr(len, 4);
8206 addptr(src, 4);
8207 addptr(dst, 8);
8208
8209 bind(copy_bytes);
8210 } else {
// Without the SSE4.2 code, short lengths branch straight to the
// one-element-per-iteration loop below (len is unmodified on that path).
8211 bind(below_threshold);
8212 }
8213
8214 testl(len, len);
8215 jccb(Assembler::zero, done)jccb_0(Assembler::zero, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8215)
;
// Same end-pointer / negative-index scheme as the vector loops.
8216 lea(src, Address(src, len, Address::times_1));
8217 lea(dst, Address(dst, len, Address::times_2));
8218 negptr(len);
8219
8220 // inflate 1 char per iter
8221 bind(copy_chars_loop);
8222 load_unsigned_byte(tmp2, Address(src, len, Address::times_1)); // load byte char
8223 movw(Address(dst, len, Address::times_2), tmp2); // inflate byte char to word
8224 increment(len);
8225 jcc(Assembler::notZero, copy_chars_loop);
8226
8227 bind(done);
8228}
8229
8230
8231void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) {
8232 switch(type) {
8233 case T_BYTE:
8234 case T_BOOLEAN:
8235 evmovdqub(dst, kmask, src, false, vector_len);
8236 break;
8237 case T_CHAR:
8238 case T_SHORT:
8239 evmovdquw(dst, kmask, src, false, vector_len);
8240 break;
8241 case T_INT:
8242 case T_FLOAT:
8243 evmovdqul(dst, kmask, src, false, vector_len);
8244 break;
8245 case T_LONG:
8246 case T_DOUBLE:
8247 evmovdquq(dst, kmask, src, false, vector_len);
8248 break;
8249 default:
8250 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8250, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8251 break;
8252 }
8253}
8254
8255void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) {
8256 switch(type) {
8257 case T_BYTE:
8258 case T_BOOLEAN:
8259 evmovdqub(dst, kmask, src, true, vector_len);
8260 break;
8261 case T_CHAR:
8262 case T_SHORT:
8263 evmovdquw(dst, kmask, src, true, vector_len);
8264 break;
8265 case T_INT:
8266 case T_FLOAT:
8267 evmovdqul(dst, kmask, src, true, vector_len);
8268 break;
8269 case T_LONG:
8270 case T_DOUBLE:
8271 evmovdquq(dst, kmask, src, true, vector_len);
8272 break;
8273 default:
8274 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8274, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8275 break;
8276 }
8277}
8278
8279void MacroAssembler::knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp, Register rtmp) {
8280 switch(masklen) {
8281 case 2:
8282 knotbl(dst, src);
8283 movl(rtmp, 3);
8284 kmovbl(ktmp, rtmp);
8285 kandbl(dst, ktmp, dst);
8286 break;
8287 case 4:
8288 knotbl(dst, src);
8289 movl(rtmp, 15);
8290 kmovbl(ktmp, rtmp);
8291 kandbl(dst, ktmp, dst);
8292 break;
8293 case 8:
8294 knotbl(dst, src);
8295 break;
8296 case 16:
8297 knotwl(dst, src);
8298 break;
8299 case 32:
8300 knotdl(dst, src);
8301 break;
8302 case 64:
8303 knotql(dst, src);
8304 break;
8305 default:
8306 fatal("Unexpected vector length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8306, "Unexpected vector length %d", masklen); ::breakpoint
(); } while (0)
;
8307 break;
8308 }
8309}
8310
8311void MacroAssembler::kand(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
8312 switch(type) {
8313 case T_BOOLEAN:
8314 case T_BYTE:
8315 kandbl(dst, src1, src2);
8316 break;
8317 case T_CHAR:
8318 case T_SHORT:
8319 kandwl(dst, src1, src2);
8320 break;
8321 case T_INT:
8322 case T_FLOAT:
8323 kanddl(dst, src1, src2);
8324 break;
8325 case T_LONG:
8326 case T_DOUBLE:
8327 kandql(dst, src1, src2);
8328 break;
8329 default:
8330 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8330, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8331 break;
8332 }
8333}
8334
8335void MacroAssembler::kor(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
8336 switch(type) {
8337 case T_BOOLEAN:
8338 case T_BYTE:
8339 korbl(dst, src1, src2);
8340 break;
8341 case T_CHAR:
8342 case T_SHORT:
8343 korwl(dst, src1, src2);
8344 break;
8345 case T_INT:
8346 case T_FLOAT:
8347 kordl(dst, src1, src2);
8348 break;
8349 case T_LONG:
8350 case T_DOUBLE:
8351 korql(dst, src1, src2);
8352 break;
8353 default:
8354 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8354, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8355 break;
8356 }
8357}
8358
8359void MacroAssembler::kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
8360 switch(type) {
8361 case T_BOOLEAN:
8362 case T_BYTE:
8363 kxorbl(dst, src1, src2);
8364 break;
8365 case T_CHAR:
8366 case T_SHORT:
8367 kxorwl(dst, src1, src2);
8368 break;
8369 case T_INT:
8370 case T_FLOAT:
8371 kxordl(dst, src1, src2);
8372 break;
8373 case T_LONG:
8374 case T_DOUBLE:
8375 kxorql(dst, src1, src2);
8376 break;
8377 default:
8378 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8378, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8379 break;
8380 }
8381}
8382
8383void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8384 switch(type) {
8385 case T_BOOLEAN:
8386 case T_BYTE:
8387 evpermb(dst, mask, nds, src, merge, vector_len); break;
8388 case T_CHAR:
8389 case T_SHORT:
8390 evpermw(dst, mask, nds, src, merge, vector_len); break;
8391 case T_INT:
8392 case T_FLOAT:
8393 evpermd(dst, mask, nds, src, merge, vector_len); break;
8394 case T_LONG:
8395 case T_DOUBLE:
8396 evpermq(dst, mask, nds, src, merge, vector_len); break;
8397 default:
8398 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8398, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8399 }
8400}
8401
8402void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8403 switch(type) {
8404 case T_BOOLEAN:
8405 case T_BYTE:
8406 evpermb(dst, mask, nds, src, merge, vector_len); break;
8407 case T_CHAR:
8408 case T_SHORT:
8409 evpermw(dst, mask, nds, src, merge, vector_len); break;
8410 case T_INT:
8411 case T_FLOAT:
8412 evpermd(dst, mask, nds, src, merge, vector_len); break;
8413 case T_LONG:
8414 case T_DOUBLE:
8415 evpermq(dst, mask, nds, src, merge, vector_len); break;
8416 default:
8417 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8417, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8418 }
8419}
8420
8421void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8422 switch(type) {
8423 case T_BYTE:
8424 evpminsb(dst, mask, nds, src, merge, vector_len); break;
8425 case T_SHORT:
8426 evpminsw(dst, mask, nds, src, merge, vector_len); break;
8427 case T_INT:
8428 evpminsd(dst, mask, nds, src, merge, vector_len); break;
8429 case T_LONG:
8430 evpminsq(dst, mask, nds, src, merge, vector_len); break;
8431 default:
8432 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8432, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8433 }
8434}
8435
8436void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8437 switch(type) {
8438 case T_BYTE:
8439 evpmaxsb(dst, mask, nds, src, merge, vector_len); break;
8440 case T_SHORT:
8441 evpmaxsw(dst, mask, nds, src, merge, vector_len); break;
8442 case T_INT:
8443 evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
8444 case T_LONG:
8445 evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
8446 default:
8447 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8447, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8448 }
8449}
8450
8451void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8452 switch(type) {
8453 case T_BYTE:
8454 evpminsb(dst, mask, nds, src, merge, vector_len); break;
8455 case T_SHORT:
8456 evpminsw(dst, mask, nds, src, merge, vector_len); break;
8457 case T_INT:
8458 evpminsd(dst, mask, nds, src, merge, vector_len); break;
8459 case T_LONG:
8460 evpminsq(dst, mask, nds, src, merge, vector_len); break;
8461 default:
8462 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8462, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8463 }
8464}
8465
8466void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8467 switch(type) {
8468 case T_BYTE:
8469 evpmaxsb(dst, mask, nds, src, merge, vector_len); break;
8470 case T_SHORT:
8471 evpmaxsw(dst, mask, nds, src, merge, vector_len); break;
8472 case T_INT:
8473 evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
8474 case T_LONG:
8475 evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
8476 default:
8477 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8477, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8478 }
8479}
8480
8481void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8482 switch(type) {
8483 case T_INT:
8484 evpxord(dst, mask, nds, src, merge, vector_len); break;
8485 case T_LONG:
8486 evpxorq(dst, mask, nds, src, merge, vector_len); break;
8487 default:
8488 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8488, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8489 }
8490}
8491
8492void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8493 switch(type) {
8494 case T_INT:
8495 evpxord(dst, mask, nds, src, merge, vector_len); break;
8496 case T_LONG:
8497 evpxorq(dst, mask, nds, src, merge, vector_len); break;
8498 default:
8499 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8499, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8500 }
8501}
8502
8503void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8504 switch(type) {
8505 case T_INT:
8506 Assembler::evpord(dst, mask, nds, src, merge, vector_len); break;
8507 case T_LONG:
8508 evporq(dst, mask, nds, src, merge, vector_len); break;
8509 default:
8510 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8510, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8511 }
8512}
8513
8514void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8515 switch(type) {
8516 case T_INT:
8517 Assembler::evpord(dst, mask, nds, src, merge, vector_len); break;
8518 case T_LONG:
8519 evporq(dst, mask, nds, src, merge, vector_len); break;
8520 default:
8521 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8521, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8522 }
8523}
8524
8525void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8526 switch(type) {
8527 case T_INT:
8528 evpandd(dst, mask, nds, src, merge, vector_len); break;
8529 case T_LONG:
8530 evpandq(dst, mask, nds, src, merge, vector_len); break;
8531 default:
8532 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8532, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8533 }
8534}
8535
8536void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8537 switch(type) {
8538 case T_INT:
8539 evpandd(dst, mask, nds, src, merge, vector_len); break;
8540 case T_LONG:
8541 evpandq(dst, mask, nds, src, merge, vector_len); break;
8542 default:
8543 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8543, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8544 }
8545}
8546
8547void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) {
8548 masklen = masklen < 8 ? 8 : masklen;
8549 ktest(masklen, src1, src2);
8550 setb(Assembler::notZero, dst);
8551 movzbl(dst, dst);
8552}
8553
8554void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) {
8555 if (masklen < 8) {
8556 knotbl(kscratch, src2);
8557 kortestbl(src1, kscratch);
8558 setb(Assembler::carrySet, dst);
8559 movzbl(dst, dst);
8560 } else {
8561 ktest(masklen, src1, src2);
8562 setb(Assembler::carrySet, dst);
8563 movzbl(dst, dst);
8564 }
8565}
8566
8567void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) {
8568 switch(masklen) {
8569 case 8:
8570 kortestbl(src1, src2);
8571 break;
8572 case 16:
8573 kortestwl(src1, src2);
8574 break;
8575 case 32:
8576 kortestdl(src1, src2);
8577 break;
8578 case 64:
8579 kortestql(src1, src2);
8580 break;
8581 default:
8582 fatal("Unexpected mask length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8582, "Unexpected mask length %d", masklen); ::breakpoint()
; } while (0)
;
8583 break;
8584 }
8585}
8586
8587
8588void MacroAssembler::ktest(uint masklen, KRegister src1, KRegister src2) {
8589 switch(masklen) {
8590 case 8:
8591 ktestbl(src1, src2);
8592 break;
8593 case 16:
8594 ktestwl(src1, src2);
8595 break;
8596 case 32:
8597 ktestdl(src1, src2);
8598 break;
8599 case 64:
8600 ktestql(src1, src2);
8601 break;
8602 default:
8603 fatal("Unexpected mask length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8603, "Unexpected mask length %d", masklen); ::breakpoint()
; } while (0)
;
8604 break;
8605 }
8606}
8607
8608void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) {
8609 switch(type) {
8610 case T_INT:
8611 evprold(dst, mask, src, shift, merge, vlen_enc); break;
8612 case T_LONG:
8613 evprolq(dst, mask, src, shift, merge, vlen_enc); break;
8614 default:
8615 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8615, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8616 break;
8617 }
8618}
8619
8620void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) {
8621 switch(type) {
8622 case T_INT:
8623 evprord(dst, mask, src, shift, merge, vlen_enc); break;
8624 case T_LONG:
8625 evprorq(dst, mask, src, shift, merge, vlen_enc); break;
8626 default:
8627 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8627, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8628 }
8629}
8630
8631void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) {
8632 switch(type) {
8633 case T_INT:
8634 evprolvd(dst, mask, src1, src2, merge, vlen_enc); break;
8635 case T_LONG:
8636 evprolvq(dst, mask, src1, src2, merge, vlen_enc); break;
8637 default:
8638 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8638, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8639 }
8640}
8641
8642void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) {
8643 switch(type) {
8644 case T_INT:
8645 evprorvd(dst, mask, src1, src2, merge, vlen_enc); break;
8646 case T_LONG:
8647 evprorvq(dst, mask, src1, src2, merge, vlen_enc); break;
8648 default:
8649 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8649, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8650 }
8651}
8652#if COMPILER2_OR_JVMCI1
8653
8654void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
8655 Register length, Register temp, int vec_enc) {
8656 // Computing mask for predicated vector store.
8657 movptr(temp, -1);
8658 bzhiq(temp, temp, length);
8659 kmov(mask, temp);
8660 evmovdqu(bt, mask, dst, xmm, vec_enc);
8661}
8662
8663// Set memory operation for length "less than" 64 bytes.
8664void MacroAssembler::fill64_masked(uint shift, Register dst, int disp,
8665 XMMRegister xmm, KRegister mask, Register length,
8666 Register temp, bool use64byteVector) {
8667 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8667, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8668 BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
8669 if (!use64byteVector) {
8670 fill32(dst, disp, xmm);
8671 subptr(length, 32 >> shift);
8672 fill32_masked(shift, dst, disp + 32, xmm, mask, length, temp);
8673 } else {
8674 assert(MaxVectorSize == 64, "vector length != 64")do { if (!(MaxVectorSize == 64)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8674, "assert(" "MaxVectorSize == 64" ") failed", "vector length != 64"
); ::breakpoint(); } } while (0)
;
8675 fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_512bit);
8676 }
8677}
8678
8679
8680void MacroAssembler::fill32_masked(uint shift, Register dst, int disp,
8681 XMMRegister xmm, KRegister mask, Register length,
8682 Register temp) {
8683 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8683, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8684 BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
8685 fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_256bit);
8686}
8687
8688
8689void MacroAssembler::fill32(Register dst, int disp, XMMRegister xmm) {
8690 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8690, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8691 vmovdqu(Address(dst, disp), xmm);
8692}
8693
8694void MacroAssembler::fill64(Register dst, int disp, XMMRegister xmm, bool use64byteVector) {
8695 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8695, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8696 BasicType type[] = {T_BYTE, T_SHORT, T_INT, T_LONG};
8697 if (!use64byteVector) {
8698 fill32(dst, disp, xmm);
8699 fill32(dst, disp + 32, xmm);
8700 } else {
8701 evmovdquq(Address(dst, disp), xmm, Assembler::AVX_512bit);
8702 }
8703}
8704
8705#ifdef _LP641
8706void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register value,
8707 Register count, Register rtmp, XMMRegister xtmp) {
8708 Label L_exit;
8709 Label L_fill_start;
8710 Label L_fill_64_bytes;
8711 Label L_fill_96_bytes;
8712 Label L_fill_128_bytes;
8713 Label L_fill_128_bytes_loop;
8714 Label L_fill_128_loop_header;
8715 Label L_fill_128_bytes_loop_header;
8716 Label L_fill_128_bytes_loop_pre_header;
8717 Label L_fill_zmm_sequence;
8718
8719 int shift = -1;
8720 int avx3threshold = VM_Version::avx3_threshold();
8721 switch(type) {
8722 case T_BYTE: shift = 0;
8723 break;
8724 case T_SHORT: shift = 1;
8725 break;
8726 case T_INT: shift = 2;
8727 break;
8728 /* Uncomment when LONG fill stubs are supported.
8729 case T_LONG: shift = 3;
8730 break;
8731 */
8732 default:
8733 fatal("Unhandled type: %s\n", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8733, "Unhandled type: %s\n", type2name(type)); ::breakpoint
(); } while (0)
;
8734 }
8735
8736 if ((avx3threshold != 0) || (MaxVectorSize == 32)) {
8737
8738 if (MaxVectorSize == 64) {
8739 cmpq(count, avx3threshold >> shift);
8740 jcc(Assembler::greater, L_fill_zmm_sequence);
8741 }
8742
8743 evpbroadcast(type, xtmp, value, Assembler::AVX_256bit);
8744
8745 bind(L_fill_start);
8746
8747 cmpq(count, 32 >> shift);
8748 jccb(Assembler::greater, L_fill_64_bytes)jccb_0(Assembler::greater, L_fill_64_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8748)
;
8749 fill32_masked(shift, to, 0, xtmp, k2, count, rtmp);
8750 jmp(L_exit);
8751
8752 bind(L_fill_64_bytes);
8753 cmpq(count, 64 >> shift);
8754 jccb(Assembler::greater, L_fill_96_bytes)jccb_0(Assembler::greater, L_fill_96_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8754)
;
8755 fill64_masked(shift, to, 0, xtmp, k2, count, rtmp);
8756 jmp(L_exit);
8757
8758 bind(L_fill_96_bytes);
8759 cmpq(count, 96 >> shift);
8760 jccb(Assembler::greater, L_fill_128_bytes)jccb_0(Assembler::greater, L_fill_128_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8760)
;
8761 fill64(to, 0, xtmp);
8762 subq(count, 64 >> shift);
8763 fill32_masked(shift, to, 64, xtmp, k2, count, rtmp);
8764 jmp(L_exit);
8765
8766 bind(L_fill_128_bytes);
8767 cmpq(count, 128 >> shift);
8768 jccb(Assembler::greater, L_fill_128_bytes_loop_pre_header)jccb_0(Assembler::greater, L_fill_128_bytes_loop_pre_header, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8768)
;
8769 fill64(to, 0, xtmp);
8770 fill32(to, 64, xtmp);
8771 subq(count, 96 >> shift);
8772 fill32_masked(shift, to, 96, xtmp, k2, count, rtmp);
8773 jmp(L_exit);
8774
8775 bind(L_fill_128_bytes_loop_pre_header);
8776 {
8777 mov(rtmp, to);
8778 andq(rtmp, 31);
8779 jccb(Assembler::zero, L_fill_128_bytes_loop_header)jccb_0(Assembler::zero, L_fill_128_bytes_loop_header, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8779)
;
8780 negq(rtmp);
8781 addq(rtmp, 32);
8782 mov64(r8, -1L);
8783 bzhiq(r8, r8, rtmp);
8784 kmovql(k2, r8);
8785 evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_256bit);
8786 addq(to, rtmp);
8787 shrq(rtmp, shift);
8788 subq(count, rtmp);
8789 }
8790
8791 cmpq(count, 128 >> shift);
8792 jcc(Assembler::less, L_fill_start);
8793
8794 bind(L_fill_128_bytes_loop_header);
8795 subq(count, 128 >> shift);
8796
8797 align32();
8798 bind(L_fill_128_bytes_loop);
8799 fill64(to, 0, xtmp);
8800 fill64(to, 64, xtmp);
8801 addq(to, 128);
8802 subq(count, 128 >> shift);
8803 jccb(Assembler::greaterEqual, L_fill_128_bytes_loop)jccb_0(Assembler::greaterEqual, L_fill_128_bytes_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8803)
;
8804
8805 addq(count, 128 >> shift);
8806 jcc(Assembler::zero, L_exit);
8807 jmp(L_fill_start);
8808 }
8809
8810 if (MaxVectorSize == 64) {
8811 // Sequence using 64 byte ZMM register.
8812 Label L_fill_128_bytes_zmm;
8813 Label L_fill_192_bytes_zmm;
8814 Label L_fill_192_bytes_loop_zmm;
8815 Label L_fill_192_bytes_loop_header_zmm;
8816 Label L_fill_192_bytes_loop_pre_header_zmm;
8817 Label L_fill_start_zmm_sequence;
8818
8819 bind(L_fill_zmm_sequence);
8820 evpbroadcast(type, xtmp, value, Assembler::AVX_512bit);
8821
8822 bind(L_fill_start_zmm_sequence);
8823 cmpq(count, 64 >> shift);
8824 jccb(Assembler::greater, L_fill_128_bytes_zmm)jccb_0(Assembler::greater, L_fill_128_bytes_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8824)
;
8825 fill64_masked(shift, to, 0, xtmp, k2, count, rtmp, true);
8826 jmp(L_exit);
8827
8828 bind(L_fill_128_bytes_zmm);
8829 cmpq(count, 128 >> shift);
8830 jccb(Assembler::greater, L_fill_192_bytes_zmm)jccb_0(Assembler::greater, L_fill_192_bytes_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8830)
;
8831 fill64(to, 0, xtmp, true);
8832 subq(count, 64 >> shift);
8833 fill64_masked(shift, to, 64, xtmp, k2, count, rtmp, true);
8834 jmp(L_exit);
8835
8836 bind(L_fill_192_bytes_zmm);
8837 cmpq(count, 192 >> shift);
8838 jccb(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm)jccb_0(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8838)
;
8839 fill64(to, 0, xtmp, true);
8840 fill64(to, 64, xtmp, true);
8841 subq(count, 128 >> shift);
8842 fill64_masked(shift, to, 128, xtmp, k2, count, rtmp, true);
8843 jmp(L_exit);
8844
8845 bind(L_fill_192_bytes_loop_pre_header_zmm);
8846 {
8847 movq(rtmp, to);
8848 andq(rtmp, 63);
8849 jccb(Assembler::zero, L_fill_192_bytes_loop_header_zmm)jccb_0(Assembler::zero, L_fill_192_bytes_loop_header_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8849)
;
8850 negq(rtmp);
8851 addq(rtmp, 64);
8852 mov64(r8, -1L);
8853 bzhiq(r8, r8, rtmp);
8854 kmovql(k2, r8);
8855 evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_512bit);
8856 addq(to, rtmp);
8857 shrq(rtmp, shift);
8858 subq(count, rtmp);
8859 }
8860
8861 cmpq(count, 192 >> shift);
8862 jcc(Assembler::less, L_fill_start_zmm_sequence);
8863
8864 bind(L_fill_192_bytes_loop_header_zmm);
8865 subq(count, 192 >> shift);
8866
8867 align32();
8868 bind(L_fill_192_bytes_loop_zmm);
8869 fill64(to, 0, xtmp, true);
8870 fill64(to, 64, xtmp, true);
8871 fill64(to, 128, xtmp, true);
8872 addq(to, 192);
8873 subq(count, 192 >> shift);
8874 jccb(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm)jccb_0(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8874)
;
8875
8876 addq(count, 192 >> shift);
8877 jcc(Assembler::zero, L_exit);
8878 jmp(L_fill_start_zmm_sequence);
8879 }
8880 bind(L_exit);
8881}
8882#endif
8883#endif //COMPILER2_OR_JVMCI
8884
8885
8886#ifdef _LP641
// Emits code converting the float in `src` to a 32-bit int in `dst`.
//   dst - general-purpose register receiving the result
//   src - XMM register holding the float input
// Fast path: cvttss2sil. If the result equals 0x80000000 the value is spilled
// to the stack and the f2i_fixup stub is called; the result is then popped
// into dst.
8887void MacroAssembler::convert_f2i(Register dst, XMMRegister src) {
8888 Label done;
8889 cvttss2sil(dst, src);
8890 // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
  // NOTE(review): 0x80000000 is presumably the sentinel the conversion
  // instruction produces for those inputs - confirm against the Intel SDM.
8891 cmpl(dst, 0x80000000); // float_sign_flip
8892 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8892)
;
  // Slow path: reserve 8 bytes of stack, store the input there, call the stub.
  // NOTE(review): the stub presumably overwrites that slot with the corrected
  // result, since it is popped straight into dst below - confirm in the stub.
8893 subptr(rsp, 8);
8894 movflt(Address(rsp, 0), src);
8895 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())((address)((address_word)(StubRoutines::x86::f2i_fixup())))));
8896 pop(dst);
8897 bind(done);
8898}
8899
// Emits code converting the double in `src` to a 32-bit int in `dst`.
//   dst - general-purpose register receiving the result
//   src - XMM register holding the double input
// Fast path: cvttsd2sil. If the result equals 0x80000000 the value is spilled
// to the stack and the d2i_fixup stub is called; the result is then popped
// into dst.
8900void MacroAssembler::convert_d2i(Register dst, XMMRegister src) {
8901 Label done;
8902 cvttsd2sil(dst, src);
8903 // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
8904 cmpl(dst, 0x80000000); // float_sign_flip
8905 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8905)
;
  // Slow path: reserve 8 bytes of stack, store the input there, call the stub.
  // NOTE(review): the stub presumably leaves the corrected result in that
  // slot, which is popped into dst below - confirm in the stub.
8906 subptr(rsp, 8);
8907 movdbl(Address(rsp, 0), src);
8908 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())((address)((address_word)(StubRoutines::x86::d2i_fixup())))));
8909 pop(dst);
8910 bind(done);
8911}
8912
// Emits code converting the float in `src` to a 64-bit integer in `dst`.
//   dst - general-purpose register receiving the result
//   src - XMM register holding the float input
// Fast path: cvttss2siq. The result is compared with the 64-bit value stored
// at StubRoutines::x86::double_sign_flip(); on equality the f2l_fixup stub is
// called with the input spilled to the stack, and the result is popped into dst.
8913void MacroAssembler::convert_f2l(Register dst, XMMRegister src) {
8914 Label done;
8915 cvttss2siq(dst, src);
  // NOTE(review): double_sign_flip is used for the float case as well,
  // presumably because only its 64-bit pattern matters here - confirm.
8916 cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
8917 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8917)
;
  // Slow path: reserve 8 bytes of stack, store the input there, call the stub.
8918 subptr(rsp, 8);
8919 movflt(Address(rsp, 0), src);
8920 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())((address)((address_word)(StubRoutines::x86::f2l_fixup())))));
8921 pop(dst);
8922 bind(done);
8923}
8924
// Emits code converting the double in `src` to a 64-bit integer in `dst`.
//   dst - general-purpose register receiving the result
//   src - XMM register holding the double input
// Fast path: cvttsd2siq. The result is compared with the 64-bit value stored
// at StubRoutines::x86::double_sign_flip(); on equality the d2l_fixup stub is
// called with the input spilled to the stack, and the result is popped into dst.
8925void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
8926 Label done;
8927 cvttsd2siq(dst, src);
8928 cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
8929 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8929)
;
  // Slow path: reserve 8 bytes of stack, store the input there, call the stub.
8930 subptr(rsp, 8);
8931 movdbl(Address(rsp, 0), src);
8932 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())((address)((address_word)(StubRoutines::x86::d2l_fixup())))));
8933 pop(dst);
8934 bind(done);
8935}
8936
// Emits a single cache-line writeback instruction for the address `line`.
// The instruction is chosen from the CPU features reported by VM_Version:
//   clflushopt and clwb supported -> clwb
//   clflushopt only               -> clflushopt
//   otherwise                     -> clflush
// No fence is emitted here; cache_wbsync() emits the sfence where needed.
8937void MacroAssembler::cache_wb(Address line)
8938{
8939 // 64 bit cpus always support clflush
8940 assert(VM_Version::supports_clflush(), "clflush should be available")do { if (!(VM_Version::supports_clflush())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8940, "assert(" "VM_Version::supports_clflush()" ") failed"
, "clflush should be available"); ::breakpoint(); } } while (
0)
;
8941 bool optimized = VM_Version::supports_clflushopt();
8942 bool no_evict = VM_Version::supports_clwb();
8943
8944 // prefer clwb (writeback without evict) otherwise
8945 // prefer clflushopt (potentially parallel writeback with evict)
8946 // otherwise fallback on clflush (serial writeback with evict)
8947
  // NOTE(review): clwb is only selected when clflushopt is also supported.
  // A CPU reporting clwb without clflushopt would take the clflush branch,
  // while cache_wbsync() tests (optimized || no_evict). Presumably clwb always
  // implies clflushopt on real hardware - confirm.
8948 if (optimized) {
8949 if (no_evict) {
8950 clwb(line);
8951 } else {
8952 clflushopt(line);
8953 }
8954 } else {
8955 // no need for fence when using CLFLUSH
8956 clflush(line);
8957 }
8958}
8959
// Emits the synchronization needed around a sequence of cache_wb() writebacks.
//   is_pre - true when called before the writebacks, false when called after
// Only the post-writeback call emits anything: an sfence, and only when the
// CPU supports clflushopt or clwb. In every other case no code is emitted.
8960void MacroAssembler::cache_wbsync(bool is_pre)
8961{
8962 assert(VM_Version::supports_clflush(), "clflush should be available")do { if (!(VM_Version::supports_clflush())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8962, "assert(" "VM_Version::supports_clflush()" ") failed"
, "clflush should be available"); ::breakpoint(); } } while (
0)
;
8963 bool optimized = VM_Version::supports_clflushopt();
8964 bool no_evict = VM_Version::supports_clwb();
8965
8966 // pick the correct implementation
8967
8968 if (!is_pre && (optimized || no_evict)) {
8969 // need an sfence for post flush when using clflushopt or clwb
8970 // otherwise no need for any synchronization
8971
8972 sfence();
8973 }
8974}
8975
8976#endif // _LP64
8977
// Returns the condition code that is the logical opposite of `cond`
// (zero <-> notZero, less <-> greaterEqual, below <-> aboveEqual, ...).
// This emits no code. A value not covered by the switch reaches
// ShouldNotReachHere(); the trailing `return Assembler::overflow` only
// satisfies the return type.
8978Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8979 switch (cond) {
8980 // Note some conditions are synonyms for others
8981 case Assembler::zero: return Assembler::notZero;
8982 case Assembler::notZero: return Assembler::zero;
8983 case Assembler::less: return Assembler::greaterEqual;
8984 case Assembler::lessEqual: return Assembler::greater;
8985 case Assembler::greater: return Assembler::lessEqual;
8986 case Assembler::greaterEqual: return Assembler::less;
8987 case Assembler::below: return Assembler::aboveEqual;
8988 case Assembler::belowEqual: return Assembler::above;
8989 case Assembler::above: return Assembler::belowEqual;
8990 case Assembler::aboveEqual: return Assembler::below;
8991 case Assembler::overflow: return Assembler::noOverflow;
8992 case Assembler::noOverflow: return Assembler::overflow;
8993 case Assembler::negative: return Assembler::positive;
8994 case Assembler::positive: return Assembler::negative;
8995 case Assembler::parity: return Assembler::noParity;
8996 case Assembler::noParity: return Assembler::parity;
8997 }
8998 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8998); ::breakpoint(); } while (0)
; return Assembler::overflow;
8999}
9000
// Emits, through `masm`, a byte compare of the flag at `flag_addr` against
// `value`, followed by a jump to _label taken when they are equal.
//   masm      - assembler used to emit the code; stored in _masm
//   flag_addr - address of the bool flag read by the generated code
//   value     - value that causes the jump to be taken
// _label is left unbound here; the destructor binds it.
9001SkipIfEqual::SkipIfEqual(
9002 MacroAssembler* masm, const bool* flag_addr, bool value) {
9003 _masm = masm;
9004 _masm->cmp8(ExternalAddress((address)flag_addr), value);
9005 _masm->jcc(Assembler::equal, _label);
9006}
9007
// Binds _label, the target of the jump emitted by the constructor.
// NOTE(review): presumably code emitted between construction and destruction
// is what gets skipped when the flag equals `value` - confirm usage at callers.
9008SkipIfEqual::~SkipIfEqual() {
9009 _masm->bind(_label);
9010}
9011
9012// 32-bit Windows has its own fast-path implementation
9013// of get_thread
9014#if !defined(WIN32) || defined(_LP641)
9015
9016// This is simply a call to Thread::current()
// Emits code that loads the result of Thread::current() into `thread`.
//   thread - destination register for the thread pointer
// The generated code saves rax (unless `thread` is rax), rdi, rsi, rdx, rcx
// and, under _LP64, r8-r11 on the stack, calls Thread::current through
// call_VM_leaf_base with zero arguments, then restores the registers in
// reverse order. The call result is taken from rax.
9017void MacroAssembler::get_thread(Register thread) {
9018 if (thread != rax) {
9019 push(rax);
9020 }
9021 LP64_ONLY(push(rdi);)push(rdi);
9022 LP64_ONLY(push(rsi);)push(rsi);
9023 push(rdx);
9024 push(rcx);
9025#ifdef _LP641
9026 push(r8);
9027 push(r9);
9028 push(r10);
9029 push(r11);
9030#endif
9031
  // Qualified call: always uses MacroAssembler's own call_VM_leaf_base,
  // never an override of the virtual function.
9032 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current)((address)((address_word)(Thread::current))), 0);
9033
9034#ifdef _LP641
9035 pop(r11);
9036 pop(r10);
9037 pop(r9);
9038 pop(r8);
9039#endif
9040 pop(rcx);
9041 pop(rdx);
9042 LP64_ONLY(pop(rsi);)pop(rsi);
9043 LP64_ONLY(pop(rdi);)pop(rdi);
  // Copy the result out of rax before restoring the caller's rax.
9044 if (thread != rax) {
9045 mov(thread, rax);
9046 pop(rax);
9047 }
9048}
9049
9050
9051#endif // !WIN32 || _LP64

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_MACROASSEMBLER_X86_HPP
26#define CPU_X86_MACROASSEMBLER_X86_HPP
27
28#include "asm/assembler.hpp"
29#include "code/vmreg.inline.hpp"
30#include "compiler/oopMap.hpp"
31#include "utilities/macros.hpp"
32#include "runtime/rtmLocking.hpp"
33#include "runtime/vm_version.hpp"
34
35// MacroAssembler extends Assembler by frequently used macros.
36//
37// Instructions for which a 'better' code sequence exists depending
38// on arguments should also go in here.
39
40class MacroAssembler: public Assembler {
41 friend class LIR_Assembler;
42 friend class Runtime1; // as_Address()
43
44 public:
45 // Support for VM calls
46 //
47 // This is the base routine called by the different versions of call_VM_leaf. The interpreter
48 // may customize this version by overriding it for its purposes (e.g., to save/restore
49 // additional registers when doing a VM call).
50
51 virtual void call_VM_leaf_base(
52 address entry_point, // the entry point
53 int number_of_arguments // the number of arguments to pop after the call
54 );
55
56 protected:
57 // This is the base routine called by the different versions of call_VM. The interpreter
58 // may customize this version by overriding it for its purposes (e.g., to save/restore
59 // additional registers when doing a VM call).
60 //
61 // If no java_thread register is specified (noreg) then rdi will be used instead. call_VM_base
62 // returns the register which contains the thread upon return. If a thread register has been
63 // specified, the return value will correspond to that register. If no last_java_sp is specified
64 // (noreg) then rsp will be used instead.
65 virtual void call_VM_base( // returns the register containing the thread upon return
66 Register oop_result, // where an oop-result ends up if any; use noreg otherwise
67 Register java_thread, // the thread if computed before ; use noreg otherwise
68 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
69 address entry_point, // the entry point
70 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
71 bool check_exceptions // whether to check for pending exceptions after return
72 );
73
74 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
75
76 // helpers for FPU flag access
77 // tmp is a temporary register, if none is available use noreg
78 void save_rax (Register tmp);
79 void restore_rax(Register tmp);
80
81 public:
82 MacroAssembler(CodeBuffer* code) : Assembler(code) {}
83
84 // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
85 // The implementation is only non-empty for the InterpreterMacroAssembler,
86 // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
87 virtual void check_and_handle_popframe(Register java_thread);
88 virtual void check_and_handle_earlyret(Register java_thread);
89
90 Address as_Address(AddressLiteral adr);
91 Address as_Address(ArrayAddress adr);
92
93 // Support for NULL-checks
94 //
95 // Generates code that causes a NULL OS exception if the content of reg is NULL.
96 // If the accessed location is M[reg + offset] and the offset is known, provide the
97 // offset. No explicit code generation is needed if the offset is within a certain
98 // range (0 <= offset <= page_size).
99
100 void null_check(Register reg, int offset = -1);
101 static bool needs_explicit_null_check(intptr_t offset);
102 static bool uses_implicit_null_check(void* address);
103
104 // Required platform-specific helpers for Label::patch_instructions.
105 // They _shadow_ the declarations in AbstractAssembler, which are undefined.
106 void pd_patch_instruction(address branch, address target, const char* file, int line) {
107 unsigned char op = branch[0];
108 assert(op == 0xE8 /* call */ ||do { if (!(op == 0xE8 || op == 0xE9 || op == 0xEB || (op &
0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0
) == 0x80 || op == 0xC7 && branch[1] == 0xF8)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 114, "assert(" "op == 0xE8 || op == 0xE9 || op == 0xEB || (op & 0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0) == 0x80 || op == 0xC7 && branch[1] == 0xF8"
") failed", "Invalid opcode at patch point"); ::breakpoint()
; } } while (0)
109 op == 0xE9 /* jmp */ ||do { if (!(op == 0xE8 || op == 0xE9 || op == 0xEB || (op &
0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0
) == 0x80 || op == 0xC7 && branch[1] == 0xF8)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 114, "assert(" "op == 0xE8 || op == 0xE9 || op == 0xEB || (op & 0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0) == 0x80 || op == 0xC7 && branch[1] == 0xF8"
") failed", "Invalid opcode at patch point"); ::breakpoint()
; } } while (0)
110 op == 0xEB /* short jmp */ ||do { if (!(op == 0xE8 || op == 0xE9 || op == 0xEB || (op &
0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0
) == 0x80 || op == 0xC7 && branch[1] == 0xF8)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 114, "assert(" "op == 0xE8 || op == 0xE9 || op == 0xEB || (op & 0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0) == 0x80 || op == 0xC7 && branch[1] == 0xF8"
") failed", "Invalid opcode at patch point"); ::breakpoint()
; } } while (0)
111 (op & 0xF0) == 0x70 /* short jcc */ ||do { if (!(op == 0xE8 || op == 0xE9 || op == 0xEB || (op &
0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0
) == 0x80 || op == 0xC7 && branch[1] == 0xF8)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 114, "assert(" "op == 0xE8 || op == 0xE9 || op == 0xEB || (op & 0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0) == 0x80 || op == 0xC7 && branch[1] == 0xF8"
") failed", "Invalid opcode at patch point"); ::breakpoint()
; } } while (0)
112 op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */ ||do { if (!(op == 0xE8 || op == 0xE9 || op == 0xEB || (op &
0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0
) == 0x80 || op == 0xC7 && branch[1] == 0xF8)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 114, "assert(" "op == 0xE8 || op == 0xE9 || op == 0xEB || (op & 0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0) == 0x80 || op == 0xC7 && branch[1] == 0xF8"
") failed", "Invalid opcode at patch point"); ::breakpoint()
; } } while (0)
113 op == 0xC7 && branch[1] == 0xF8 /* xbegin */,do { if (!(op == 0xE8 || op == 0xE9 || op == 0xEB || (op &
0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0
) == 0x80 || op == 0xC7 && branch[1] == 0xF8)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 114, "assert(" "op == 0xE8 || op == 0xE9 || op == 0xEB || (op & 0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0) == 0x80 || op == 0xC7 && branch[1] == 0xF8"
") failed", "Invalid opcode at patch point"); ::breakpoint()
; } } while (0)
114 "Invalid opcode at patch point")do { if (!(op == 0xE8 || op == 0xE9 || op == 0xEB || (op &
0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0
) == 0x80 || op == 0xC7 && branch[1] == 0xF8)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 114, "assert(" "op == 0xE8 || op == 0xE9 || op == 0xEB || (op & 0xF0) == 0x70 || op == 0x0F && (branch[1] & 0xF0) == 0x80 || op == 0xC7 && branch[1] == 0xF8"
") failed", "Invalid opcode at patch point"); ::breakpoint()
; } } while (0)
;
115
116 if (op == 0xEB || (op & 0xF0) == 0x70) {
117 // short offset operators (jmp and jcc)
118 char* disp = (char*) &branch[1];
119 int imm8 = target - (address) &disp[1];
120 guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset at %s:%d",do { if (!(this->is8bit(imm8))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 121, "guarantee(" "this->is8bit(imm8)" ") failed", "Short forward jump exceeds 8-bit offset at %s:%d"
, file == __null ? "<NULL>" : file, line); ::breakpoint
(); } } while (0)
121 file == NULL ? "<NULL>" : file, line)do { if (!(this->is8bit(imm8))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 121, "guarantee(" "this->is8bit(imm8)" ") failed", "Short forward jump exceeds 8-bit offset at %s:%d"
, file == __null ? "<NULL>" : file, line); ::breakpoint
(); } } while (0)
;
122 *disp = imm8;
123 } else {
124 int* disp = (int*) &branch[(op == 0x0F || op == 0xC7)? 2: 1];
125 int imm32 = target - (address) &disp[1];
126 *disp = imm32;
127 }
128 }
129
130 // The following 4 methods return the offset of the appropriate move instruction
131
132 // Support for fast byte/short loading with zero extension (depending on particular CPU)
133 int load_unsigned_byte(Register dst, Address src);
134 int load_unsigned_short(Register dst, Address src);
135
136 // Support for fast byte/short loading with sign extension (depending on particular CPU)
137 int load_signed_byte(Register dst, Address src);
138 int load_signed_short(Register dst, Address src);
139
140 // Support for sign-extension (hi:lo = extend_sign(lo))
141 void extend_sign(Register hi, Register lo);
142
143 // Load and store values by size and signed-ness
144 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
145 void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
146
147 // Support for inc/dec with optimal instruction selection depending on value
148
149 void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value))incrementq(reg, value) NOT_LP64(incrementl(reg, value)) ; }
150 void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value))decrementq(reg, value) NOT_LP64(decrementl(reg, value)) ; }
151
152 void decrementl(Address dst, int value = 1);
153 void decrementl(Register reg, int value = 1);
154
155 void decrementq(Register reg, int value = 1);
156 void decrementq(Address dst, int value = 1);
157
158 void incrementl(Address dst, int value = 1);
159 void incrementl(Register reg, int value = 1);
160
161 void incrementq(Register reg, int value = 1);
162 void incrementq(Address dst, int value = 1);
163
164 // Support optimal SSE move instructions.
165 void movflt(XMMRegister dst, XMMRegister src) {
166 if (dst-> encoding() == src->encoding()) return;
167 if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; }
168 else { movss (dst, src); return; }
169 }
170 void movflt(XMMRegister dst, Address src) { movss(dst, src); }
171 void movflt(XMMRegister dst, AddressLiteral src);
172 void movflt(Address dst, XMMRegister src) { movss(dst, src); }
173
174 // Move with zero extension
175 void movfltz(XMMRegister dst, XMMRegister src) { movss(dst, src); }
176
177 void movdbl(XMMRegister dst, XMMRegister src) {
178 if (dst-> encoding() == src->encoding()) return;
179 if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; }
180 else { movsd (dst, src); return; }
181 }
182
183 void movdbl(XMMRegister dst, AddressLiteral src);
184
185 void movdbl(XMMRegister dst, Address src) {
186 if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; }
187 else { movlpd(dst, src); return; }
188 }
189 void movdbl(Address dst, XMMRegister src) { movsd(dst, src); }
190
191 void incrementl(AddressLiteral dst);
192 void incrementl(ArrayAddress dst);
193
194 void incrementq(AddressLiteral dst);
195
196 // Alignment
197 void align32();
198 void align64();
199 void align(int modulus);
200 void align(int modulus, int target);
201
202 // A 5 byte nop that is safe for patching (see patch_verified_entry)
203 void fat_nop();
204
205 // Stack frame creation/removal
206 void enter();
207 void leave();
208
209 // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
210 // The pointer will be loaded into the thread register.
211 void get_thread(Register thread);
212
213#ifdef _LP641
214 // Support for argument shuffling
215
216 void move32_64(VMRegPair src, VMRegPair dst);
217 void long_move(VMRegPair src, VMRegPair dst);
218 void float_move(VMRegPair src, VMRegPair dst);
219 void double_move(VMRegPair src, VMRegPair dst);
220 void move_ptr(VMRegPair src, VMRegPair dst);
221 void object_move(OopMap* map,
222 int oop_handle_offset,
223 int framesize_in_slots,
224 VMRegPair src,
225 VMRegPair dst,
226 bool is_receiver,
227 int* receiver_offset);
228#endif // _LP64
229
230 // Support for VM calls
231 //
232 // It is imperative that all calls into the VM are handled via the call_VM macros.
233 // They make sure that the stack linkage is setup correctly. call_VM's correspond
234 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
235
236
237 void call_VM(Register oop_result,
238 address entry_point,
239 bool check_exceptions = true);
240 void call_VM(Register oop_result,
241 address entry_point,
242 Register arg_1,
243 bool check_exceptions = true);
244 void call_VM(Register oop_result,
245 address entry_point,
246 Register arg_1, Register arg_2,
247 bool check_exceptions = true);
248 void call_VM(Register oop_result,
249 address entry_point,
250 Register arg_1, Register arg_2, Register arg_3,
251 bool check_exceptions = true);
252
253 // Overloadings with last_Java_sp
254 void call_VM(Register oop_result,
255 Register last_java_sp,
256 address entry_point,
257 int number_of_arguments = 0,
258 bool check_exceptions = true);
259 void call_VM(Register oop_result,
260 Register last_java_sp,
261 address entry_point,
262 Register arg_1, bool
263 check_exceptions = true);
264 void call_VM(Register oop_result,
265 Register last_java_sp,
266 address entry_point,
267 Register arg_1, Register arg_2,
268 bool check_exceptions = true);
269 void call_VM(Register oop_result,
270 Register last_java_sp,
271 address entry_point,
272 Register arg_1, Register arg_2, Register arg_3,
273 bool check_exceptions = true);
274
275 void get_vm_result (Register oop_result, Register thread);
276 void get_vm_result_2(Register metadata_result, Register thread);
277
278 // These always tightly bind to MacroAssembler::call_VM_base
279 // bypassing the virtual implementation
280 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
281 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
282 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
283 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
284 void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);
285
286 void call_VM_leaf0(address entry_point);
287 void call_VM_leaf(address entry_point,
288 int number_of_arguments = 0);
289 void call_VM_leaf(address entry_point,
290 Register arg_1);
291 void call_VM_leaf(address entry_point,
292 Register arg_1, Register arg_2);
293 void call_VM_leaf(address entry_point,
294 Register arg_1, Register arg_2, Register arg_3);
295
296 // These always tightly bind to MacroAssembler::call_VM_leaf_base
297 // bypassing the virtual implementation
298 void super_call_VM_leaf(address entry_point);
299 void super_call_VM_leaf(address entry_point, Register arg_1);
300 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
301 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
302 void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
303
304 // last Java Frame (fills frame anchor)
305 void set_last_Java_frame(Register thread,
306 Register last_java_sp,
307 Register last_java_fp,
308 address last_java_pc);
309
310 // thread in the default location (r15_thread on 64bit)
311 void set_last_Java_frame(Register last_java_sp,
312 Register last_java_fp,
313 address last_java_pc);
314
315 void reset_last_Java_frame(Register thread, bool clear_fp);
316
317 // thread in the default location (r15_thread on 64bit)
318 void reset_last_Java_frame(bool clear_fp);
319
320 // jobjects
321 void clear_jweak_tag(Register possibly_jweak);
322 void resolve_jobject(Register value, Register thread, Register tmp);
323
324 // C 'boolean' to Java boolean: x == 0 ? 0 : 1
325 void c2bool(Register x);
326
327 // C++ bool manipulation
328
329 void movbool(Register dst, Address src);
330 void movbool(Address dst, bool boolconst);
331 void movbool(Address dst, Register src);
332 void testbool(Register dst);
333
334 void resolve_oop_handle(Register result, Register tmp = rscratch2);
335 void resolve_weak_handle(Register result, Register tmp);
336 void load_mirror(Register mirror, Register method, Register tmp = rscratch2);
337 void load_method_holder_cld(Register rresult, Register rmethod);
338
339 void load_method_holder(Register holder, Register method);
340
341 // oop manipulations
342 void load_klass(Register dst, Register src, Register tmp);
343 void store_klass(Register dst, Register src, Register tmp);
344
345 void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
346 Register tmp1, Register thread_tmp);
347 void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
348 Register tmp1, Register tmp2);
349
350 void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
351 Register thread_tmp = noreg, DecoratorSet decorators = 0);
352 void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
353 Register thread_tmp = noreg, DecoratorSet decorators = 0);
354 void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
355 Register tmp2 = noreg, DecoratorSet decorators = 0);
356
357 // Used for storing NULL. All other oop constants should be
358 // stored using routines that take a jobject.
359 void store_heap_oop_null(Address dst);
360
361#ifdef _LP641
362 void store_klass_gap(Register dst, Register src);
363
364 // This dummy is to prevent a call to store_heap_oop from
365 // converting a zero (like NULL) into a Register by giving
366 // the compiler two choices it can't resolve
367
368 void store_heap_oop(Address dst, void* dummy);
369
370 void encode_heap_oop(Register r);
371 void decode_heap_oop(Register r);
372 void encode_heap_oop_not_null(Register r);
373 void decode_heap_oop_not_null(Register r);
374 void encode_heap_oop_not_null(Register dst, Register src);
375 void decode_heap_oop_not_null(Register dst, Register src);
376
377 void set_narrow_oop(Register dst, jobject obj);
378 void set_narrow_oop(Address dst, jobject obj);
379 void cmp_narrow_oop(Register dst, jobject obj);
380 void cmp_narrow_oop(Address dst, jobject obj);
381
382 void encode_klass_not_null(Register r, Register tmp);
383 void decode_klass_not_null(Register r, Register tmp);
384 void encode_and_move_klass_not_null(Register dst, Register src);
385 void decode_and_move_klass_not_null(Register dst, Register src);
386 void set_narrow_klass(Register dst, Klass* k);
387 void set_narrow_klass(Address dst, Klass* k);
388 void cmp_narrow_klass(Register dst, Klass* k);
389 void cmp_narrow_klass(Address dst, Klass* k);
390
391 // if heap base register is used - reinit it with the correct value
392 void reinit_heapbase();
393
394 DEBUG_ONLY(void verify_heapbase(const char* msg);)void verify_heapbase(const char* msg);
395
396#endif // _LP64
397
398 // Int division/remainder for Java
399 // (as idivl, but checks for special case as described in JVM spec.)
400 // returns idivl instruction offset for implicit exception handling
401 int corrected_idivl(Register reg);
402
403 // Long division/remainder for Java
404 // (as idivq, but checks for special case as described in JVM spec.)
405 // returns idivq instruction offset for implicit exception handling
406 int corrected_idivq(Register reg);
407
408 void int3();
409
410 // Long operation macros for a 32bit cpu
411 // Long negation for Java
412 void lneg(Register hi, Register lo);
413
414 // Long multiplication for Java
415 // (destroys contents of eax, ebx, ecx and edx)
416 void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y
417
418 // Long shifts for Java
419 // (semantics as described in JVM spec.)
420 void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f)
421 void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f)
422
423 // Long compare for Java
424 // (semantics as described in JVM spec.)
425 void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y)
426
427
428 // misc
429
430 // Sign extension
431 void sign_extend_short(Register reg);
432 void sign_extend_byte(Register reg);
433
434 // Division by power of 2, rounding towards 0
435 void division_with_shift(Register reg, int shift_value);
436
437#ifndef _LP641
438 // Compares the top-most stack entries on the FPU stack and sets the eflags as follows:
439 //
440 // CF (corresponds to C0) if x < y
441 // PF (corresponds to C2) if unordered
442 // ZF (corresponds to C3) if x = y
443 //
444 // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
445 // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code)
446 void fcmp(Register tmp);
447 // Variant of the above which allows y to be further down the stack
448 // and which only pops x and y if specified. If pop_right is
449 // specified then pop_left must also be specified.
450 void fcmp(Register tmp, int index, bool pop_left, bool pop_right);
451
452 // Floating-point comparison for Java
453 // Compares the top-most stack entries on the FPU stack and stores the result in dst.
454 // The arguments are in reversed order on the stack (i.e., top of stack is first argument).
455 // (semantics as described in JVM spec.)
456 void fcmp2int(Register dst, bool unordered_is_less);
457 // Variant of the above which allows y to be further down the stack
458 // and which only pops x and y if specified. If pop_right is
459 // specified then pop_left must also be specified.
460 void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right);
461
462 // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards)
463 // tmp is a temporary register, if none is available use noreg
464 void fremr(Register tmp);
465
466 // only if +VerifyFPU
467 void verify_FPU(int stack_depth, const char* s = "illegal FPU state");
468#endif // !_LP64
469
470 // dst = c = a * b + c
471 void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
472 void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c);
473
474 void vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
475 void vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len);
476 void vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
477 void vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len);
478
479
480 // same as fcmp2int, but using SSE2
481 void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
482 void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less);
483
484 // branch to L if FPU flag C2 is set/not set
485 // tmp is a temporary register, if none is available use noreg
486 void jC2 (Register tmp, Label& L);
487 void jnC2(Register tmp, Label& L);
488
489 // Load float value from 'address'. If UseSSE >= 1, the value is loaded into
490 // register xmm0. Otherwise, the value is loaded onto the FPU stack.
491 void load_float(Address src);
492
493 // Store float value to 'address'. If UseSSE >= 1, the value is stored
494 // from register xmm0. Otherwise, the value is stored from the FPU stack.
495 void store_float(Address dst);
496
497 // Load double value from 'address'. If UseSSE >= 2, the value is loaded into
498 // register xmm0. Otherwise, the value is loaded onto the FPU stack.
499 void load_double(Address src);
500
501 // Store double value to 'address'. If UseSSE >= 2, the value is stored
502 // from register xmm0. Otherwise, the value is stored from the FPU stack.
503 void store_double(Address dst);
504
505#ifndef _LP641
506 // Pop ST (ffree & fincstp combined)
507 void fpop();
508
509 void empty_FPU_stack();
510#endif // !_LP64
511
512 void push_IU_state();
513 void pop_IU_state();
514
515 void push_FPU_state();
516 void pop_FPU_state();
517
518 void push_CPU_state();
519 void pop_CPU_state();
520
521 // Round up to a power of two
522 void round_to(Register reg, int modulus);
523
524 // Callee saved registers handling
525 void push_callee_saved_registers();
526 void pop_callee_saved_registers();
527
528 // allocation
529 void eden_allocate(
530 Register thread, // Current thread
531 Register obj, // result: pointer to object after successful allocation
532 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
533 int con_size_in_bytes, // object size in bytes if known at compile time
534 Register t1, // temp register
535 Label& slow_case // continuation point if fast allocation fails
536 );
537 void tlab_allocate(
538 Register thread, // Current thread
539 Register obj, // result: pointer to object after successful allocation
540 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
541 int con_size_in_bytes, // object size in bytes if known at compile time
542 Register t1, // temp register
543 Register t2, // temp register
544 Label& slow_case // continuation point if fast allocation fails
545 );
546 void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp);
547
548 // interface method calling
549 void lookup_interface_method(Register recv_klass,
550 Register intf_klass,
551 RegisterOrConstant itable_index,
552 Register method_result,
553 Register scan_temp,
554 Label& no_such_interface,
555 bool return_method = true);
556
557 // virtual method calling
558 void lookup_virtual_method(Register recv_klass,
559 RegisterOrConstant vtable_index,
560 Register method_result);
561
562 // Test sub_klass against super_klass, with fast and slow paths.
563
564 // The fast path produces a tri-state answer: yes / no / maybe-slow.
565 // One of the three labels can be NULL, meaning take the fall-through.
566 // If super_check_offset is -1, the value is loaded up from super_klass.
567 // No registers are killed, except temp_reg.
568 void check_klass_subtype_fast_path(Register sub_klass,
569 Register super_klass,
570 Register temp_reg,
571 Label* L_success,
572 Label* L_failure,
573 Label* L_slow_path,
574 RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
575
576 // The rest of the type check; must be wired to a corresponding fast path.
577 // It does not repeat the fast path logic, so don't use it standalone.
578 // The temp_reg and temp2_reg can be noreg, if no temps are available.
579 // Updates the sub's secondary super cache as necessary.
580 // If set_cond_codes, condition codes will be Z on success, NZ on failure.
581 void check_klass_subtype_slow_path(Register sub_klass,
582 Register super_klass,
583 Register temp_reg,
584 Register temp2_reg,
585 Label* L_success,
586 Label* L_failure,
587 bool set_cond_codes = false);
588
589 // Simplified, combined version, good for typical uses.
590 // Falls through on failure.
591 void check_klass_subtype(Register sub_klass,
592 Register super_klass,
593 Register temp_reg,
594 Label& L_success);
595
596 void clinit_barrier(Register klass,
597 Register thread,
598 Label* L_fast_path = NULL__null,
599 Label* L_slow_path = NULL__null);
600
601 // method handles (JSR 292)
602 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
603
604 // Debugging
605
606 // only if +VerifyOops
607 void _verify_oop(Register reg, const char* s, const char* file, int line);
608 void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
609
// Flag-guarded front end for _verify_oop(): forwards reg, s, file and line
// unchanged when VerifyOops is set, and does nothing otherwise.
610 void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
611 if (VerifyOops) {
612 _verify_oop(reg, s, file, line);
613 }
614 }
// Flag-guarded front end for _verify_oop_addr(): forwards its arguments
// unchanged when VerifyOops is set, and does nothing otherwise.
// Note that the Address parameter is named `reg` here.
615 void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
616 if (VerifyOops) {
617 _verify_oop_addr(reg, s, file, line);
618 }
619 }
620
621 // TODO: verify method and klass metadata (compare against vptr?)
// Placeholder with an empty body: all arguments are ignored and no
// verification is performed yet (see the TODO above).
622 void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
// Placeholder with an empty body: all arguments are ignored and no
// klass verification is performed yet.
623 void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
624
// Convenience macros over the _verify_* helpers. Each one stringifies its
// argument expression (#reg / #addr / #msg) into the "broken ..." message and
// supplies __FILE__ and __LINE__ for the call site.
// verify_oop, verify_oop_msg and verify_oop_addr route through the
// *_checked helpers; verify_method_ptr and verify_klass_ptr call the
// _verify_method_ptr / _verify_klass_ptr helpers directly.
625#define verify_oop(reg)_verify_oop_checked(reg, "broken oop " "reg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 625)
_verify_oop_checked(reg, "broken oop " #reg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__625)
626#define verify_oop_msg(reg, msg)_verify_oop_checked(reg, "broken oop " "reg" ", " "msg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 626)
_verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__626)
627#define verify_oop_addr(addr)_verify_oop_addr_checked(addr, "broken oop addr " "addr", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 627)
_verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__627)
628#define verify_method_ptr(reg)_verify_method_ptr(reg, "broken method " "reg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 628)
_verify_method_ptr(reg, "broken method " #reg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__628)
629#define verify_klass_ptr(reg)_verify_klass_ptr(reg, "broken klass " "reg", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 629)
_verify_klass_ptr(reg, "broken klass " #reg, __FILE__"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp", __LINE__629)
630
631 // Verify or restore cpu control state after JNI call
632 void restore_cpu_control_state_after_jni();
633
634 // prints msg, dumps registers and stops execution
635 void stop(const char* msg);
636
637 // prints msg and continues
638 void warn(const char* msg);
639
640 // dumps registers and other state
641 void print_state();
642
643 static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
644 static void debug64(char* msg, int64_t pc, int64_t regs[]);
645 static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
646 static void print_state64(int64_t pc, int64_t regs[]);
647
648 void os_breakpoint();
649
// Shorthand for stop() with the fixed message "untested".
650 void untested() { stop("untested"); }
651
652 void unimplemented(const char* what = "");
653
// Shorthand for stop() with the fixed message "should not reach here".
654 void should_not_reach_here() { stop("should not reach here"); }
655
656 void print_CPU_state();
657
658 // Stack overflow checking
// Touches the stack slot at rsp - offset by storing rax there with movl.
// `offset` is a positive distance below rsp; the assert rejects offset <= 0.
// NOTE(review): the assert text says "negative offset" although the check and
// the comment below require a positive value. Presumably it refers to the
// store landing at a negative displacement from rsp; confirm before rewording.
659 void bang_stack_with_offset(int offset) {
660 // stack grows down, caller passes positive offset
661 assert(offset > 0, "must bang with negative offset")do { if (!(offset > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.hpp"
, 661, "assert(" "offset > 0" ") failed", "must bang with negative offset"
); ::breakpoint(); } } while (0)
;
662 movl(Address(rsp, (-offset)), rax);
663 }
664
665 // Writes to stack successive pages until offset reached to check for
666 // stack overflow + shadow pages. Also, clobbers tmp
667 void bang_stack_size(Register size, Register tmp);
668
669 // Check for reserved stack access in method being exited (for JIT)
670 void reserved_stack_check();
671
672 void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);
673
674 void verify_tlab();
675
676 Condition negate_condition(Condition cond);
677
678 // Instructions that use AddressLiteral operands. These instructions can handle 32bit/64bit
679 // operands. In general the names are modified to avoid hiding the instruction in Assembler
680 // so that we don't need to implement all the varieties in the Assembler with trivial wrappers
681 // here in MacroAssembler. The major exception to this rule is call
682
683 // Arithmetics
684
685
// Pointer-width add of a 32-bit immediate to memory: calls addq under
// LP64_ONLY and addl under NOT_LP64.
686 void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)) ; }
687 void addptr(Address dst, Register src);
688
// Pointer-width add of a memory operand to a register: calls addq under
// LP64_ONLY and addl under NOT_LP64.
689 void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)); }
690 void addptr(Register dst, int32_t src);
691 void addptr(Register dst, Register src);
// Dispatches on the kind of `src`: a constant is cast to int and passed to
// the immediate form of addptr; otherwise the register form is called with
// src.as_register().
692 void addptr(Register dst, RegisterOrConstant src) {
693 if (src.is_constant()) addptr(dst, (int) src.as_constant());
694 else addptr(dst, src.as_register());
695 }
696
697 void andptr(Register dst, int32_t src);
// Pointer-width register-register AND: calls andq under LP64_ONLY and andl
// under NOT_LP64.
698 void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2))andq(src1, src2) NOT_LP64(andl(src1, src2)) ; }
699
700 void cmp8(AddressLiteral src1, int imm);
701
702 // renamed to drag out the casting of address to int32_t/intptr_t
703 void cmp32(Register src1, int32_t imm);
704
705 void cmp32(AddressLiteral src1, int32_t imm);
706 // compare reg - mem, or reg - &mem
707 void cmp32(Register src1, AddressLiteral src2);
708
709 void cmp32(Register src1, Address src2);
710
711#ifndef _LP641
712 void cmpklass(Address dst, Metadata* obj);
713 void cmpklass(Register dst, Metadata* obj);
714 void cmpoop(Address dst, jobject obj);
715#endif // !_LP64
716
717 void cmpoop(Register src1, Register src2);
718 void cmpoop(Register src1, Address src2);
719 void cmpoop(Register dst, jobject obj);
720
721 // NOTE src2 must be the lval. This is NOT a mem-mem compare
722 void cmpptr(Address src1, AddressLiteral src2);
723
724 void cmpptr(Register src1, AddressLiteral src2);
725
// Pointer-width compares with a register as first operand (register or
// memory second operand): call cmpq under LP64_ONLY and cmpl under NOT_LP64.
726 void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; }
727 void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; }
728 // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; }
729
// Pointer-width compares against a 32-bit immediate (register or memory
// first operand): call cmpq under LP64_ONLY and cmpl under NOT_LP64.
730 void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; }
731 void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2))cmpq(src1, src2) NOT_LP64(cmpl(src1, src2)) ; }
732
733 // cmp64 to avoid hiding cmpq
734 void cmp64(Register src1, AddressLiteral src);
735
736 void cmpxchgptr(Register reg, Address adr);
737
738 void locked_cmpxchgptr(Register reg, AddressLiteral adr);
739
740
// Pointer-width multiply, in a two-operand form and a form with an extra
// immediate: call imulq under LP64_ONLY and imull under NOT_LP64.
741 void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src))imulq(dst, src) NOT_LP64(imull(dst, src)); }
742 void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32))imulq(dst, src, imm32) NOT_LP64(imull(dst, src, imm32)); }
743
744
// Pointer-width negate: calls negq under LP64_ONLY and negl under NOT_LP64.
745 void negptr(Register dst) { LP64_ONLY(negq(dst))negq(dst) NOT_LP64(negl(dst)); }
746
// Pointer-width bitwise NOT: calls notq under LP64_ONLY and notl under NOT_LP64.
747 void notptr(Register dst) { LP64_ONLY(notq(dst))notq(dst) NOT_LP64(notl(dst)); }
748
749 void shlptr(Register dst, int32_t shift);
// Pointer-width shift-left with no explicit count argument: calls the
// one-operand shlq under LP64_ONLY and shll under NOT_LP64.
750 void shlptr(Register dst) { LP64_ONLY(shlq(dst))shlq(dst) NOT_LP64(shll(dst)); }
751
752 void shrptr(Register dst, int32_t shift);
// Pointer-width shift-right with no explicit count argument: calls the
// one-operand shrq under LP64_ONLY and shrl under NOT_LP64.
753 void shrptr(Register dst) { LP64_ONLY(shrq(dst))shrq(dst) NOT_LP64(shrl(dst)); }
754
// Pointer-width sar, with and without an explicit int32_t second argument:
// call sarq under LP64_ONLY and sarl under NOT_LP64.
755 void sarptr(Register dst) { LP64_ONLY(sarq(dst))sarq(dst) NOT_LP64(sarl(dst)); }
756 void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src))sarq(dst, src) NOT_LP64(sarl(dst, src)); }
757
// Pointer-width subtract of a 32-bit immediate from memory: calls subq under
// LP64_ONLY and subl under NOT_LP64.
758 void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src)); }
759
// Pointer-width subtract of a memory operand from a register: calls subq
// under LP64_ONLY and subl under NOT_LP64.
760 void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src)); }
761 void subptr(Register dst, int32_t src);
762 // Force generation of a 4 byte immediate value even if it fits into 8bit
763 void subptr_imm32(Register dst, int32_t src);
764 void subptr(Register dst, Register src);
// Dispatches on the kind of `src`: a constant is cast to int and passed to
// the immediate form of subptr; otherwise the register form is called with
// src.as_register().
765 void subptr(Register dst, RegisterOrConstant src) {
766 if (src.is_constant()) subptr(dst, (int) src.as_constant());
767 else subptr(dst, src.as_register());
768 }
769
// Pointer-width sbb of a 32-bit immediate, for a memory or register
// destination: call sbbq under LP64_ONLY and sbbl under NOT_LP64.
770 void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src))sbbq(dst, src) NOT_LP64(sbbl(dst, src)); }
771 void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src))sbbq(dst, src) NOT_LP64(sbbl(dst, src)); }
772
// Pointer-width exchange of a register with a register or memory operand:
// call xchgq under LP64_ONLY and xchgl under NOT_LP64.
773 void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2))xchgq(src1, src2) NOT_LP64(xchgl(src1, src2)) ; }
774 void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2))xchgq(src1, src2) NOT_LP64(xchgl(src1, src2)) ; }
775
// Pointer-width xadd on a memory operand and a register: calls xaddq under
// LP64_ONLY and xaddl under NOT_LP64.
776 void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2))xaddq(src1, src2) NOT_LP64(xaddl(src1, src2)) ; }
777
778
779
780 // Helper functions for statistics gathering.
781 // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes.
782 void cond_inc32(Condition cond, AddressLiteral counter_addr);
783 // Unconditional atomic increment.
784 void atomic_incl(Address counter_addr);
785 void atomic_incl(AddressLiteral counter_addr, Register scr = rscratch1);
786#ifdef _LP641
787 void atomic_incq(Address counter_addr);
788 void atomic_incq(AddressLiteral counter_addr, Register scr = rscratch1);
789#endif
// Pointer-width counterparts of atomic_incl / atomic_incq: forward to
// atomic_incq under LP64_ONLY and to atomic_incl under NOT_LP64.
// The AddressLiteral form passes a scratch register through (default rscratch1).
790 void atomic_incptr(AddressLiteral counter_addr, Register scr = rscratch1) { LP64_ONLY(atomic_incq(counter_addr, scr))atomic_incq(counter_addr, scr) NOT_LP64(atomic_incl(counter_addr, scr)) ; }
791 void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr))atomic_incq(counter_addr) NOT_LP64(atomic_incl(counter_addr)) ; }
792
793 void lea(Register dst, AddressLiteral adr);
794 void lea(Address dst, AddressLiteral adr);
// Register/Address form: forwards unchanged to Assembler::lea.
795 void lea(Register dst, Address adr) { Assembler::lea(dst, adr); }
796
// Alias that simply calls leal(dst, src).
797 void leal32(Register dst, Address src) { leal(dst, src); }
798
799 // Import other testl() methods from the parent class or else
800 // they will be hidden by the following overriding declaration.
801 using Assembler::testl;
802 void testl(Register dst, AddressLiteral src);
803
// Pointer-width OR for register/memory/immediate operand combinations:
// each overload calls orq under LP64_ONLY and orl under NOT_LP64.
804 void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); }
805 void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); }
806 void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src))orq(dst, src) NOT_LP64(orl(dst, src)); }
807 void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32))orq(dst, imm32) NOT_LP64(orl(dst, imm32)); }
808
// Pointer-width test of a register against an immediate or a memory operand:
// call testq under LP64_ONLY and testl under NOT_LP64.
809 void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32))testq(src, imm32) NOT_LP64(testl(src, imm32)); }
810 void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2))testq(src1, src2) NOT_LP64(testl(src1, src2)); }
811 void testptr(Register src1, Register src2);
812
// Pointer-width XOR of a register with a register or memory operand:
// call xorq under LP64_ONLY and xorl under NOT_LP64.
813 void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src))xorq(dst, src) NOT_LP64(xorl(dst, src)); }
814 void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src))xorq(dst, src) NOT_LP64(xorl(dst, src)); }
815
816 // Calls
817
818 void call(Label& L, relocInfo::relocType rtype);
819 void call(Register entry);
// Address form: forwards unchanged to Assembler::call.
820 void call(Address addr) { Assembler::call(addr); }
821
822 // NOTE: this call transfers to the effective address of entry NOT
823 // the address contained by entry. This is because this is more natural
824 // for jumps/calls.
825 void call(AddressLiteral entry);
826
827 // Emit the CompiledIC call idiom
828 void ic_call(address entry, jint method_index = 0);
829
830 // Jumps
831
832 // NOTE: these jumps transfer to the effective address of dst NOT
833 // the address contained by dst. This is because this is more natural
834 // for jumps/calls.
835 void jump(AddressLiteral dst);
836 void jump_cc(Condition cc, AddressLiteral dst);
837
838 // 32bit can do a case table jump in one instruction but we no longer allow the base
839 // to be installed in the Address class. This jump will transfer to the address
840 // contained in the location described by entry (not the address of entry)
841 void jump(ArrayAddress entry);
842
843 // Floating
844
// andpd overloads: the Address and XMMRegister source forms forward unchanged
// to Assembler::andpd; the AddressLiteral form (with a scratch register,
// default rscratch1) is only declared here.
845 void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
846 void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
847 void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }
848
// andps overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::andps; the AddressLiteral form (with a scratch register,
// default rscratch1) is only declared here.
849 void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
850 void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
851 void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
852
// comiss overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::comiss; the AddressLiteral form is only declared here.
853 void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
854 void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
855 void comiss(XMMRegister dst, AddressLiteral src);
856
// comisd overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::comisd; the AddressLiteral form is only declared here.
857 void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
858 void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
859 void comisd(XMMRegister dst, AddressLiteral src);
860
861#ifndef _LP641
// fadd_s overloads: the Address form forwards unchanged to Assembler::fadd_s;
// the AddressLiteral form first converts its operand with as_Address().
862 void fadd_s(Address src) { Assembler::fadd_s(src); }
863 void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); }
864
// fldcw overloads: the Address form forwards unchanged to Assembler::fldcw;
// the AddressLiteral form is only declared here.
865 void fldcw(Address src) { Assembler::fldcw(src); }
866 void fldcw(AddressLiteral src);
867
// fld_s overloads: the int-index and Address forms forward unchanged to
// Assembler::fld_s; the AddressLiteral form is only declared here.
868 void fld_s(int index) { Assembler::fld_s(index); }
869 void fld_s(Address src) { Assembler::fld_s(src); }
870 void fld_s(AddressLiteral src);
871
// fld_d overloads: the Address form forwards unchanged to Assembler::fld_d;
// the AddressLiteral form is only declared here.
872 void fld_d(Address src) { Assembler::fld_d(src); }
873 void fld_d(AddressLiteral src);
874
// fmul_s overloads: the Address form forwards unchanged to Assembler::fmul_s;
// the AddressLiteral form first converts its operand with as_Address().
875 void fmul_s(Address src) { Assembler::fmul_s(src); }
876 void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); }
877#endif // !_LP64
878
// fld_x overloads: the Address form forwards unchanged to Assembler::fld_x;
// the AddressLiteral form is only declared here.
879 void fld_x(Address src) { Assembler::fld_x(src); }
880 void fld_x(AddressLiteral src);
881
// ldmxcsr overloads: the Address form forwards unchanged to
// Assembler::ldmxcsr; the AddressLiteral form is only declared here.
882 void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
883 void ldmxcsr(AddressLiteral src);
884
885#ifdef _LP641
886 private:
887 void sha256_AVX2_one_round_compute(
888 Register reg_old_h,
889 Register reg_a,
890 Register reg_b,
891 Register reg_c,
892 Register reg_d,
893 Register reg_e,
894 Register reg_f,
895 Register reg_g,
896 Register reg_h,
897 int iter);
898 void sha256_AVX2_four_rounds_compute_first(int start);
899 void sha256_AVX2_four_rounds_compute_last(int start);
900 void sha256_AVX2_one_round_and_sched(
901 XMMRegister xmm_0, /* == ymm4 on 0, 1, 2, 3 iterations, then rotate 4 registers left on 4, 8, 12 iterations */
902 XMMRegister xmm_1, /* ymm5 */ /* full cycle is 16 iterations */
903 XMMRegister xmm_2, /* ymm6 */
904 XMMRegister xmm_3, /* ymm7 */
905 Register reg_a, /* == eax on 0 iteration, then rotate 8 register right on each next iteration */
906 Register reg_b, /* ebx */ /* full cycle is 8 iterations */
907 Register reg_c, /* edi */
908 Register reg_d, /* esi */
909 Register reg_e, /* r8d */
910 Register reg_f, /* r9d */
911 Register reg_g, /* r10d */
912 Register reg_h, /* r11d */
913 int iter);
914
915 void addm(int disp, Register r1, Register r2);
916 void gfmul(XMMRegister tmp0, XMMRegister t);
917 void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
918 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
919 void generateHtbl_one_block(Register htbl);
920 void generateHtbl_eight_blocks(Register htbl);
921 public:
922 void sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
923 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
924 Register buf, Register state, Register ofs, Register limit, Register rsp,
925 bool multi_block, XMMRegister shuf_mask);
926 void avx_ghash(Register state, Register htbl, Register data, Register blocks);
927#endif
928
929#ifdef _LP641
930 private:
931 void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d,
932 Register e, Register f, Register g, Register h, int iteration);
933
934 void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
935 Register a, Register b, Register c, Register d, Register e, Register f,
936 Register g, Register h, int iteration);
937
938 void addmq(int disp, Register r1, Register r2);
939 public:
940 void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
941 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
942 Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block,
943 XMMRegister shuf_mask);
944private:
945 void roundEnc(XMMRegister key, int rnum);
946 void lastroundEnc(XMMRegister key, int rnum);
947 void roundDec(XMMRegister key, int rnum);
948 void lastroundDec(XMMRegister key, int rnum);
949 void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
950 void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
951 void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
952 void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
953 XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
954 XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
955 bool final_reduction, int index, XMMRegister counter_inc_mask);
956public:
957 void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
958 void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
959 void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
960 Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
961 void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
962 Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
963
964#endif
965
966 void fast_md5(Register buf, Address state, Address ofs, Address limit,
967 bool multi_block);
968
969 void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
970 XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
971 Register buf, Register state, Register ofs, Register limit, Register rsp,
972 bool multi_block);
973
974#ifdef _LP641
975 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
976 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
977 Register buf, Register state, Register ofs, Register limit, Register rsp,
978 bool multi_block, XMMRegister shuf_mask);
979#else
980 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
981 XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
982 Register buf, Register state, Register ofs, Register limit, Register rsp,
983 bool multi_block);
984#endif
985
986 void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
987 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
988 Register rax, Register rcx, Register rdx, Register tmp);
989
990#ifdef _LP641
991 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
992 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
993 Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
994
995 void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
996 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
997 Register rax, Register rcx, Register rdx, Register r11);
998
999 void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1000 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1001 Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
1002
1003 void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1004 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1005 Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
1006 Register tmp3, Register tmp4);
1007
1008 void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1009 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1010 Register rax, Register rcx, Register rdx, Register tmp1,
1011 Register tmp2, Register tmp3, Register tmp4);
1012 void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1013 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1014 Register rax, Register rcx, Register rdx, Register tmp1,
1015 Register tmp2, Register tmp3, Register tmp4);
1016#else
1017 void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1018 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1019 Register rax, Register rcx, Register rdx, Register tmp1);
1020
1021 void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1022 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1023 Register rax, Register rcx, Register rdx, Register tmp);
1024
1025 void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
1026 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
1027 Register rdx, Register tmp);
1028
1029 void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1030 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1031 Register rax, Register rbx, Register rdx);
1032
1033 void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1034 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1035 Register rax, Register rcx, Register rdx, Register tmp);
1036
1037 void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1038 Register edx, Register ebx, Register esi, Register edi,
1039 Register ebp, Register esp);
1040
1041 void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
1042 Register esi, Register edi, Register ebp, Register esp);
1043
1044 void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
1045 Register edx, Register ebx, Register esi, Register edi,
1046 Register ebp, Register esp);
1047
1048 void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
1049 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
1050 Register rax, Register rcx, Register rdx, Register tmp);
1051#endif
1052
1053private:
1054
1055 // these are private because users should be doing movflt/movdbl
1056
// movss overloads, kept in the private section. The register/register,
// store-to-memory and load-from-memory forms forward unchanged to
// Assembler::movss; the AddressLiteral form is only declared here.
1057 void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
1058 void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
1059 void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
1060 void movss(XMMRegister dst, AddressLiteral src);
1061
// movlpd overloads, kept in the private section. The Address form forwards
// unchanged to Assembler::movlpd; the AddressLiteral form is only declared here.
1062 void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
1063 void movlpd(XMMRegister dst, AddressLiteral src);
1064
1065public:
1066
// addsd overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::addsd; the AddressLiteral form is only declared here.
1067 void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
1068 void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
1069 void addsd(XMMRegister dst, AddressLiteral src);
1070
// addss overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::addss; the AddressLiteral form is only declared here.
1071 void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
1072 void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
1073 void addss(XMMRegister dst, AddressLiteral src);
1074
// addpd overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::addpd; the AddressLiteral form is only declared here.
1075 void addpd(XMMRegister dst, XMMRegister src) { Assembler::addpd(dst, src); }
1076 void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
1077 void addpd(XMMRegister dst, AddressLiteral src);
1078
// divsd overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::divsd; the AddressLiteral form is only declared here.
1079 void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
1080 void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
1081 void divsd(XMMRegister dst, AddressLiteral src);
1082
// divss overloads: the XMMRegister and Address source forms forward unchanged
// to Assembler::divss; the AddressLiteral form is only declared here.
1083 void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
1084 void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
1085 void divss(XMMRegister dst, AddressLiteral src);
1086
1087 // Move Unaligned Double Quadword
1088 void movdqu(Address dst, XMMRegister src);
1089 void movdqu(XMMRegister dst, Address src);
1090 void movdqu(XMMRegister dst, XMMRegister src);
1091 void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
1092
// kmovwl overloads: every form with a body forwards unchanged to
// Assembler::kmovwl. The AddressLiteral form (with a scratch register,
// default rscratch1) is only declared here.
1093 void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
1094 void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
1095 void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
1096 void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1097 void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
1098 void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
1099
1100 void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
1101 void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); }
1102 void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); }
1103 void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); }
1104 void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); }
1105 void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1106
1107 // Safe move operation, lowers down to 16bit moves for targets supporting
1108 // AVX512F feature and 64bit moves for targets supporting AVX512BW feature.
1109 void kmov(Address dst, KRegister src);
1110 void kmov(KRegister dst, Address src);
1111 void kmov(KRegister dst, KRegister src);
1112 void kmov(Register dst, KRegister src);
1113 void kmov(KRegister dst, Register src);
1114
1115 // AVX Unaligned forms
1116 void vmovdqu(Address dst, XMMRegister src);
1117 void vmovdqu(XMMRegister dst, Address src);
1118 void vmovdqu(XMMRegister dst, XMMRegister src);
1119 void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1120 void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);
1121
1122
1123 // AVX512 Unaligned
1124 void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len);
1125 void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len);
1126
1127 void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1128 void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1129 void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
1130 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1131 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
1132 void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1133
1134 void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1135 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1136 void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
1137 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
1138 void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1139
1140 void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1141 void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
1142 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
1143 if (dst->encoding() == src->encoding()) return;
1144 Assembler::evmovdqul(dst, src, vector_len);
1145 }
1146 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1147 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
1148 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1149 if (dst->encoding() == src->encoding() && mask == k0) return;
1150 Assembler::evmovdqul(dst, mask, src, merge, vector_len);
1151 }
1152 void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1153
1154 void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1155 void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
1156 void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
1157 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
1158 if (dst->encoding() == src->encoding()) return;
3
Called C++ object pointer is null
1159 Assembler::evmovdquq(dst, src, vector_len);
1160 }
1161 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1162 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
1163 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
1164 if (dst->encoding() == src->encoding() && mask == k0) return;
1165 Assembler::evmovdquq(dst, mask, src, merge, vector_len);
1166 }
1167 void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1168
1169 // Move Aligned Double Quadword
1170 void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
1171 void movdqa(XMMRegister dst, XMMRegister src) { Assembler::movdqa(dst, src); }
1172 void movdqa(XMMRegister dst, AddressLiteral src);
1173
1174 void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
1175 void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
1176 void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
1177 void movsd(XMMRegister dst, AddressLiteral src);
1178
1179 void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
1180 void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
1181 void mulpd(XMMRegister dst, AddressLiteral src);
1182
1183 void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
1184 void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
1185 void mulsd(XMMRegister dst, AddressLiteral src);
1186
1187 void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
1188 void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
1189 void mulss(XMMRegister dst, AddressLiteral src);
1190
1191 // Carry-Less Multiplication Quadword
1192 void pclmulldq(XMMRegister dst, XMMRegister src) {
1193 // 0x00 - multiply lower 64 bits [0:63]
1194 Assembler::pclmulqdq(dst, src, 0x00);
1195 }
1196 void pclmulhdq(XMMRegister dst, XMMRegister src) {
1197 // 0x11 - multiply upper 64 bits [64:127]
1198 Assembler::pclmulqdq(dst, src, 0x11);
1199 }
1200
1201 void pcmpeqb(XMMRegister dst, XMMRegister src);
1202 void pcmpeqw(XMMRegister dst, XMMRegister src);
1203
1204 void pcmpestri(XMMRegister dst, Address src, int imm8);
1205 void pcmpestri(XMMRegister dst, XMMRegister src, int imm8);
1206
1207 void pmovzxbw(XMMRegister dst, XMMRegister src);
1208 void pmovzxbw(XMMRegister dst, Address src);
1209
1210 void pmovmskb(Register dst, XMMRegister src);
1211
1212 void ptest(XMMRegister dst, XMMRegister src);
1213
1214 void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
1215 void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
1216 void sqrtsd(XMMRegister dst, AddressLiteral src);
1217
1218 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1219 void roundsd(XMMRegister dst, Address src, int32_t rmode) { Assembler::roundsd(dst, src, rmode); }
1220 void roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg);
1221
1222 void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
1223 void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
1224 void sqrtss(XMMRegister dst, AddressLiteral src);
1225
1226 void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
1227 void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
1228 void subsd(XMMRegister dst, AddressLiteral src);
1229
1230 void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
1231 void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
1232 void subss(XMMRegister dst, AddressLiteral src);
1233
1234 void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
1235 void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
1236 void ucomiss(XMMRegister dst, AddressLiteral src);
1237
1238 void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
1239 void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
1240 void ucomisd(XMMRegister dst, AddressLiteral src);
1241
1242 // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
1243 void xorpd(XMMRegister dst, XMMRegister src);
1244 void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
1245 void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1246
1247 // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
1248 void xorps(XMMRegister dst, XMMRegister src);
1249 void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
1250 void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
1251
1252 // Shuffle Bytes
1253 void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
1254 void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); }
1255 void pshufb(XMMRegister dst, AddressLiteral src);
1256 // AVX 3-operands instructions
1257
1258 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
1259 void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); }
1260 void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1261
1262 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); }
1263 void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); }
1264 void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1265
1266 void vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1267 void vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len);
1268
1269 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1270 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1271 void vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1272
1273 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1274 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1275
1276 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1277 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpaddd(dst, nds, src, vector_len); }
1278 void vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch);
1279
1280 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1281 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
1282 void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1283
1284 void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
1285 void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
1286
1287 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1288
1289 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1290 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1291
1292 // Vector compares
1293 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1294 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1295 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1296 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1297 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1298 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1299 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1300 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1301 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1302 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1303 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1304 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1305 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
1306 int comparison, bool is_signed, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, is_signed, vector_len); }
1307 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
1308 int comparison, bool is_signed, int vector_len, Register scratch_reg);
1309
1310 void evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len);
1311
1312 // Emit comparison instruction for the specified comparison predicate.
1313 void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
1314 void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
1315
1316 void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
1317 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
1318
1319 void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);
1320
1321 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1322 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1323 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1324 Assembler::vpmulld(dst, nds, src, vector_len);
1325 };
1326 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1327 Assembler::vpmulld(dst, nds, src, vector_len);
1328 }
1329 void vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1330
1331 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1332 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1333
1334 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1335 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1336
1337 void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1338 void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1339
1340 void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1341 void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1342
1343 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1344 if (!is_varshift) {
1345 Assembler::evpsllw(dst, mask, nds, src, merge, vector_len);
1346 } else {
1347 Assembler::evpsllvw(dst, mask, nds, src, merge, vector_len);
1348 }
1349 }
1350 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1351 if (!is_varshift) {
1352 Assembler::evpslld(dst, mask, nds, src, merge, vector_len);
1353 } else {
1354 Assembler::evpsllvd(dst, mask, nds, src, merge, vector_len);
1355 }
1356 }
1357 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1358 if (!is_varshift) {
1359 Assembler::evpsllq(dst, mask, nds, src, merge, vector_len);
1360 } else {
1361 Assembler::evpsllvq(dst, mask, nds, src, merge, vector_len);
1362 }
1363 }
1364 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1365 if (!is_varshift) {
1366 Assembler::evpsrlw(dst, mask, nds, src, merge, vector_len);
1367 } else {
1368 Assembler::evpsrlvw(dst, mask, nds, src, merge, vector_len);
1369 }
1370 }
1371 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1372 if (!is_varshift) {
1373 Assembler::evpsrld(dst, mask, nds, src, merge, vector_len);
1374 } else {
1375 Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
1376 }
1377 }
1378 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1379 if (!is_varshift) {
1380 Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
1381 } else {
1382 Assembler::evpsrlvq(dst, mask, nds, src, merge, vector_len);
1383 }
1384 }
1385 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1386 if (!is_varshift) {
1387 Assembler::evpsraw(dst, mask, nds, src, merge, vector_len);
1388 } else {
1389 Assembler::evpsravw(dst, mask, nds, src, merge, vector_len);
1390 }
1391 }
1392 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1393 if (!is_varshift) {
1394 Assembler::evpsrad(dst, mask, nds, src, merge, vector_len);
1395 } else {
1396 Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
1397 }
1398 }
1399 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
1400 if (!is_varshift) {
1401 Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
1402 } else {
1403 Assembler::evpsravq(dst, mask, nds, src, merge, vector_len);
1404 }
1405 }
1406
1407 void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1408 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1409 void evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1410 void evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1411
1412 void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1413 void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1414
1415 void vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
1416 void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
1417
1418 void vptest(XMMRegister dst, XMMRegister src);
1419 void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
1420
1421 void punpcklbw(XMMRegister dst, XMMRegister src);
1422 void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
1423
1424 void pshufd(XMMRegister dst, Address src, int mode);
1425 void pshufd(XMMRegister dst, XMMRegister src, int mode) { Assembler::pshufd(dst, src, mode); }
1426
1427 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1428 void pshuflw(XMMRegister dst, Address src, int mode) { Assembler::pshuflw(dst, src, mode); }
1429
1430 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1431 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
1432 void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1433
1434 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1435 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
1436 void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1437
1438 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
1439
1440 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
1441 void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
1442 void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1443
1444 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); }
1445 void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); }
1446 void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1447
1448 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); }
1449 void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); }
1450 void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1451
1452 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); }
1453 void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); }
1454 void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1455
1456 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); }
1457 void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); }
1458 void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1459
1460 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); }
1461 void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
1462 void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1463
1464 void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1465 void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
1466
1467 // AVX Vector instructions
1468
1469 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1470 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
1471 void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1472
1473 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1474 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
1475 void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1476
1477 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1478 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1479 Assembler::vpxor(dst, nds, src, vector_len);
1480 else
1481 Assembler::vxorpd(dst, nds, src, vector_len);
1482 }
1483 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
1484 if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
1485 Assembler::vpxor(dst, nds, src, vector_len);
1486 else
1487 Assembler::vxorpd(dst, nds, src, vector_len);
1488 }
1489 void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
1490
1491 // Simple version for AVX2 256bit vectors
1492 void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
1493 void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
1494
1495 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
1496 void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
1497
1498 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
1499 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1500 Assembler::vinserti32x4(dst, nds, src, imm8);
1501 } else if (UseAVX > 1) {
1502 // vinserti128 is available only in AVX2
1503 Assembler::vinserti128(dst, nds, src, imm8);
1504 } else {
1505 Assembler::vinsertf128(dst, nds, src, imm8);
1506 }
1507 }
1508
1509 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
1510 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1511 Assembler::vinserti32x4(dst, nds, src, imm8);
1512 } else if (UseAVX > 1) {
1513 // vinserti128 is available only in AVX2
1514 Assembler::vinserti128(dst, nds, src, imm8);
1515 } else {
1516 Assembler::vinsertf128(dst, nds, src, imm8);
1517 }
1518 }
1519
1520 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1521 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1522 Assembler::vextracti32x4(dst, src, imm8);
1523 } else if (UseAVX > 1) {
1524 // vextracti128 is available only in AVX2
1525 Assembler::vextracti128(dst, src, imm8);
1526 } else {
1527 Assembler::vextractf128(dst, src, imm8);
1528 }
1529 }
1530
1531 void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
1532 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1533 Assembler::vextracti32x4(dst, src, imm8);
1534 } else if (UseAVX > 1) {
1535 // vextracti128 is available only in AVX2
1536 Assembler::vextracti128(dst, src, imm8);
1537 } else {
1538 Assembler::vextractf128(dst, src, imm8);
1539 }
1540 }
1541
1542 // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
1543 void vinserti128_high(XMMRegister dst, XMMRegister src) {
1544 vinserti128(dst, dst, src, 1);
1545 }
1546 void vinserti128_high(XMMRegister dst, Address src) {
1547 vinserti128(dst, dst, src, 1);
1548 }
1549 void vextracti128_high(XMMRegister dst, XMMRegister src) {
1550 vextracti128(dst, src, 1);
1551 }
1552 void vextracti128_high(Address dst, XMMRegister src) {
1553 vextracti128(dst, src, 1);
1554 }
1555
1556 void vinsertf128_high(XMMRegister dst, XMMRegister src) {
1557 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1558 Assembler::vinsertf32x4(dst, dst, src, 1);
1559 } else {
1560 Assembler::vinsertf128(dst, dst, src, 1);
1561 }
1562 }
1563
1564 void vinsertf128_high(XMMRegister dst, Address src) {
1565 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1566 Assembler::vinsertf32x4(dst, dst, src, 1);
1567 } else {
1568 Assembler::vinsertf128(dst, dst, src, 1);
1569 }
1570 }
1571
1572 void vextractf128_high(XMMRegister dst, XMMRegister src) {
1573 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1574 Assembler::vextractf32x4(dst, src, 1);
1575 } else {
1576 Assembler::vextractf128(dst, src, 1);
1577 }
1578 }
1579
1580 void vextractf128_high(Address dst, XMMRegister src) {
1581 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1582 Assembler::vextractf32x4(dst, src, 1);
1583 } else {
1584 Assembler::vextractf128(dst, src, 1);
1585 }
1586 }
1587
1588 // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
1589 void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
1590 Assembler::vinserti64x4(dst, dst, src, 1);
1591 }
1592 void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
1593 Assembler::vinsertf64x4(dst, dst, src, 1);
1594 }
1595 void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
1596 Assembler::vextracti64x4(dst, src, 1);
1597 }
1598 void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
1599 Assembler::vextractf64x4(dst, src, 1);
1600 }
1601 void vextractf64x4_high(Address dst, XMMRegister src) {
1602 Assembler::vextractf64x4(dst, src, 1);
1603 }
1604 void vinsertf64x4_high(XMMRegister dst, Address src) {
1605 Assembler::vinsertf64x4(dst, dst, src, 1);
1606 }
1607
1608 // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
1609 void vinserti128_low(XMMRegister dst, XMMRegister src) {
1610 vinserti128(dst, dst, src, 0);
1611 }
1612 void vinserti128_low(XMMRegister dst, Address src) {
1613 vinserti128(dst, dst, src, 0);
1614 }
1615 void vextracti128_low(XMMRegister dst, XMMRegister src) {
1616 vextracti128(dst, src, 0);
1617 }
1618 void vextracti128_low(Address dst, XMMRegister src) {
1619 vextracti128(dst, src, 0);
1620 }
1621
1622 void vinsertf128_low(XMMRegister dst, XMMRegister src) {
1623 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1624 Assembler::vinsertf32x4(dst, dst, src, 0);
1625 } else {
1626 Assembler::vinsertf128(dst, dst, src, 0);
1627 }
1628 }
1629
1630 void vinsertf128_low(XMMRegister dst, Address src) {
1631 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1632 Assembler::vinsertf32x4(dst, dst, src, 0);
1633 } else {
1634 Assembler::vinsertf128(dst, dst, src, 0);
1635 }
1636 }
1637
1638 void vextractf128_low(XMMRegister dst, XMMRegister src) {
1639 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1640 Assembler::vextractf32x4(dst, src, 0);
1641 } else {
1642 Assembler::vextractf128(dst, src, 0);
1643 }
1644 }
1645
1646 void vextractf128_low(Address dst, XMMRegister src) {
1647 if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
1648 Assembler::vextractf32x4(dst, src, 0);
1649 } else {
1650 Assembler::vextractf128(dst, src, 0);
1651 }
1652 }
1653
1654 // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
1655 void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
1656 Assembler::vinserti64x4(dst, dst, src, 0);
1657 }
1658 void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
1659 Assembler::vinsertf64x4(dst, dst, src, 0);
1660 }
1661 void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
1662 Assembler::vextracti64x4(dst, src, 0);
1663 }
1664 void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
1665 Assembler::vextractf64x4(dst, src, 0);
1666 }
1667 void vextractf64x4_low(Address dst, XMMRegister src) {
1668 Assembler::vextractf64x4(dst, src, 0);
1669 }
1670 void vinsertf64x4_low(XMMRegister dst, Address src) {
1671 Assembler::vinsertf64x4(dst, dst, src, 0);
1672 }
1673
1674 // Carry-Less Multiplication Quadword
// The four helpers differ only in the last argument handed to
// Assembler::vpclmulqdq; the per-helper comments state which quadwords of
// nds and src that value selects.
1675 void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1676 // 0x00 - multiply lower 64 bits [0:63]
1677 Assembler::vpclmulqdq(dst, nds, src, 0x00);
1678 }
1679 void vpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1680 // 0x11 - multiply upper 64 bits [64:127]
1681 Assembler::vpclmulqdq(dst, nds, src, 0x11);
1682 }
1683 void vpclmullqhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1684 // 0x10 - multiply nds[0:63] and src[64:127]
1685 Assembler::vpclmulqdq(dst, nds, src, 0x10);
1686 }
1687 void vpclmulhqlqdq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1688 // 0x01 - multiply nds[64:127] and src[0:63]
1689 Assembler::vpclmulqdq(dst, nds, src, 0x01);
1690 }
1691
// Wrappers around Assembler::evpclmulqdq: the selector constant is fixed per
// helper and vector_len is passed through unchanged.
1692 void evpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1693 // 0x00 - multiply lower 64 bits [0:63]
1694 Assembler::evpclmulqdq(dst, nds, src, 0x00, vector_len);
1695 }
1696 void evpclmulhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1697 // 0x11 - multiply upper 64 bits [64:127]
1698 Assembler::evpclmulqdq(dst, nds, src, 0x11, vector_len);
1699 }
1700
1701 // AVX-512 mask operations.
1702 void kand(BasicType etype, KRegister dst, KRegister src1, KRegister src2);
1703 void kor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1704 void knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp = knoreg, Register rtmp = noreg);
1705 void kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2);
1706 void kortest(uint masklen, KRegister src1, KRegister src2);
1707 void ktest(uint masklen, KRegister src1, KRegister src2);
1708
1709 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1710 void evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1711
1712 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1713 void evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1714
1715 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1716 void evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1717
1718 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
1719 void evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
1720
1721 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1722 void evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1723 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
1724 void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
1725
1726 void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
1727 void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
1728
1729 void cmov32( Condition cc, Register dst, Address src);
1730 void cmov32( Condition cc, Register dst, Register src);
1731
// Register-to-register conditional move; forwards its arguments unchanged to
// cmovptr(cc, dst, src).
1732 void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); }
1733
// Pointer-sized conditional move. LP64_ONLY(...) supplies cmovq for 64-bit
// builds and NOT_LP64(...) supplies cmov32 otherwise; exactly one of the two
// macro arguments survives preprocessing.
// (The listing had the expanded text of LP64_ONLY fused in after the macro
// call, which duplicated the cmovq call; that residue is removed here.)
1734 void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1735 void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); }
1736
1737 void movoop(Register dst, jobject obj);
1738 void movoop(Address dst, jobject obj);
1739
1740 void mov_metadata(Register dst, Metadata* obj);
1741 void mov_metadata(Address dst, Metadata* obj);
1742
1743 void movptr(ArrayAddress dst, Register src);
1744 // can this do an lea?
1745 void movptr(Register dst, ArrayAddress src);
1746
1747 void movptr(Register dst, Address src);
1748
// movptr from an AddressLiteral. The two declarations differ only in the
// default for `scratch`: rscratch1 on LP64, noreg on 32-bit.
// The guard must test _LP64; the listing showed "_LP641" (macro name fused
// with its expansion), which is never defined and would always pick #else.
1749#ifdef _LP64
1750 void movptr(Register dst, AddressLiteral src, Register scratch=rscratch1);
1751#else
1752 void movptr(Register dst, AddressLiteral src, Register scratch=noreg); // Scratch reg is ignored in 32-bit
1753#endif
1754
1755 void movptr(Register dst, intptr_t src);
1756 void movptr(Register dst, Register src);
1757 void movptr(Address dst, intptr_t src);
1758
1759 void movptr(Address dst, Register src);
1760
// Move a RegisterOrConstant into dst by dispatching on its kind: the register
// case goes to the movptr overload taking src.as_register(), the constant
// case to the overload taking src.as_constant().
1761  void movptr(Register dst, RegisterOrConstant src) {
1762    if (!src.is_constant()) { movptr(dst, src.as_register()); return; }
1763    movptr(dst, src.as_constant());
1764  }
1765
// The guard must test _LP64; the listing showed "_LP641" (macro name fused
// with its expansion), which is never defined and would drop these overloads.
1766#ifdef _LP64
1767 // Generally the next two are only used for moving NULL,
1768 // although there are situations in initializing the mark word where
1769 // they could be used. They are dangerous.
1770
1771 // They are declared only on LP64, where int32_t and intptr_t are distinct
1772 // types; elsewhere they would be ambiguous with the intptr_t overloads.
1773
1774 void movptr(Address dst, int32_t imm32);
1775 void movptr(Register dst, int32_t imm32);
1776#endif // _LP64
1777
1778 // to avoid hiding movl
1779 void mov32(AddressLiteral dst, Register src);
1780 void mov32(Register dst, AddressLiteral src);
1781
1782 // to avoid hiding movb
1783 void movbyte(ArrayAddress dst, int src);
1784
1785 // Import other mov() methods from the parent class or else
1786 // they will be hidden by the following overriding declaration.
1787 using Assembler::movdl;
1788 using Assembler::movq;
1789 void movdl(XMMRegister dst, AddressLiteral src);
1790 void movq(XMMRegister dst, AddressLiteral src);
1791
1792 // Can push value or effective address
1793 void pushptr(AddressLiteral src);
1794
// Pointer-sized push/pop of a memory operand: pushq/popq under LP64_ONLY,
// pushl/popl under NOT_LP64; exactly one argument survives preprocessing.
// (The listing had the expanded text of LP64_ONLY fused in after the macro
// call, duplicating the 64-bit call; that residue is removed here.)
1795 void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); }
1796 void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); }
1797
1798 void pushoop(jobject obj);
1799 void pushklass(Metadata* obj);
1800
1801 // Widen a 32-bit ("l") value to pointer size, sign-extending where needed:
// LP64 builds emit movslq; other builds emit a plain movl, which the
// register-to-register overload skips when dst == src.
// (The listing had the expanded text of LP64_ONLY fused in after the macro
// call, duplicating the movslq call; that residue is removed here.)
1802 void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
1803 void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
1804
1805
1806 public:
1807 // C2 compiled method's prolog code.
1808 void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
1809
1810 // clear memory of size 'cnt' qwords, starting at 'base';
1811 // if 'is_large' is set, do not try to produce short loop
1812 void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg);
1813
1814 // clear memory initialization sequence for constant size;
1815 void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
1816
1817 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
1818 void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
1819
1820 // Fill primitive arrays
1821 void generate_fill(BasicType t, bool aligned,
1822 Register to, Register value, Register count,
1823 Register rtmp, XMMRegister xtmp);
1824
1825 void encode_iso_array(Register src, Register dst, Register len,
1826 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
1827 XMMRegister tmp4, Register tmp5, Register result, bool ascii);
1828
// 64-bit-only helpers: multi-word multiply, square and multiply-add routines
// plus vectorized_mismatch. Declarations only; the bodies are not in this
// part of the header.
// The guard must test _LP64; the listing showed "_LP641" (macro name fused
// with its expansion), which is never defined and would drop this section.
1829#ifdef _LP64
1830 void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2);
1831 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1832 Register y, Register y_idx, Register z,
1833 Register carry, Register product,
1834 Register idx, Register kdx);
1835 void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
1836 Register yz_idx, Register idx,
1837 Register carry, Register product, int offset);
1838 void multiply_128_x_128_bmi2_loop(Register y, Register z,
1839 Register carry, Register carry2,
1840 Register idx, Register jdx,
1841 Register yz_idx1, Register yz_idx2,
1842 Register tmp, Register tmp3, Register tmp4);
1843 void multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
1844 Register yz_idx, Register idx, Register jdx,
1845 Register carry, Register product,
1846 Register carry2);
1847 void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
1848 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
1849 void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
1850 Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
1851 void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
1852 Register tmp2);
1853 void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
1854 Register rdxReg, Register raxReg);
1855 void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
1856 void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
1857 Register tmp3, Register tmp4);
1858 void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
1859 Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
1860
1861 void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
1862 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
1863 Register raxReg);
1864 void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
1865 Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
1866 Register raxReg);
1867 void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
1868 Register result, Register tmp1, Register tmp2,
1869 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
1870#endif // _LP64
1871
1872 // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
1873 void update_byte_crc32(Register crc, Register val, Register table);
1874 void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
1875
1876
// 64-bit-only CRC32 (AVX-512) and Adler32 helpers; declarations only.
// The guard must test _LP64; the listing showed "_LP641" (macro name fused
// with its expansion), which is never defined and would drop this section.
1877#ifdef _LP64
1878 void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2);
1879 void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos,
1880 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
1881 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup);
1882 void updateBytesAdler32(Register adler32, Register buf, Register length, XMMRegister shuf0, XMMRegister shuf1, ExternalAddress scale);
1883#endif // _LP64
1884
1885 // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
1886 // Note on a naming convention:
1887 // Prefix w = register only used on a Westmere+ architecture
1888 // Prefix n = register only used on a Nehalem architecture
// crc32c_ipl_alg4 has a different signature per word size: the 64-bit form
// takes three general-purpose temporaries, the 32-bit form additionally takes
// two XMM temporaries.
// The guard must test _LP64; the listing showed "_LP641" (macro name fused
// with its expansion), which is never defined and would always pick #else.
1889#ifdef _LP64
1890 void crc32c_ipl_alg4(Register in_out, uint32_t n,
1891 Register tmp1, Register tmp2, Register tmp3);
1892#else
1893 void crc32c_ipl_alg4(Register in_out, uint32_t n,
1894 Register tmp1, Register tmp2, Register tmp3,
1895 XMMRegister xtmp1, XMMRegister xtmp2);
1896#endif
1897 void crc32c_pclmulqdq(XMMRegister w_xtmp1,
1898 Register in_out,
1899 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
1900 XMMRegister w_xtmp2,
1901 Register tmp1,
1902 Register n_tmp2, Register n_tmp3);
1903 void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
1904 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1905 Register tmp1, Register tmp2,
1906 Register n_tmp3);
1907 void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
1908 Register in_out1, Register in_out2, Register in_out3,
1909 Register tmp1, Register tmp2, Register tmp3,
1910 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1911 Register tmp4, Register tmp5,
1912 Register n_tmp6);
1913 void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
1914 Register tmp1, Register tmp2, Register tmp3,
1915 Register tmp4, Register tmp5, Register tmp6,
1916 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
1917 bool is_pclmulqdq_supported);
1918 // Fold 128-bit data chunk
1919 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
1920 void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);
// The guard must test _LP64; the listing showed "_LP641" (macro name fused
// with its expansion), which is never defined and would drop this declaration.
1921#ifdef _LP64
1922 // Fold 512-bit data chunk
1923 void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset);
1924#endif // _LP64
1925 // Fold 8-bit data
1926 void fold_8bit_crc32(Register crc, Register table, Register tmp);
1927 void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);
1928
1929 // Compress char[] array to byte[].
1930 void char_array_compress(Register src, Register dst, Register len,
1931 XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
1932 XMMRegister tmp4, Register tmp5, Register result,
1933 KRegister mask1 = knoreg, KRegister mask2 = knoreg);
1934
1935 // Inflate byte[] array to char[].
1936 void byte_array_inflate(Register src, Register dst, Register len,
1937 XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
1938
1939 void fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
1940 Register length, Register temp, int vec_enc);
1941
1942 void fill64_masked(uint shift, Register dst, int disp,
1943 XMMRegister xmm, KRegister mask, Register length,
1944 Register temp, bool use64byteVector = false);
1945
1946 void fill32_masked(uint shift, Register dst, int disp,
1947 XMMRegister xmm, KRegister mask, Register length,
1948 Register temp);
1949
1950 void fill32(Register dst, int disp, XMMRegister xmm);
1951
1952 void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false);
1953
// 64-bit-only declarations: float/double-to-integer conversions, cache
// write-back helpers and, when COMPILER2_OR_JVMCI is enabled, the AVX
// array-copy / fill helpers.
// Both guards are restored to the real macro names; the listing showed
// "_LP641" and "COMPILER2_OR_JVMCI1" (macro name fused with its expansion),
// neither of which is ever defined.
1954#ifdef _LP64
1955 void convert_f2i(Register dst, XMMRegister src);
1956 void convert_d2i(Register dst, XMMRegister src);
1957 void convert_f2l(Register dst, XMMRegister src);
1958 void convert_d2l(Register dst, XMMRegister src);
1959
1960 void cache_wb(Address line);
1961 void cache_wbsync(bool is_pre);
1962
1963#if COMPILER2_OR_JVMCI
1964 void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
1965 Register to, Register count, int shift,
1966 Register index, Register temp,
1967 bool use64byteVector, Label& L_entry, Label& L_exit);
1968
1969 void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
1970 Register to, Register start_index, Register end_index,
1971 Register count, int shift, Register temp,
1972 bool use64byteVector, Label& L_entry, Label& L_exit);
1973
1974 void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
1975 KRegister mask, Register length, Register index,
1976 Register temp, int shift = Address::times_1, int offset = 0,
1977 bool use64byteVector = false);
1978
1979 void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
1980 KRegister mask, Register length, Register index,
1981 Register temp, int shift = Address::times_1, int offset = 0);
1982
1983 void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
1984 int shift = Address::times_1, int offset = 0);
1985
1986 void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
1987 bool conjoint, int shift = Address::times_1, int offset = 0,
1988 bool use64byteVector = false);
1989
1990 void generate_fill_avx3(BasicType type, Register to, Register value,
1991 Register count, Register rtmp, XMMRegister xtmp);
1992
1993#endif // COMPILER2_OR_JVMCI
1994
1995#endif // _LP64
1996
1997 void vallones(XMMRegister dst, int vector_len);
1998};
1999
2000/**
2001 * class SkipIfEqual:
2002 *
2003 * Scoped helper. Constructing an instance emits assembly that jumps around
2004 * whatever code is emitted between the instance's creation and its automatic
2005 * destruction at the end of the enclosing scope block. Whether the jump is
2006 * taken depends on the flag passed to the constructor, checked at run time.
2007 */
2008class SkipIfEqual {
2009  // Private by class default.
2010  MacroAssembler* _masm;  // presumably the assembler given to the constructor -- confirm in the .cpp
2011  Label _label;           // presumably the skip target bound on destruction -- confirm in the .cpp
2012
2013 public:
2014  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
2015  ~SkipIfEqual();
2016};
2017
2018#endif // CPU_X86_MACROASSEMBLER_X86_HPP