Bug Summary

File:jdk/src/hotspot/cpu/x86/assembler_x86.hpp
Warning:line 233, column 5
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name macroAssembler_x86.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 
-I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers -Wno-missing-field-initializers -Wno-implicit-fallthrough 
-Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "jvm.h"
27#include "asm/assembler.hpp"
28#include "asm/assembler.inline.hpp"
29#include "compiler/compiler_globals.hpp"
30#include "compiler/disassembler.hpp"
31#include "gc/shared/barrierSet.hpp"
32#include "gc/shared/barrierSetAssembler.hpp"
33#include "gc/shared/collectedHeap.inline.hpp"
34#include "gc/shared/tlab_globals.hpp"
35#include "interpreter/bytecodeHistogram.hpp"
36#include "interpreter/interpreter.hpp"
37#include "memory/resourceArea.hpp"
38#include "memory/universe.hpp"
39#include "oops/accessDecorators.hpp"
40#include "oops/compressedOops.inline.hpp"
41#include "oops/klass.inline.hpp"
42#include "prims/methodHandles.hpp"
43#include "runtime/flags/flagSetting.hpp"
44#include "runtime/interfaceSupport.inline.hpp"
45#include "runtime/jniHandles.hpp"
46#include "runtime/objectMonitor.hpp"
47#include "runtime/os.hpp"
48#include "runtime/safepoint.hpp"
49#include "runtime/safepointMechanism.hpp"
50#include "runtime/sharedRuntime.hpp"
51#include "runtime/stubRoutines.hpp"
52#include "runtime/thread.hpp"
53#include "utilities/macros.hpp"
54#include "crc32c.h"
55
// Debug-comment helper macros. In PRODUCT builds BLOCK_COMMENT expands to
// nothing; otherwise it forwards to block_comment(str).
// NOTE(review): on the two STOP lines the report appears to have fused the
// macro definition with its inlined expansion text (the same pattern as the
// expanded assert(...) calls further down) - confirm against the real source.
56#ifdef PRODUCT
57#define BLOCK_COMMENT(str) /* nothing */
58#define STOP(error)block_comment(error); stop(error) stop(error)
59#else
60#define BLOCK_COMMENT(str) block_comment(str)
61#define STOP(error)block_comment(error); stop(error) block_comment(error); stop(error)
62#endif
63
// BIND binds the label, then emits a block comment naming it ("<label>:").
64#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
65
// Conditionally compiled definition: the platform instruction-mark check
// unconditionally returns true.
// NOTE(review): "ASSERT1" looks like the macro name ASSERT fused with its
// expansion "1" by the report renderer - confirm.
66#ifdef ASSERT1
67bool AbstractAssembler::pd_check_instruction_mark() { return true; }
68#endif
69
// Table of negated condition codes: the entry at index i is the logical
// opposite of the condition whose encoding is i (the trailing comment on each
// row names the condition being indexed and its encoding).
70static Assembler::Condition reverse[] = {
71 Assembler::noOverflow /* overflow = 0x0 */ ,
72 Assembler::overflow /* noOverflow = 0x1 */ ,
73 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
74 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
75 Assembler::notZero /* zero = 0x4, equal = 0x4 */ ,
76 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ ,
77 Assembler::above /* belowEqual = 0x6 */ ,
78 Assembler::belowEqual /* above = 0x7 */ ,
79 Assembler::positive /* negative = 0x8 */ ,
80 Assembler::negative /* positive = 0x9 */ ,
81 Assembler::noParity /* parity = 0xa */ ,
82 Assembler::parity /* noParity = 0xb */ ,
83 Assembler::greaterEqual /* less = 0xc */ ,
84 Assembler::less /* greaterEqual = 0xd */ ,
85 Assembler::greater /* lessEqual = 0xe */ ,
86 Assembler::lessEqual /* greater = 0xf, */
87
88};
89
90
91// Implementation of MacroAssembler
92
93// First all the versions that have distinct versions depending on 32/64 bit
94// Unless the difference is trivial (1 line or so).
95
96#ifndef _LP641
97
98// 32bit versions
99
// 32-bit: wraps the literal's target and relocation spec in an Address.
100Address MacroAssembler::as_Address(AddressLiteral adr) {
101 return Address(adr.target(), adr.rspec());
102}
103
// 32-bit: converts an ArrayAddress by delegating to Address::make_array.
104Address MacroAssembler::as_Address(ArrayAddress adr) {
105 return Address::make_array(adr);
106}
107
// 32-bit: emits a call to entry_point, then adds
// number_of_arguments * wordSize to rsp (presumably discarding arguments the
// caller pushed - confirm against callers).
108void MacroAssembler::call_VM_leaf_base(address entry_point,
109 int number_of_arguments) {
110 call(RuntimeAddress(entry_point));
111 increment(rsp, number_of_arguments * wordSize);
112}
113
// 32-bit: compares the memory operand src1 with the Metadata pointer, passed
// as a 32-bit literal with an immediate metadata relocation spec.
114void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
115 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
116}
117
118
// 32-bit: register-operand variant; compares src1 with the Metadata pointer
// passed as a 32-bit literal with an immediate metadata relocation spec.
119void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
120 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
121}
122
// 32-bit: compares the memory operand src1 with the jobject value, passed as
// a 32-bit literal with an immediate oop relocation spec.
123void MacroAssembler::cmpoop(Address src1, jobject obj) {
124 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
125}
126
// 32-bit: register-operand variant; compares src1 with the jobject value
// passed as a 32-bit literal with an immediate oop relocation spec.
127void MacroAssembler::cmpoop(Register src1, jobject obj) {
128 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
129}
130
// Fills hi with the sign of lo. Uses the single cdql instruction when
// VM_Version::is_P6() holds and the pair is exactly rdx:rax; otherwise copies
// lo into hi and arithmetic-shifts hi right by 31.
131void MacroAssembler::extend_sign(Register hi, Register lo) {
132 // According to Intel Doc. AP-526, "Integer Divide", p.18.
133 if (VM_Version::is_P6() && hi == rdx && lo == rax) {
134 cdql();
135 } else {
136 movl(hi, lo);
137 sarl(hi, 31);
138 }
139}
140
// Branches to L when the parity flag is set after loading the FPU status word
// into the CPU flags (fnstsw_ax + sahf). rax is saved/restored around the
// sequence via save_rax(tmp)/restore_rax(tmp).
141void MacroAssembler::jC2(Register tmp, Label& L) {
142 // set parity bit if FPU flag C2 is set (via rax)
143 save_rax(tmp);
144 fwait(); fnstsw_ax();
145 sahf();
146 restore_rax(tmp);
147 // branch
148 jcc(Assembler::parity, L);
149}
150
// Counterpart of jC2: same flag-loading sequence, but branches to L when the
// parity flag is clear (Assembler::noParity).
151void MacroAssembler::jnC2(Register tmp, Label& L) {
152 // set parity bit if FPU flag C2 is set (via rax)
153 save_rax(tmp);
154 fwait(); fnstsw_ax();
155 sahf();
156 restore_rax(tmp);
157 // branch
158 jcc(Assembler::noParity, L);
159}
160
161// 32bit can do a case table jump in one instruction but we no longer allow the base
162// to be installed in the Address class
// Emits a single jmp through the Address produced by as_Address(entry).
163void MacroAssembler::jump(ArrayAddress entry) {
164 jmp(as_Address(entry));
165}
166
167// Note: y_lo will be destroyed
// 32-bit three-way compare of x_hi:x_lo against y_hi:y_lo. The high words are
// compared with signed conditions (less/greater), the low words with unsigned
// ones (below/equal). x_hi ends up holding -1 (low path), 0 (equal) or
// 1 (high path).
168void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
169 // Long compare for Java (semantics as described in JVM spec.)
170 Label high, low, done;
171
172 cmpl(x_hi, y_hi);
173 jcc(Assembler::less, low);
174 jcc(Assembler::greater, high);
175 // x_hi is the return register
176 xorl(x_hi, x_hi);
177 cmpl(x_lo, y_lo);
178 jcc(Assembler::below, low);
179 jcc(Assembler::equal, done);
180
181 bind(high);
182 xorl(x_hi, x_hi);
183 increment(x_hi);
184 jmp(done);
185
186 bind(low);
187 xorl(x_hi, x_hi);
188 decrementl(x_hi);
189
190 bind(done);
191}
192
// 32-bit: loads the literal's target address into dst as a 32-bit immediate
// move carrying the literal's relocation spec.
193void MacroAssembler::lea(Register dst, AddressLiteral src) {
194 mov_literal32(dst, (int32_t)src.target(), src.rspec());
195}
196
// 32-bit: stores the literal's target address into the memory operand dst
// using a 32-bit immediate move carrying the literal's relocation spec.
197void MacroAssembler::lea(Address dst, AddressLiteral adr) {
198 // leal(dst, as_Address(adr));
199 // see note in movl as to why we must use a move
200 mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
201}
202
// 32-bit: tears down the frame with an explicit two-instruction sequence
// (rsp := rbp, then pop rbp).
203void MacroAssembler::leave() {
204 mov(rsp, rbp);
205 pop(rbp);
206}
207
// 32-bit long multiply of two stack-resident operands addressed relative to
// rsp by the given byte offsets. Writes rax, rbx, rcx and rdx; when both high
// words are zero the two cross-product multiplies are skipped.
208void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
209 // Multiplication of two Java long values stored on the stack
210 // as illustrated below. Result is in rdx:rax.
211 //
212 // rsp ---> [ ?? ] \ \
213 // .... | y_rsp_offset |
214 // [ y_lo ] / (in bytes) | x_rsp_offset
215 // [ y_hi ] | (in bytes)
216 // .... |
217 // [ x_lo ] /
218 // [ x_hi ]
219 // ....
220 //
221 // Basic idea: lo(result) = lo(x_lo * y_lo)
222 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
223 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
224 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
225 Label quick;
226 // load x_hi, y_hi and check if quick
227 // multiplication is possible
228 movl(rbx, x_hi);
229 movl(rcx, y_hi);
230 movl(rax, rbx);
231 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0
232 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply
233 // do full multiplication
234 // 1st step
235 mull(y_lo); // x_hi * y_lo
236 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx,
237 // 2nd step
238 movl(rax, x_lo);
239 mull(rcx); // x_lo * y_hi
240 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx,
241 // 3rd step
242 bind(quick); // note: rbx, = 0 if quick multiply!
243 movl(rax, x_lo);
244 mull(y_lo); // x_lo * y_lo
245 addl(rdx, rbx); // correct hi(x_lo * y_lo)
246}
247
// 32-bit: negates the value held in the hi:lo register pair. lo is negated
// first; the resulting carry is added into hi (adcl hi, 0) before hi itself
// is negated.
248void MacroAssembler::lneg(Register hi, Register lo) {
249 negl(lo);
250 adcl(hi, 0);
251 negl(hi);
252}
253
// 32-bit: shifts the hi:lo register pair left by the count in rcx. The count
// is masked to 6 bits in place (rcx is modified); counts >= BitsPerWord first
// move lo into hi and clear lo. Neither hi nor lo may be rcx (asserted).
254void MacroAssembler::lshl(Register hi, Register lo) {
255 // Java shift left long support (semantics as described in JVM spec., p.305)
256 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
257 // shift value is in rcx !
258 assert(hi != rcx, "must not use rcx")do { if (!(hi != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 258, "assert(" "hi != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
259 assert(lo != rcx, "must not use rcx")do { if (!(lo != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 259, "assert(" "lo != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
260 const Register s = rcx; // shift count
261 const int n = BitsPerWord;
262 Label L;
263 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
264 cmpl(s, n); // if (s < n)
265 jcc(Assembler::less, L); // else (s >= n)
266 movl(hi, lo); // x := x << n
267 xorl(lo, lo);
268 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
269 bind(L); // s (mod n) < n
270 shldl(hi, lo); // x := x << s
271 shll(lo);
272}
273
274
// 32-bit: shifts the hi:lo register pair right by the count in rcx (masked to
// 6 bits in place). sign_extension selects arithmetic (sarl) versus logical
// (shrl / zero-fill) treatment of the high word. Neither hi nor lo may be rcx
// (asserted).
275void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
276 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
277 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
278 assert(hi != rcx, "must not use rcx")do { if (!(hi != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 278, "assert(" "hi != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
279 assert(lo != rcx, "must not use rcx")do { if (!(lo != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 279, "assert(" "lo != rcx" ") failed", "must not use rcx");
::breakpoint(); } } while (0)
;
280 const Register s = rcx; // shift count
281 const int n = BitsPerWord;
282 Label L;
283 andl(s, 0x3f); // s := s & 0x3f (s < 0x40)
284 cmpl(s, n); // if (s < n)
285 jcc(Assembler::less, L); // else (s >= n)
286 movl(lo, hi); // x := x >> n
287 if (sign_extension) sarl(hi, 31);
288 else xorl(hi, hi);
289 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
290 bind(L); // s (mod n) < n
291 shrdl(lo, hi); // x := x >> s
292 if (sign_extension) sarl(hi);
293 else shrl(hi);
294}
295
// 32-bit: moves the jobject value into dst as a 32-bit literal with an
// immediate oop relocation spec.
296void MacroAssembler::movoop(Register dst, jobject obj) {
297 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
298}
299
// 32-bit: memory-destination variant; stores the jobject value to dst as a
// 32-bit literal with an immediate oop relocation spec.
300void MacroAssembler::movoop(Address dst, jobject obj) {
301 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
302}
303
// 32-bit: moves the Metadata pointer into dst as a 32-bit literal with an
// immediate metadata relocation spec.
304void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
305 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
306}
307
// 32-bit: memory-destination variant; stores the Metadata pointer to dst as a
// 32-bit literal with an immediate metadata relocation spec.
308void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
309 mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
310}
311
// 32-bit: for an lval literal, loads the literal's target address itself into
// dst; otherwise loads the 32-bit value stored at that address.
312void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
313 // scratch register is not used,
314 // it is defined to match parameters of 64-bit version of this method.
315 if (src.is_lval()) {
316 mov_literal32(dst, (intptr_t)src.target(), src.rspec());
317 } else {
318 movl(dst, as_Address(src));
319 }
320}
321
// 32-bit: stores src to the array element addressed by dst (movl).
322void MacroAssembler::movptr(ArrayAddress dst, Register src) {
323 movl(as_Address(dst), src);
324}
325
// 32-bit: loads dst from the array element addressed by src (movl).
326void MacroAssembler::movptr(Register dst, ArrayAddress src) {
327 movl(dst, as_Address(src));
328}
329
330// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// 32-bit: stores the immediate src to the memory operand dst with movl.
331void MacroAssembler::movptr(Address dst, intptr_t src) {
332 movl(dst, src);
333}
334
335
// Pops rcx, rdx, rdi, rsi - the exact reverse of the push order used by
// push_callee_saved_registers in this file.
336void MacroAssembler::pop_callee_saved_registers() {
337 pop(rcx);
338 pop(rdx);
339 pop(rdi);
340 pop(rsi);
341}
342
// Pushes rsi, rdi, rdx, rcx in that order; pop_callee_saved_registers in this
// file pops them back in reverse.
343void MacroAssembler::push_callee_saved_registers() {
344 push(rsi);
345 push(rdi);
346 push(rdx);
347 push(rcx);
348}
349
// 32-bit: pushes the jobject value as a 32-bit literal with an immediate oop
// relocation spec.
350void MacroAssembler::pushoop(jobject obj) {
351 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
352}
353
// 32-bit: pushes the Metadata pointer as a 32-bit literal with an immediate
// metadata relocation spec.
354void MacroAssembler::pushklass(Metadata* obj) {
355 push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
356}
357
// 32-bit: for an lval literal, pushes the literal's target address itself;
// otherwise pushes the 32-bit value stored at that address.
358void MacroAssembler::pushptr(AddressLiteral src) {
359 if (src.is_lval()) {
360 push_literal32((int32_t)src.target(), src.rspec());
361 } else {
362 pushl(as_Address(src));
363 }
364}
365
// 32-bit: passes argument 0 by pushing it on the stack.
366static void pass_arg0(MacroAssembler* masm, Register arg) {
367 masm->push(arg);
368}
369
// 32-bit: passes argument 1 by pushing it on the stack.
370static void pass_arg1(MacroAssembler* masm, Register arg) {
371 masm->push(arg);
372}
373
// 32-bit: passes argument 2 by pushing it on the stack.
374static void pass_arg2(MacroAssembler* masm, Register arg) {
375 masm->push(arg);
376}
377
// 32-bit: passes argument 3 by pushing it on the stack.
378static void pass_arg3(MacroAssembler* masm, Register arg) {
379 masm->push(arg);
380}
381
382#ifndef PRODUCT
383extern "C" void findpc(intptr_t x);
384#endif
385
// Runtime target of the 32-bit stop() in this file, which pushes the message,
// the current eip and all registers before calling here. Switches the current
// thread to _thread_in_vm, optionally (ShowMessageBoxOnError) prints bytecode
// counters and offers a register dump plus breakpoint, then reports a fatal
// error carrying msg.
// NOTE(review): the first three statements inside the ShowMessageBoxOnError
// branch repeat the three statements just above it and shadow
// `thread`/`saved_state`; saved_state is never read in the visible code.
// Looks redundant - confirm and clean up in the real source.
386void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
387 // In order to get locks to work, we need to fake an in_VM state
388 JavaThread* thread = JavaThread::current();
389 JavaThreadState saved_state = thread->thread_state();
390 thread->set_thread_state(_thread_in_vm);
391 if (ShowMessageBoxOnError) {
392 JavaThread* thread = JavaThread::current();
393 JavaThreadState saved_state = thread->thread_state();
394 thread->set_thread_state(_thread_in_vm);
395 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
396 ttyLocker ttyl;
397 BytecodeCounter::print();
398 }
399 // To see where a verify_oop failed, get $ebx+40/X for this frame.
400 // This is the value of eip which points to where verify_oop will return.
401 if (os::message_box(msg, "Execution stopped, print registers?")) {
402 print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
403 BREAKPOINT::breakpoint();
404 }
405 }
406 fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 406, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0)
;
407}
408
// Prints a 32-bit register/stack/code dump to tty: eip, each general register
// via os::print_location, 8 stack words starting at rsp with location info,
// 16 further rows of 8 raw stack words, and disassembly of the 64 bytes before
// and 32 bytes after eip. Holds a ttyLocker and a FlagSetting on Debugging for
// the duration of the call.
409void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
410 ttyLocker ttyl;
411 FlagSetting fs(Debugging, true);
412 tty->print_cr("eip = 0x%08x", eip);
413#ifndef PRODUCT
414 if ((WizardMode || Verbose) && PrintMiscellaneous) {
415 tty->cr();
416 findpc(eip);
417 tty->cr();
418 }
419#endif
420#define PRINT_REG(rax) \
421 { tty->print("%s = ", #rax); os::print_location(tty, rax); }
422 PRINT_REG(rax);
423 PRINT_REG(rbx);
424 PRINT_REG(rcx);
425 PRINT_REG(rdx);
426 PRINT_REG(rdi);
427 PRINT_REG(rsi);
428 PRINT_REG(rbp);
429 PRINT_REG(rsp);
430#undef PRINT_REG
431 // Print some words near top of stack.
432 int* dump_sp = (int*) rsp;
433 for (int col1 = 0; col1 < 8; col1++) {
434 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
435 os::print_location(tty, *dump_sp++);
436 }
437 for (int row = 0; row < 16; row++) {
438 tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
439 for (int col = 0; col < 8; col++) {
440 tty->print(" 0x%08x", *dump_sp++);
441 }
442 tty->cr();
443 }
444 // Print some instructions around pc:
445 Disassembler::decode((address)eip-64, (address)eip);
446 tty->print_cr("--------");
447 Disassembler::decode((address)eip, (address)eip+32);
448}
449
// 32-bit: emits code that pushes the message address, the current eip (via a
// call to the immediately following label) and all registers, then calls
// MacroAssembler::debug32. A hlt is emitted after the call.
450void MacroAssembler::stop(const char* msg) {
451 ExternalAddress message((address)msg);
452 // push address of message
453 pushptr(message.addr());
454 { Label L; call(L, relocInfo::none); bind(L); } // push eip
455 pusha(); // push registers
456 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)((address)((address_word)(MacroAssembler::debug32)))));
457 hlt();
458}
459
// 32-bit: emits a call to warning() with msg as its single stack argument,
// bracketed by push_CPU_state()/pop_CPU_state(); the argument word is dropped
// from the stack after the call.
460void MacroAssembler::warn(const char* msg) {
461 push_CPU_state();
462
463 ExternalAddress message((address) msg);
464 // push address of message
465 pushptr(message.addr());
466
467 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)((address)((address_word)(warning)))));
468 addl(rsp, wordSize); // discard argument
469 pop_CPU_state();
470}
471
// 32-bit: emits code that pushes the current eip and all registers, calls
// MacroAssembler::print_state32 with the CPU state saved around the call, then
// restores the registers and drops the pushed eip word.
472void MacroAssembler::print_state() {
473 { Label L; call(L, relocInfo::none); bind(L); } // push eip
474 pusha(); // push registers
475
476 push_CPU_state();
477 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)((address)((address_word)(MacroAssembler::print_state32)))));
478 pop_CPU_state();
479
480 popa();
481 addl(rsp, wordSize);
482}
483
484#else // _LP64
485
486// 64 bit versions
487
// 64-bit: builds an Address whose displacement is (target - current pc),
// narrowed to int32_t. Asserts that the literal is an rval and is reachable.
488Address MacroAssembler::as_Address(AddressLiteral adr) {
489 // amd64 always does this as a pc-rel
490 // we can be absolute or disp based on the instruction type
491 // jmp/call are displacements others are absolute
492 assert(!adr.is_lval(), "must be rval")do { if (!(!adr.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 492, "assert(" "!adr.is_lval()" ") failed", "must be rval")
; ::breakpoint(); } } while (0)
;
493 assert(reachable(adr), "must be")do { if (!(reachable(adr))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 493, "assert(" "reachable(adr)" ") failed", "must be"); ::breakpoint
(); } } while (0)
;
494 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
495
496}
497
// 64-bit: emits a lea of the array base into rscratch1 (overwriting it) and
// returns an Address using rscratch1 as base together with the index register
// and scale taken from adr.index(). Asserts the index carries no displacement.
498Address MacroAssembler::as_Address(ArrayAddress adr) {
499 AddressLiteral base = adr.base();
500 lea(rscratch1, base);
501 Address index = adr.index();
502 assert(index._disp == 0, "must not have disp")do { if (!(index._disp == 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 502, "assert(" "index._disp == 0" ") failed", "must not have disp"
); ::breakpoint(); } } while (0)
; // maybe it can?
503 Address array(rscratch1, index._index, index._scale, index._disp);
504 return array;
505}
506
// 64-bit: emits a call to entry_point with a run-time stack alignment check:
// if the low four bits of rsp are non-zero, rsp is lowered by 8 around the
// call and restored afterwards; otherwise the call is made directly. On
// _WIN64 an additional frame::arg_reg_save_area_bytes is reserved before and
// released after.
507void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
508 Label L, E;
509
510#ifdef _WIN64
511 // Windows always allocates space for its register args
512 assert(num_args <= 4, "only register arguments supported")do { if (!(num_args <= 4)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 512, "assert(" "num_args <= 4" ") failed", "only register arguments supported"
); ::breakpoint(); } } while (0)
;
513 subq(rsp, frame::arg_reg_save_area_bytes);
514#endif
515
516 // Align stack if necessary
517 testl(rsp, 15);
518 jcc(Assembler::zero, L);
519
520 subq(rsp, 8);
521 {
522 call(RuntimeAddress(entry_point));
523 }
524 addq(rsp, 8);
525 jmp(E);
526
527 bind(L);
528 {
529 call(RuntimeAddress(entry_point));
530 }
531
532 bind(E);
533
534#ifdef _WIN64
535 // restore stack pointer
536 addq(rsp, frame::arg_reg_save_area_bytes);
537#endif
538
539}
540
// Compares src1 with the 64-bit value stored at the literal's address. When
// the literal is not reachable, its address is first loaded into rscratch1
// (overwriting it) and the compare goes through that register.
541void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
542 assert(!src2.is_lval(), "should use cmpptr")do { if (!(!src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 542, "assert(" "!src2.is_lval()" ") failed", "should use cmpptr"
); ::breakpoint(); } } while (0)
;
543
544 if (reachable(src2)) {
545 cmpq(src1, as_Address(src2));
546 } else {
547 lea(rscratch1, src2);
548 Assembler::cmpq(src1, Address(rscratch1, 0));
549 }
550}
551
// Emits a 64-bit signed divide of rax by reg that skips the idivq instruction
// when rax == min_long and reg == -1 (rdx is zeroed for that case). Returns
// the code offset recorded immediately before the idivq instruction is
// emitted. reg must be neither rax nor rdx (asserted).
552int MacroAssembler::corrected_idivq(Register reg) {
553 // Full implementation of Java ldiv and lrem; checks for special
554 // case as described in JVM spec., p.243 & p.271. The function
555 // returns the (pc) offset of the idivl instruction - may be needed
556 // for implicit exceptions.
557 //
558 // normal case special case
559 //
560 // input : rax: dividend min_long
561 // reg: divisor (may not be eax/edx) -1
562 //
563 // output: rax: quotient (= rax idiv reg) min_long
564 // rdx: remainder (= rax irem reg) 0
565 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register")do { if (!(reg != rax && reg != rdx)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 565, "assert(" "reg != rax && reg != rdx" ") failed"
, "reg cannot be rax or rdx register"); ::breakpoint(); } } while
(0)
;
566 static const int64_t min_long = 0x8000000000000000;
567 Label normal_case, special_case;
568
569 // check for special case
570 cmp64(rax, ExternalAddress((address) &min_long));
571 jcc(Assembler::notEqual, normal_case);
572 xorl(rdx, rdx); // prepare rdx for possible special case (where
573 // remainder = 0)
574 cmpq(reg, -1);
575 jcc(Assembler::equal, special_case);
576
577 // handle normal case
578 bind(normal_case);
579 cdqq();
580 int idivq_offset = offset();
581 idivq(reg);
582
583 // normal and special case exit
584 bind(special_case);
585
586 return idivq_offset;
587}
588
// Subtracts value from reg, choosing the emitted form by value: min_jint is
// subtracted directly (it is handled before the negation path), negative
// values are delegated to incrementq with -value, 0 emits nothing, 1 uses decq
// when UseIncDec is set, anything else uses subq.
589void MacroAssembler::decrementq(Register reg, int value) {
590 if (value == min_jint) { subq(reg, value); return; }
591 if (value < 0) { incrementq(reg, -value); return; }
592 if (value == 0) { ; return; }
593 if (value == 1 && UseIncDec) { decq(reg) ; return; }
594 /* else */ { subq(reg, value) ; return; }
595}
596
// Memory-operand variant: subtracts value from the quadword at dst using the
// same case split as the register variant (min_jint, negative, 0, 1 with
// UseIncDec, otherwise subq).
597void MacroAssembler::decrementq(Address dst, int value) {
598 if (value == min_jint) { subq(dst, value); return; }
599 if (value < 0) { incrementq(dst, -value); return; }
600 if (value == 0) { ; return; }
601 if (value == 1 && UseIncDec) { decq(dst) ; return; }
602 /* else */ { subq(dst, value) ; return; }
603}
604
// Increments the quadword at the literal's address. When the literal is not
// reachable, its address is first loaded into rscratch1 (overwriting it).
605void MacroAssembler::incrementq(AddressLiteral dst) {
606 if (reachable(dst)) {
607 incrementq(as_Address(dst));
608 } else {
609 lea(rscratch1, dst);
610 incrementq(Address(rscratch1, 0));
611 }
612}
613
// Adds value to reg, choosing the emitted form by value: min_jint is added
// directly (it is handled before the negation path), negative values are
// delegated to decrementq with -value, 0 emits nothing, 1 uses incq when
// UseIncDec is set, anything else uses addq.
614void MacroAssembler::incrementq(Register reg, int value) {
615 if (value == min_jint) { addq(reg, value); return; }
616 if (value < 0) { decrementq(reg, -value); return; }
617 if (value == 0) { ; return; }
618 if (value == 1 && UseIncDec) { incq(reg) ; return; }
619 /* else */ { addq(reg, value) ; return; }
620}
621
// Memory-operand variant: adds value to the quadword at dst using the same
// case split as the register variant (min_jint, negative, 0, 1 with
// UseIncDec, otherwise addq).
622void MacroAssembler::incrementq(Address dst, int value) {
623 if (value == min_jint) { addq(dst, value); return; }
624 if (value < 0) { decrementq(dst, -value); return; }
625 if (value == 0) { ; return; }
626 if (value == 1 && UseIncDec) { incq(dst) ; return; }
627 /* else */ { addq(dst, value) ; return; }
628}
629
630// 32bit can do a case table jump in one instruction but we no longer allow the base
631// to be installed in the Address class
// 64-bit: loads the table base into rscratch1 (overwriting it), installs it as
// the base register of the index Address, and jumps through the result.
// Asserts that the index Address arrives without a base register.
632void MacroAssembler::jump(ArrayAddress entry) {
633 lea(rscratch1, entry.base());
634 Address dispatch = entry.index();
635 assert(dispatch._base == noreg, "must be")do { if (!(dispatch._base == noreg)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 635, "assert(" "dispatch._base == noreg" ") failed", "must be"
); ::breakpoint(); } } while (0)
;
636 dispatch._base = rscratch1;
637 jmp(dispatch);
638}
639
// 64-bit: not expected to be called - the body starts with
// ShouldNotReachHere(). The cmpq(x_lo, y_lo) after it is still present in the
// source; x_hi and y_hi are unused.
640void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
641 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 641); ::breakpoint(); } while (0)
; // 64bit doesn't use two regs
642 cmpq(x_lo, y_lo);
643}
644
// 64-bit: loads the literal's target address into dst as a 64-bit immediate
// move carrying the literal's relocation spec.
645void MacroAssembler::lea(Register dst, AddressLiteral src) {
646 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
647}
648
// 64-bit: stores the literal's target address to the memory operand dst by
// first materializing it in rscratch1 (which is overwritten).
649void MacroAssembler::lea(Address dst, AddressLiteral adr) {
650 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
651 movptr(dst, rscratch1);
652}
653
// 64-bit: emits the single-byte LEAVE opcode (0xC9) directly, unlike the
// 32-bit version in this file, which emits mov + pop.
654void MacroAssembler::leave() {
655 // %%% is this really better? Why not on 32bit too?
656 emit_int8((unsigned char)0xC9); // LEAVE
657}
658
// 64-bit: not expected to be called - the body starts with
// ShouldNotReachHere(). The negq(lo) after it is still present in the source;
// hi is unused.
659void MacroAssembler::lneg(Register hi, Register lo) {
660 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 660); ::breakpoint(); } while (0)
; // 64bit doesn't use two regs
661 negq(lo);
662}
663
// 64-bit: moves the jobject value into dst as a 64-bit literal with an
// immediate oop relocation spec.
664void MacroAssembler::movoop(Register dst, jobject obj) {
665 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
666}
667
// 64-bit: memory-destination variant; materializes the jobject value in
// rscratch1 (which is overwritten) and stores it to dst.
668void MacroAssembler::movoop(Address dst, jobject obj) {
669 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
670 movq(dst, rscratch1);
671}
672
// 64-bit: moves the Metadata pointer into dst as a 64-bit literal with an
// immediate metadata relocation spec.
673void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
674 mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
675}
676
// 64-bit: memory-destination variant; materializes the Metadata pointer in
// rscratch1 (which is overwritten) and stores it to dst.
677void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
678 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
679 movq(dst, rscratch1);
680}
681
// 64-bit: for an lval literal, loads the literal's target address itself into
// dst. Otherwise loads the quadword stored at that address - directly when
// the literal is reachable, else through `scratch`, which is overwritten with
// the address.
682void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) {
683 if (src.is_lval()) {
684 mov_literal64(dst, (intptr_t)src.target(), src.rspec());
685 } else {
686 if (reachable(src)) {
687 movq(dst, as_Address(src));
688 } else {
689 lea(scratch, src);
690 movq(dst, Address(scratch, 0));
691 }
692 }
693}
694
// 64-bit: stores src to the array element addressed by dst (movq). The
// 64-bit as_Address(ArrayAddress) in this file overwrites rscratch1.
695void MacroAssembler::movptr(ArrayAddress dst, Register src) {
696 movq(as_Address(dst), src);
697}
698
// 64-bit: loads dst from the array element addressed by src (movq). The
// 64-bit as_Address(ArrayAddress) in this file overwrites rscratch1.
699void MacroAssembler::movptr(Register dst, ArrayAddress src) {
700 movq(dst, as_Address(src));
701}
702
703// src should NEVER be a real pointer. Use AddressLiteral for true pointers
// 64-bit: stores the immediate src to dst. Values that pass is_simm32 are
// forwarded to the int32_t overload; others are materialized in rscratch1
// (which is overwritten) and stored with movq.
704void MacroAssembler::movptr(Address dst, intptr_t src) {
705 if (is_simm32(src)) {
706 movptr(dst, checked_cast<int32_t>(src));
707 } else {
708 mov64(rscratch1, src);
709 movq(dst, rscratch1);
710 }
711}
712
713// These are mostly for initializing NULL
// 64-bit: stores the 32-bit immediate src to the memory operand dst via
// movslq.
714void MacroAssembler::movptr(Address dst, int32_t src) {
715 movslq(dst, src);
716}
717
// 64-bit: loads the 32-bit immediate src, widened to intptr_t, into dst via
// mov64.
718void MacroAssembler::movptr(Register dst, int32_t src) {
719 mov64(dst, (intptr_t)src);
720}
721
// 64-bit: pushes the jobject value by loading it into rscratch1 (which is
// overwritten) and pushing that register.
722void MacroAssembler::pushoop(jobject obj) {
723 movoop(rscratch1, obj);
724 push(rscratch1);
725}
726
// 64-bit: pushes the Metadata pointer by loading it into rscratch1 (which is
// overwritten) and pushing that register.
727void MacroAssembler::pushklass(Metadata* obj) {
728 mov_metadata(rscratch1, obj);
729 push(rscratch1);
730}
731
// 64-bit: loads the literal's address into rscratch1 (which is overwritten),
// then pushes either that address itself (lval) or the quadword stored at it.
732void MacroAssembler::pushptr(AddressLiteral src) {
733 lea(rscratch1, src);
734 if (src.is_lval()) {
735 push(rscratch1);
736 } else {
737 pushq(Address(rscratch1, 0));
738 }
739}
740
// 64-bit convenience overload: forwards to the two-argument version with
// r15_thread as the thread register.
741void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
742 reset_last_Java_frame(r15_thread, clear_fp);
743}
744
// 64-bit: emits vzeroupper, then records the last Java frame in fields
// addressed off r15_thread. last_java_sp defaults to rsp when the register
// passed is not valid; the fp store is emitted only for a valid last_java_fp;
// the pc store is emitted only for a non-null last_java_pc and goes through
// rscratch1 (which is overwritten). The sp store is emitted last.
745void MacroAssembler::set_last_Java_frame(Register last_java_sp,
746 Register last_java_fp,
747 address last_java_pc) {
748 vzeroupper();
749 // determine last_java_sp register
750 if (!last_java_sp->is_valid()) {
751 last_java_sp = rsp;
752 }
753
754 // last_java_fp is optional
755 if (last_java_fp->is_valid()) {
756 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
757 last_java_fp);
758 }
759
760 // last_java_pc is optional
761 if (last_java_pc != NULL__null) {
762 Address java_pc(r15_thread,
763 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
764 lea(rscratch1, InternalAddress(last_java_pc));
765 movptr(java_pc, rscratch1);
766 }
767
768 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
769}
770
771static void pass_arg0(MacroAssembler* masm, Register arg) {
772 if (c_rarg0 != arg ) {
773 masm->mov(c_rarg0, arg);
774 }
775}
776
777static void pass_arg1(MacroAssembler* masm, Register arg) {
778 if (c_rarg1 != arg ) {
779 masm->mov(c_rarg1, arg);
780 }
781}
782
783static void pass_arg2(MacroAssembler* masm, Register arg) {
784 if (c_rarg2 != arg ) {
785 masm->mov(c_rarg2, arg);
786 }
787}
788
789static void pass_arg3(MacroAssembler* masm, Register arg) {
790 if (c_rarg3 != arg ) {
791 masm->mov(c_rarg3, arg);
792 }
793}
794
795void MacroAssembler::stop(const char* msg) {
796 if (ShowMessageBoxOnError) {
797 address rip = pc();
798 pusha(); // get regs on stack
799 lea(c_rarg1, InternalAddress(rip));
800 movq(c_rarg2, rsp); // pass pointer to regs array
801 }
802 lea(c_rarg0, ExternalAddress((address) msg));
803 andq(rsp, -16); // align stack as required by ABI
804 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)((address)((address_word)(MacroAssembler::debug64)))));
805 hlt();
806}
807
808void MacroAssembler::warn(const char* msg) {
809 push(rbp);
810 movq(rbp, rsp);
811 andq(rsp, -16); // align stack as required by push_CPU_state and call
812 push_CPU_state(); // keeps alignment at 16 bytes
813 lea(c_rarg0, ExternalAddress((address) msg));
814 lea(rax, ExternalAddress(CAST_FROM_FN_PTR(address, warning)((address)((address_word)(warning)))));
815 call(rax);
816 pop_CPU_state();
817 mov(rsp, rbp);
818 pop(rbp);
819}
820
821void MacroAssembler::print_state() {
822 address rip = pc();
823 pusha(); // get regs on stack
824 push(rbp);
825 movq(rbp, rsp);
826 andq(rsp, -16); // align stack as required by push_CPU_state and call
827 push_CPU_state(); // keeps alignment at 16 bytes
828
829 lea(c_rarg0, InternalAddress(rip));
830 lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
831 call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64)((address)((address_word)(MacroAssembler::print_state64))), c_rarg0, c_rarg1);
832
833 pop_CPU_state();
834 mov(rsp, rbp);
835 pop(rbp);
836 popa();
837}
838
839#ifndef PRODUCT
840extern "C" void findpc(intptr_t x);
841#endif
842
843void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
844 // In order to get locks to work, we need to fake a in_VM state
845 if (ShowMessageBoxOnError) {
846 JavaThread* thread = JavaThread::current();
847 JavaThreadState saved_state = thread->thread_state();
848 thread->set_thread_state(_thread_in_vm);
849#ifndef PRODUCT
850 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
851 ttyLocker ttyl;
852 BytecodeCounter::print();
853 }
854#endif
855 // To see where a verify_oop failed, get $ebx+40/X for this frame.
856 // XXX correct this offset for amd64
857 // This is the value of eip which points to where verify_oop will return.
858 if (os::message_box(msg, "Execution stopped, print registers?")) {
859 print_state64(pc, regs);
860 BREAKPOINT::breakpoint();
861 }
862 }
863 fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 863, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0)
;
864}
865
866void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
867 ttyLocker ttyl;
868 FlagSetting fs(Debugging, true);
869 tty->print_cr("rip = 0x%016lx", (intptr_t)pc);
870#ifndef PRODUCT
871 tty->cr();
872 findpc(pc);
873 tty->cr();
874#endif
875#define PRINT_REG(rax, value) \
876 { tty->print("%s = ", #rax); os::print_location(tty, value); }
877 PRINT_REG(rax, regs[15]);
878 PRINT_REG(rbx, regs[12]);
879 PRINT_REG(rcx, regs[14]);
880 PRINT_REG(rdx, regs[13]);
881 PRINT_REG(rdi, regs[8]);
882 PRINT_REG(rsi, regs[9]);
883 PRINT_REG(rbp, regs[10]);
884 // rsp is actually not stored by pusha(), compute the old rsp from regs (rsp after pusha): regs + 16 = old rsp
885 PRINT_REG(rsp, (intptr_t)(&regs[16]));
886 PRINT_REG(r8 , regs[7]);
887 PRINT_REG(r9 , regs[6]);
888 PRINT_REG(r10, regs[5]);
889 PRINT_REG(r11, regs[4]);
890 PRINT_REG(r12, regs[3]);
891 PRINT_REG(r13, regs[2]);
892 PRINT_REG(r14, regs[1]);
893 PRINT_REG(r15, regs[0]);
894#undef PRINT_REG
895 // Print some words near the top of the stack.
896 int64_t* rsp = &regs[16];
897 int64_t* dump_sp = rsp;
898 for (int col1 = 0; col1 < 8; col1++) {
899 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
900 os::print_location(tty, *dump_sp++);
901 }
902 for (int row = 0; row < 25; row++) {
903 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
904 for (int col = 0; col < 4; col++) {
905 tty->print(" 0x%016lx", (intptr_t)*dump_sp++);
906 }
907 tty->cr();
908 }
909 // Print some instructions around pc:
910 Disassembler::decode((address)pc-64, (address)pc);
911 tty->print_cr("--------");
912 Disassembler::decode((address)pc, (address)pc+32);
913}
914
915// The java_calling_convention describes stack locations as ideal slots on
916// a frame with no abi restrictions. Since we must observe abi restrictions
917// (like the placement of the register window) the slots must be biased by
918// the following value.
919static int reg2offset_in(VMReg r) {
920 // Account for saved rbp and return address
921 // This should really be in_preserve_stack_slots
922 return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
923}
924
925static int reg2offset_out(VMReg r) {
926 return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
927}
928
929// A long move
930void MacroAssembler::long_move(VMRegPair src, VMRegPair dst) {
931
932 // The calling conventions assures us that each VMregpair is either
933 // all really one physical register or adjacent stack slots.
934
935 if (src.is_single_phys_reg() ) {
936 if (dst.is_single_phys_reg()) {
937 if (dst.first() != src.first()) {
938 mov(dst.first()->as_Register(), src.first()->as_Register());
939 }
940 } else {
941 assert(dst.is_single_reg(), "not a stack pair")do { if (!(dst.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 941, "assert(" "dst.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
942 movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
943 }
944 } else if (dst.is_single_phys_reg()) {
945 assert(src.is_single_reg(), "not a stack pair")do { if (!(src.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 945, "assert(" "src.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
946 movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first())));
947 } else {
948 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs")do { if (!(src.is_single_reg() && dst.is_single_reg()
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 948, "assert(" "src.is_single_reg() && dst.is_single_reg()"
") failed", "not stack pairs"); ::breakpoint(); } } while (0
)
;
949 movq(rax, Address(rbp, reg2offset_in(src.first())));
950 movq(Address(rsp, reg2offset_out(dst.first())), rax);
951 }
952}
953
954// A double move
955void MacroAssembler::double_move(VMRegPair src, VMRegPair dst) {
956
957 // The calling conventions assures us that each VMregpair is either
958 // all really one physical register or adjacent stack slots.
959
960 if (src.is_single_phys_reg() ) {
961 if (dst.is_single_phys_reg()) {
962 // In theory these overlap but the ordering is such that this is likely a nop
963 if ( src.first() != dst.first()) {
964 movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
965 }
966 } else {
967 assert(dst.is_single_reg(), "not a stack pair")do { if (!(dst.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 967, "assert(" "dst.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
968 movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
969 }
970 } else if (dst.is_single_phys_reg()) {
971 assert(src.is_single_reg(), "not a stack pair")do { if (!(src.is_single_reg())) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 971, "assert(" "src.is_single_reg()" ") failed", "not a stack pair"
); ::breakpoint(); } } while (0)
;
972 movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first())));
973 } else {
974 assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs")do { if (!(src.is_single_reg() && dst.is_single_reg()
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 974, "assert(" "src.is_single_reg() && dst.is_single_reg()"
") failed", "not stack pairs"); ::breakpoint(); } } while (0
)
;
975 movq(rax, Address(rbp, reg2offset_in(src.first())));
976 movq(Address(rsp, reg2offset_out(dst.first())), rax);
977 }
978}
979
980
981// A float arg may have to do float reg int reg conversion
982void MacroAssembler::float_move(VMRegPair src, VMRegPair dst) {
983 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move")do { if (!(!src.second()->is_valid() && !dst.second
()->is_valid())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 983, "assert(" "!src.second()->is_valid() && !dst.second()->is_valid()"
") failed", "bad float_move"); ::breakpoint(); } } while (0)
;
984
985 // The calling conventions assures us that each VMregpair is either
986 // all really one physical register or adjacent stack slots.
987
988 if (src.first()->is_stack()) {
989 if (dst.first()->is_stack()) {
990 movl(rax, Address(rbp, reg2offset_in(src.first())));
991 movptr(Address(rsp, reg2offset_out(dst.first())), rax);
992 } else {
993 // stack to reg
994 assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters")do { if (!(dst.first()->is_XMMRegister())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 994, "assert(" "dst.first()->is_XMMRegister()" ") failed"
, "only expect xmm registers as parameters"); ::breakpoint();
} } while (0)
;
995 movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
996 }
997 } else if (dst.first()->is_stack()) {
998 // reg to stack
999 assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters")do { if (!(src.first()->is_XMMRegister())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 999, "assert(" "src.first()->is_XMMRegister()" ") failed"
, "only expect xmm registers as parameters"); ::breakpoint();
} } while (0)
;
1000 movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
1001 } else {
1002 // reg to reg
1003 // In theory these overlap but the ordering is such that this is likely a nop
1004 if ( src.first() != dst.first()) {
1005 movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
1006 }
1007 }
1008}
1009
1010// On 64 bit we will store integer like items to the stack as
1011// 64 bits items (x86_32/64 abi) even though java would only store
1012// 32bits for a parameter. On 32bit it will simply be 32 bits
1013// So this routine will do 32->32 on 32bit and 32->64 on 64bit
1014void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst) {
1015 if (src.first()->is_stack()) {
1016 if (dst.first()->is_stack()) {
1017 // stack to stack
1018 movslq(rax, Address(rbp, reg2offset_in(src.first())));
1019 movq(Address(rsp, reg2offset_out(dst.first())), rax);
1020 } else {
1021 // stack to reg
1022 movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
1023 }
1024 } else if (dst.first()->is_stack()) {
1025 // reg to stack
1026 // Do we really have to sign extend???
1027 // __ movslq(src.first()->as_Register(), src.first()->as_Register());
1028 movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
1029 } else {
1030 // Do we really have to sign extend???
1031 // __ movslq(dst.first()->as_Register(), src.first()->as_Register());
1032 if (dst.first() != src.first()) {
1033 movq(dst.first()->as_Register(), src.first()->as_Register());
1034 }
1035 }
1036}
1037
1038void MacroAssembler::move_ptr(VMRegPair src, VMRegPair dst) {
1039 if (src.first()->is_stack()) {
1040 if (dst.first()->is_stack()) {
1041 // stack to stack
1042 movq(rax, Address(rbp, reg2offset_in(src.first())));
1043 movq(Address(rsp, reg2offset_out(dst.first())), rax);
1044 } else {
1045 // stack to reg
1046 movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
1047 }
1048 } else if (dst.first()->is_stack()) {
1049 // reg to stack
1050 movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
1051 } else {
1052 if (dst.first() != src.first()) {
1053 movq(dst.first()->as_Register(), src.first()->as_Register());
1054 }
1055 }
1056}
1057
1058// An oop arg. Must pass a handle not the oop itself
1059void MacroAssembler::object_move(OopMap* map,
1060 int oop_handle_offset,
1061 int framesize_in_slots,
1062 VMRegPair src,
1063 VMRegPair dst,
1064 bool is_receiver,
1065 int* receiver_offset) {
1066
1067 // must pass a handle. First figure out the location we use as a handle
1068
1069 Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register();
1070
1071 // See if oop is NULL if it is we need no handle
1072
1073 if (src.first()->is_stack()) {
1074
1075 // Oop is already on the stack as an argument
1076 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1077 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1078 if (is_receiver) {
1079 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1080 }
1081
1082 cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD0L);
1083 lea(rHandle, Address(rbp, reg2offset_in(src.first())));
1084 // conditionally move a NULL
1085 cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first())));
1086 } else {
1087
1088 // Oop is in an a register we must store it to the space we reserve
1089 // on the stack for oop_handles and pass a handle if oop is non-NULL
1090
1091 const Register rOop = src.first()->as_Register();
1092 int oop_slot;
1093 if (rOop == j_rarg0)
1094 oop_slot = 0;
1095 else if (rOop == j_rarg1)
1096 oop_slot = 1;
1097 else if (rOop == j_rarg2)
1098 oop_slot = 2;
1099 else if (rOop == j_rarg3)
1100 oop_slot = 3;
1101 else if (rOop == j_rarg4)
1102 oop_slot = 4;
1103 else {
1104 assert(rOop == j_rarg5, "wrong register")do { if (!(rOop == j_rarg5)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1104, "assert(" "rOop == j_rarg5" ") failed", "wrong register"
); ::breakpoint(); } } while (0)
;
1105 oop_slot = 5;
1106 }
1107
1108 oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
1109 int offset = oop_slot*VMRegImpl::stack_slot_size;
1110
1111 map->set_oop(VMRegImpl::stack2reg(oop_slot));
1112 // Store oop in handle area, may be NULL
1113 movptr(Address(rsp, offset), rOop);
1114 if (is_receiver) {
1115 *receiver_offset = offset;
1116 }
1117
1118 cmpptr(rOop, (int32_t)NULL_WORD0L);
1119 lea(rHandle, Address(rsp, offset));
1120 // conditionally move a NULL from the handle area where it was just stored
1121 cmovptr(Assembler::equal, rHandle, Address(rsp, offset));
1122 }
1123
1124 // If arg is on the stack then place it otherwise it is already in correct reg.
1125 if (dst.first()->is_stack()) {
1126 movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
1127 }
1128}
1129
1130#endif // _LP64
1131
1132// Now versions that are common to 32/64 bit
1133
1134void MacroAssembler::addptr(Register dst, int32_t imm32) {
1135 LP64_ONLY(addq(dst, imm32))addq(dst, imm32) NOT_LP64(addl(dst, imm32));
1136}
1137
1138void MacroAssembler::addptr(Register dst, Register src) {
1139 LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src));
1140}
1141
1142void MacroAssembler::addptr(Address dst, Register src) {
1143 LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src));
1144}
1145
1146void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
1147 if (reachable(src)) {
1148 Assembler::addsd(dst, as_Address(src));
1149 } else {
1150 lea(rscratch1, src);
1151 Assembler::addsd(dst, Address(rscratch1, 0));
1152 }
1153}
1154
1155void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
1156 if (reachable(src)) {
1157 addss(dst, as_Address(src));
1158 } else {
1159 lea(rscratch1, src);
1160 addss(dst, Address(rscratch1, 0));
1161 }
1162}
1163
1164void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) {
1165 if (reachable(src)) {
1166 Assembler::addpd(dst, as_Address(src));
1167 } else {
1168 lea(rscratch1, src);
1169 Assembler::addpd(dst, Address(rscratch1, 0));
1170 }
1171}
1172
1173// See 8273459. Function for ensuring 64-byte alignment, intended for stubs only.
1174// Stub code is generated once and never copied.
1175// NMethods can't use this because they get copied and we can't force alignment > 32 bytes.
1176void MacroAssembler::align64() {
1177 align(64, (unsigned long long) pc());
1178}
1179
1180void MacroAssembler::align32() {
1181 align(32, (unsigned long long) pc());
1182}
1183
1184void MacroAssembler::align(int modulus) {
1185 // 8273459: Ensure alignment is possible with current segment alignment
1186 assert(modulus <= CodeEntryAlignment, "Alignment must be <= CodeEntryAlignment")do { if (!(modulus <= CodeEntryAlignment)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1186, "assert(" "modulus <= CodeEntryAlignment" ") failed"
, "Alignment must be <= CodeEntryAlignment"); ::breakpoint
(); } } while (0)
;
1187 align(modulus, offset());
1188}
1189
1190void MacroAssembler::align(int modulus, int target) {
1191 if (target % modulus != 0) {
1192 nop(modulus - (target % modulus));
1193 }
1194}
1195
1196void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
1197 // Used in sign-masking with aligned address.
1198 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1198, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
1199 if (reachable(src)) {
1200 Assembler::andpd(dst, as_Address(src));
1201 } else {
1202 lea(scratch_reg, src);
1203 Assembler::andpd(dst, Address(scratch_reg, 0));
1204 }
1205}
1206
1207void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
1208 // Used in sign-masking with aligned address.
1209 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1209, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
1210 if (reachable(src)) {
1211 Assembler::andps(dst, as_Address(src));
1212 } else {
1213 lea(scratch_reg, src);
1214 Assembler::andps(dst, Address(scratch_reg, 0));
1215 }
1216}
1217
1218void MacroAssembler::andptr(Register dst, int32_t imm32) {
1219 LP64_ONLY(andq(dst, imm32))andq(dst, imm32) NOT_LP64(andl(dst, imm32));
1220}
1221
1222void MacroAssembler::atomic_incl(Address counter_addr) {
1223 lock();
1224 incrementl(counter_addr);
1225}
1226
1227void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
1228 if (reachable(counter_addr)) {
1229 atomic_incl(as_Address(counter_addr));
1230 } else {
1231 lea(scr, counter_addr);
1232 atomic_incl(Address(scr, 0));
1233 }
1234}
1235
1236#ifdef _LP641
1237void MacroAssembler::atomic_incq(Address counter_addr) {
1238 lock();
1239 incrementq(counter_addr);
1240}
1241
1242void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
1243 if (reachable(counter_addr)) {
1244 atomic_incq(as_Address(counter_addr));
1245 } else {
1246 lea(scr, counter_addr);
1247 atomic_incq(Address(scr, 0));
1248 }
1249}
1250#endif
1251
1252// Writes to stack successive pages until offset reached to check for
1253// stack overflow + shadow pages. This clobbers tmp.
1254void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1255 movptr(tmp, rsp);
1256 // Bang stack for total size given plus shadow page size.
1257 // Bang one page at a time because large size can bang beyond yellow and
1258 // red zones.
1259 Label loop;
1260 bind(loop);
1261 movl(Address(tmp, (-os::vm_page_size())), size );
1262 subptr(tmp, os::vm_page_size());
1263 subl(size, os::vm_page_size());
1264 jcc(Assembler::greater, loop);
1265
1266 // Bang down shadow pages too.
1267 // At this point, (tmp-0) is the last address touched, so don't
1268 // touch it again. (It was touched as (tmp-pagesize) but then tmp
1269 // was post-decremented.) Skip this address by starting at i=1, and
1270 // touch a few more pages below. N.B. It is important to touch all
1271 // the way down including all pages in the shadow zone.
1272 for (int i = 1; i < ((int)StackOverflow::stack_shadow_zone_size() / os::vm_page_size()); i++) {
1273 // this could be any sized move but this is can be a debugging crumb
1274 // so the bigger the better.
1275 movptr(Address(tmp, (-i*os::vm_page_size())), size );
1276 }
1277}
1278
1279void MacroAssembler::reserved_stack_check() {
1280 // testing if reserved zone needs to be enabled
1281 Label no_reserved_zone_enabling;
1282 Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread)r15_thread;
1283 NOT_LP64(get_thread(rsi);)
1284
1285 cmpptr(rsp, Address(thread, JavaThread::reserved_stack_activation_offset()));
1286 jcc(Assembler::below, no_reserved_zone_enabling);
1287
1288 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)((address)((address_word)(SharedRuntime::enable_stack_reserved_zone
)))
, thread);
1289 jump(RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()));
1290 should_not_reach_here();
1291
1292 bind(no_reserved_zone_enabling);
1293}
1294
1295void MacroAssembler::c2bool(Register x) {
1296 // implements x == 0 ? 0 : 1
1297 // note: must only look at least-significant byte of x
1298 // since C-style booleans are stored in one byte
1299 // only! (was bug)
1300 andl(x, 0xFF);
1301 setb(Assembler::notZero, x);
1302}
1303
1304// Wouldn't need if AddressLiteral version had new name
1305void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
1306 Assembler::call(L, rtype);
1307}
1308
1309void MacroAssembler::call(Register entry) {
1310 Assembler::call(entry);
1311}
1312
1313void MacroAssembler::call(AddressLiteral entry) {
1314 if (reachable(entry)) {
1315 Assembler::call_literal(entry.target(), entry.rspec());
1316 } else {
1317 lea(rscratch1, entry);
1318 Assembler::call(rscratch1);
1319 }
1320}
1321
1322void MacroAssembler::ic_call(address entry, jint method_index) {
1323 RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
1324 movptr(rax, (intptr_t)Universe::non_oop_word());
1325 call(AddressLiteral(entry, rh));
1326}
1327
1328// Implementation of call_VM versions
1329
1330void MacroAssembler::call_VM(Register oop_result,
1331 address entry_point,
1332 bool check_exceptions) {
1333 Label C, E;
1334 call(C, relocInfo::none);
1335 jmp(E);
1336
1337 bind(C);
1338 call_VM_helper(oop_result, entry_point, 0, check_exceptions);
1339 ret(0);
1340
1341 bind(E);
1342}
1343
1344void MacroAssembler::call_VM(Register oop_result,
1345 address entry_point,
1346 Register arg_1,
1347 bool check_exceptions) {
1348 Label C, E;
1349 call(C, relocInfo::none);
1350 jmp(E);
1351
1352 bind(C);
1353 pass_arg1(this, arg_1);
1354 call_VM_helper(oop_result, entry_point, 1, check_exceptions);
1355 ret(0);
1356
1357 bind(E);
1358}
1359
1360void MacroAssembler::call_VM(Register oop_result,
1361 address entry_point,
1362 Register arg_1,
1363 Register arg_2,
1364 bool check_exceptions) {
1365 Label C, E;
1366 call(C, relocInfo::none);
1367 jmp(E);
1368
1369 bind(C);
1370
1371 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1371, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1372
1373 pass_arg2(this, arg_2);
1374 pass_arg1(this, arg_1);
1375 call_VM_helper(oop_result, entry_point, 2, check_exceptions);
1376 ret(0);
1377
1378 bind(E);
1379}
1380
1381void MacroAssembler::call_VM(Register oop_result,
1382 address entry_point,
1383 Register arg_1,
1384 Register arg_2,
1385 Register arg_3,
1386 bool check_exceptions) {
1387 Label C, E;
1388 call(C, relocInfo::none);
1389 jmp(E);
1390
1391 bind(C);
1392
1393 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1393, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1394 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1394, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1395 pass_arg3(this, arg_3);
1396
1397 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1397, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1398 pass_arg2(this, arg_2);
1399
1400 pass_arg1(this, arg_1);
1401 call_VM_helper(oop_result, entry_point, 3, check_exceptions);
1402 ret(0);
1403
1404 bind(E);
1405}
1406
1407void MacroAssembler::call_VM(Register oop_result,
1408 Register last_java_sp,
1409 address entry_point,
1410 int number_of_arguments,
1411 bool check_exceptions) {
1412 Register thread = LP64_ONLY(r15_thread)r15_thread NOT_LP64(noreg);
1413 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
1414}
1415
1416void MacroAssembler::call_VM(Register oop_result,
1417 Register last_java_sp,
1418 address entry_point,
1419 Register arg_1,
1420 bool check_exceptions) {
1421 pass_arg1(this, arg_1);
1422 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
1423}
1424
1425void MacroAssembler::call_VM(Register oop_result,
1426 Register last_java_sp,
1427 address entry_point,
1428 Register arg_1,
1429 Register arg_2,
1430 bool check_exceptions) {
1431
1432 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1432, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1433 pass_arg2(this, arg_2);
1434 pass_arg1(this, arg_1);
1435 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
1436}
1437
1438void MacroAssembler::call_VM(Register oop_result,
1439 Register last_java_sp,
1440 address entry_point,
1441 Register arg_1,
1442 Register arg_2,
1443 Register arg_3,
1444 bool check_exceptions) {
1445 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1445, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1446 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1446, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1447 pass_arg3(this, arg_3);
1448 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1448, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1449 pass_arg2(this, arg_2);
1450 pass_arg1(this, arg_1);
1451 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
1452}
1453
1454void MacroAssembler::super_call_VM(Register oop_result,
1455 Register last_java_sp,
1456 address entry_point,
1457 int number_of_arguments,
1458 bool check_exceptions) {
1459 Register thread = LP64_ONLY(r15_thread)r15_thread NOT_LP64(noreg);
1460 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
1461}
1462
1463void MacroAssembler::super_call_VM(Register oop_result,
1464 Register last_java_sp,
1465 address entry_point,
1466 Register arg_1,
1467 bool check_exceptions) {
1468 pass_arg1(this, arg_1);
1469 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
1470}
1471
1472void MacroAssembler::super_call_VM(Register oop_result,
1473 Register last_java_sp,
1474 address entry_point,
1475 Register arg_1,
1476 Register arg_2,
1477 bool check_exceptions) {
1478
1479 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1479, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1480 pass_arg2(this, arg_2);
1481 pass_arg1(this, arg_1);
1482 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
1483}
1484
1485void MacroAssembler::super_call_VM(Register oop_result,
1486 Register last_java_sp,
1487 address entry_point,
1488 Register arg_1,
1489 Register arg_2,
1490 Register arg_3,
1491 bool check_exceptions) {
1492 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1492, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1493 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1493, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1494 pass_arg3(this, arg_3);
1495 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1495, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1496 pass_arg2(this, arg_2);
1497 pass_arg1(this, arg_1);
1498 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
1499}
1500
1501void MacroAssembler::call_VM_base(Register oop_result,
1502 Register java_thread,
1503 Register last_java_sp,
1504 address entry_point,
1505 int number_of_arguments,
1506 bool check_exceptions) {
1507 // determine java_thread register
1508 if (!java_thread->is_valid()) {
1509#ifdef _LP641
1510 java_thread = r15_thread;
1511#else
1512 java_thread = rdi;
1513 get_thread(java_thread);
1514#endif // LP64
1515 }
1516 // determine last_java_sp register
1517 if (!last_java_sp->is_valid()) {
1518 last_java_sp = rsp;
1519 }
1520 // debugging support
1521 assert(number_of_arguments >= 0 , "cannot have negative number of arguments")do { if (!(number_of_arguments >= 0)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1521, "assert(" "number_of_arguments >= 0" ") failed", "cannot have negative number of arguments"
); ::breakpoint(); } } while (0)
;
1522 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"))do { if (!(java_thread == r15_thread)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1522, "assert(" "java_thread == r15_thread" ") failed", "unexpected register"
); ::breakpoint(); } } while (0)
;
1523#ifdef ASSERT1
1524 // TraceBytecodes does not use r12 but saves it over the call, so don't verify
1525 // r12 is the heapbase.
1526 LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)if (UseCompressedOops && !TraceBytecodes) verify_heapbase
("call_VM_base: heap base corrupted?");
1527#endif // ASSERT
1528
1529 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result")do { if (!(java_thread != oop_result)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1529, "assert(" "java_thread != oop_result" ") failed", "cannot use the same register for java_thread & oop_result"
); ::breakpoint(); } } while (0)
;
1530 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp")do { if (!(java_thread != last_java_sp)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1530, "assert(" "java_thread != last_java_sp" ") failed", "cannot use the same register for java_thread & last_java_sp"
); ::breakpoint(); } } while (0)
;
1531
1532 // push java thread (becomes first argument of C function)
1533
1534 NOT_LP64(push(java_thread); number_of_arguments++);
1535 LP64_ONLY(mov(c_rarg0, r15_thread))mov(c_rarg0, r15_thread);
1536
1537 // set last Java frame before call
1538 assert(last_java_sp != rbp, "can't use ebp/rbp")do { if (!(last_java_sp != rbp)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1538, "assert(" "last_java_sp != rbp" ") failed", "can't use ebp/rbp"
); ::breakpoint(); } } while (0)
;
1539
1540 // Only interpreter should have to set fp
1541 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL__null);
1542
1543 // do the call, remove parameters
1544 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
1545
1546 // restore the thread (cannot use the pushed argument since arguments
1547 // may be overwritten by C code generated by an optimizing compiler);
1548 // however can use the register value directly if it is callee saved.
1549 if (LP64_ONLY(true ||)true || java_thread == rdi || java_thread == rsi) {
1550 // rdi & rsi (also r15) are callee saved -> nothing to do
1551#ifdef ASSERT1
1552 guarantee(java_thread != rax, "change this code")do { if (!(java_thread != rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1552, "guarantee(" "java_thread != rax" ") failed", "change this code"
); ::breakpoint(); } } while (0)
;
1553 push(rax);
1554 { Label L;
1555 get_thread(rax);
1556 cmpptr(java_thread, rax);
1557 jcc(Assembler::equal, L);
1558 STOP("MacroAssembler::call_VM_base: rdi not callee saved?")block_comment("MacroAssembler::call_VM_base: rdi not callee saved?"
); stop("MacroAssembler::call_VM_base: rdi not callee saved?"
)
;
1559 bind(L);
1560 }
1561 pop(rax);
1562#endif
1563 } else {
1564 get_thread(java_thread);
1565 }
1566 // reset last Java frame
1567 // Only interpreter should have to clear fp
1568 reset_last_Java_frame(java_thread, true);
1569
1570 // C++ interp handles this in the interpreter
1571 check_and_handle_popframe(java_thread);
1572 check_and_handle_earlyret(java_thread);
1573
1574 if (check_exceptions) {
1575 // check for pending exceptions (java_thread is set upon return)
1576 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD0L);
1577#ifndef _LP641
1578 jump_cc(Assembler::notEqual,
1579 RuntimeAddress(StubRoutines::forward_exception_entry()));
1580#else
1581 // This used to conditionally jump to forward_exception however it is
1582 // possible if we relocate that the branch will not reach. So we must jump
1583 // around so we can always reach
1584
1585 Label ok;
1586 jcc(Assembler::equal, ok);
1587 jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1588 bind(ok);
1589#endif // LP64
1590 }
1591
1592 // get oop result if there is one and reset the value in the thread
1593 if (oop_result->is_valid()) {
1594 get_vm_result(oop_result, java_thread);
1595 }
1596}
1597
1598void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
1599
1600 // Calculate the value for last_Java_sp
1601 // somewhat subtle. call_VM does an intermediate call
1602 // which places a return address on the stack just under the
1603 // stack pointer as the user finsihed with it. This allows
1604 // use to retrieve last_Java_pc from last_Java_sp[-1].
1605 // On 32bit we then have to push additional args on the stack to accomplish
1606 // the actual requested call. On 64bit call_VM only can use register args
1607 // so the only extra space is the return address that call_VM created.
1608 // This hopefully explains the calculations here.
1609
1610#ifdef _LP641
1611 // We've pushed one address, correct last_Java_sp
1612 lea(rax, Address(rsp, wordSize));
1613#else
1614 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
1615#endif // LP64
1616
1617 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);
1618
1619}
1620
1621// Use this method when MacroAssembler version of call_VM_leaf_base() should be called from Interpreter.
1622void MacroAssembler::call_VM_leaf0(address entry_point) {
1623 MacroAssembler::call_VM_leaf_base(entry_point, 0);
1624}
1625
1626void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
1627 call_VM_leaf_base(entry_point, number_of_arguments);
1628}
1629
1630void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
1631 pass_arg0(this, arg_0);
1632 call_VM_leaf(entry_point, 1);
1633}
1634
1635void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1636
1637 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1637, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1638 pass_arg1(this, arg_1);
1639 pass_arg0(this, arg_0);
1640 call_VM_leaf(entry_point, 2);
1641}
1642
1643void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1644 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1644, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1645 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1645, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1646 pass_arg2(this, arg_2);
1647 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1647, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1648 pass_arg1(this, arg_1);
1649 pass_arg0(this, arg_0);
1650 call_VM_leaf(entry_point, 3);
1651}
1652
1653void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
1654 pass_arg0(this, arg_0);
1655 MacroAssembler::call_VM_leaf_base(entry_point, 1);
1656}
1657
1658void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
1659
1660 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1660, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1661 pass_arg1(this, arg_1);
1662 pass_arg0(this, arg_0);
1663 MacroAssembler::call_VM_leaf_base(entry_point, 2);
1664}
1665
1666void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
1667 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1667, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1668 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1668, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1669 pass_arg2(this, arg_2);
1670 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1670, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1671 pass_arg1(this, arg_1);
1672 pass_arg0(this, arg_0);
1673 MacroAssembler::call_VM_leaf_base(entry_point, 3);
1674}
1675
1676void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
1677 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"))do { if (!(arg_0 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1677, "assert(" "arg_0 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1678 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1678, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1679 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1679, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1680 pass_arg3(this, arg_3);
1681 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1681, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1682 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1682, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1683 pass_arg2(this, arg_2);
1684 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1684, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg"
); ::breakpoint(); } } while (0)
;
1685 pass_arg1(this, arg_1);
1686 pass_arg0(this, arg_0);
1687 MacroAssembler::call_VM_leaf_base(entry_point, 4);
1688}
1689
1690void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
1691 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
1692 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD0L);
1693 verify_oop_msg(oop_result, "broken oop in call_VM_base")_verify_oop_checked(oop_result, "broken oop " "oop_result" ", "
"\"broken oop in call_VM_base\"", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1693)
;
1694}
1695
1696void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
1697 movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
1698 movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD0L);
1699}
1700
1701void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
1702}
1703
1704void MacroAssembler::check_and_handle_popframe(Register java_thread) {
1705}
1706
1707void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
1708 if (reachable(src1)) {
1709 cmpl(as_Address(src1), imm);
1710 } else {
1711 lea(rscratch1, src1);
1712 cmpl(Address(rscratch1, 0), imm);
1713 }
1714}
1715
1716void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
1717 assert(!src2.is_lval(), "use cmpptr")do { if (!(!src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1717, "assert(" "!src2.is_lval()" ") failed", "use cmpptr")
; ::breakpoint(); } } while (0)
;
1718 if (reachable(src2)) {
1719 cmpl(src1, as_Address(src2));
1720 } else {
1721 lea(rscratch1, src2);
1722 cmpl(src1, Address(rscratch1, 0));
1723 }
1724}
1725
1726void MacroAssembler::cmp32(Register src1, int32_t imm) {
1727 Assembler::cmpl(src1, imm);
1728}
1729
1730void MacroAssembler::cmp32(Register src1, Address src2) {
1731 Assembler::cmpl(src1, src2);
1732}
1733
1734void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
1735 ucomisd(opr1, opr2);
1736
1737 Label L;
1738 if (unordered_is_less) {
1739 movl(dst, -1);
1740 jcc(Assembler::parity, L);
1741 jcc(Assembler::below , L);
1742 movl(dst, 0);
1743 jcc(Assembler::equal , L);
1744 increment(dst);
1745 } else { // unordered is greater
1746 movl(dst, 1);
1747 jcc(Assembler::parity, L);
1748 jcc(Assembler::above , L);
1749 movl(dst, 0);
1750 jcc(Assembler::equal , L);
1751 decrementl(dst);
1752 }
1753 bind(L);
1754}
1755
1756void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
1757 ucomiss(opr1, opr2);
1758
1759 Label L;
1760 if (unordered_is_less) {
1761 movl(dst, -1);
1762 jcc(Assembler::parity, L);
1763 jcc(Assembler::below , L);
1764 movl(dst, 0);
1765 jcc(Assembler::equal , L);
1766 increment(dst);
1767 } else { // unordered is greater
1768 movl(dst, 1);
1769 jcc(Assembler::parity, L);
1770 jcc(Assembler::above , L);
1771 movl(dst, 0);
1772 jcc(Assembler::equal , L);
1773 decrementl(dst);
1774 }
1775 bind(L);
1776}
1777
1778
1779void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
1780 if (reachable(src1)) {
1781 cmpb(as_Address(src1), imm);
1782 } else {
1783 lea(rscratch1, src1);
1784 cmpb(Address(rscratch1, 0), imm);
1785 }
1786}
1787
1788void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
1789#ifdef _LP641
1790 if (src2.is_lval()) {
1791 movptr(rscratch1, src2);
1792 Assembler::cmpq(src1, rscratch1);
1793 } else if (reachable(src2)) {
1794 cmpq(src1, as_Address(src2));
1795 } else {
1796 lea(rscratch1, src2);
1797 Assembler::cmpq(src1, Address(rscratch1, 0));
1798 }
1799#else
1800 if (src2.is_lval()) {
1801 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
1802 } else {
1803 cmpl(src1, as_Address(src2));
1804 }
1805#endif // _LP64
1806}
1807
1808void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
1809 assert(src2.is_lval(), "not a mem-mem compare")do { if (!(src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1809, "assert(" "src2.is_lval()" ") failed", "not a mem-mem compare"
); ::breakpoint(); } } while (0)
;
1810#ifdef _LP641
1811 // moves src2's literal address
1812 movptr(rscratch1, src2);
1813 Assembler::cmpq(src1, rscratch1);
1814#else
1815 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
1816#endif // _LP64
1817}
1818
1819void MacroAssembler::cmpoop(Register src1, Register src2) {
1820 cmpptr(src1, src2);
1821}
1822
1823void MacroAssembler::cmpoop(Register src1, Address src2) {
1824 cmpptr(src1, src2);
1825}
1826
1827#ifdef _LP641
1828void MacroAssembler::cmpoop(Register src1, jobject src2) {
1829 movoop(rscratch1, src2);
1830 cmpptr(src1, rscratch1);
1831}
1832#endif
1833
1834void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
1835 if (reachable(adr)) {
1836 lock();
1837 cmpxchgptr(reg, as_Address(adr));
1838 } else {
1839 lea(rscratch1, adr);
1840 lock();
1841 cmpxchgptr(reg, Address(rscratch1, 0));
1842 }
1843}
1844
1845void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
1846 LP64_ONLY(cmpxchgq(reg, adr))cmpxchgq(reg, adr) NOT_LP64(cmpxchgl(reg, adr));
1847}
1848
1849void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
1850 if (reachable(src)) {
1851 Assembler::comisd(dst, as_Address(src));
1852 } else {
1853 lea(rscratch1, src);
1854 Assembler::comisd(dst, Address(rscratch1, 0));
1855 }
1856}
1857
1858void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
1859 if (reachable(src)) {
1860 Assembler::comiss(dst, as_Address(src));
1861 } else {
1862 lea(rscratch1, src);
1863 Assembler::comiss(dst, Address(rscratch1, 0));
1864 }
1865}
1866
1867
1868void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
1869 Condition negated_cond = negate_condition(cond);
1870 Label L;
1871 jcc(negated_cond, L);
1872 pushf(); // Preserve flags
1873 atomic_incl(counter_addr);
1874 popf();
1875 bind(L);
1876}
1877
1878int MacroAssembler::corrected_idivl(Register reg) {
1879 // Full implementation of Java idiv and irem; checks for
1880 // special case as described in JVM spec., p.243 & p.271.
1881 // The function returns the (pc) offset of the idivl
1882 // instruction - may be needed for implicit exceptions.
1883 //
1884 // normal case special case
1885 //
1886 // input : rax,: dividend min_int
1887 // reg: divisor (may not be rax,/rdx) -1
1888 //
1889 // output: rax,: quotient (= rax, idiv reg) min_int
1890 // rdx: remainder (= rax, irem reg) 0
1891 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register")do { if (!(reg != rax && reg != rdx)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1891, "assert(" "reg != rax && reg != rdx" ") failed"
, "reg cannot be rax, or rdx register"); ::breakpoint(); } } while
(0)
;
1892 const int min_int = 0x80000000;
1893 Label normal_case, special_case;
1894
1895 // check for special case
1896 cmpl(rax, min_int);
1897 jcc(Assembler::notEqual, normal_case);
1898 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
1899 cmpl(reg, -1);
1900 jcc(Assembler::equal, special_case);
1901
1902 // handle normal case
1903 bind(normal_case);
1904 cdql();
1905 int idivl_offset = offset();
1906 idivl(reg);
1907
1908 // normal and special case exit
1909 bind(special_case);
1910
1911 return idivl_offset;
1912}
1913
1914
1915
1916void MacroAssembler::decrementl(Register reg, int value) {
1917 if (value == min_jint) {subl(reg, value) ; return; }
1918 if (value < 0) { incrementl(reg, -value); return; }
1919 if (value == 0) { ; return; }
1920 if (value == 1 && UseIncDec) { decl(reg) ; return; }
1921 /* else */ { subl(reg, value) ; return; }
1922}
1923
1924void MacroAssembler::decrementl(Address dst, int value) {
1925 if (value == min_jint) {subl(dst, value) ; return; }
1926 if (value < 0) { incrementl(dst, -value); return; }
1927 if (value == 0) { ; return; }
1928 if (value == 1 && UseIncDec) { decl(dst) ; return; }
1929 /* else */ { subl(dst, value) ; return; }
1930}
1931
1932void MacroAssembler::division_with_shift (Register reg, int shift_value) {
1933 assert (shift_value > 0, "illegal shift value")do { if (!(shift_value > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1933, "assert(" "shift_value > 0" ") failed", "illegal shift value"
); ::breakpoint(); } } while (0)
;
1934 Label _is_positive;
1935 testl (reg, reg);
1936 jcc (Assembler::positive, _is_positive);
1937 int offset = (1 << shift_value) - 1 ;
1938
1939 if (offset == 1) {
1940 incrementl(reg);
1941 } else {
1942 addl(reg, offset);
1943 }
1944
1945 bind (_is_positive);
1946 sarl(reg, shift_value);
1947}
1948
1949void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
1950 if (reachable(src)) {
1951 Assembler::divsd(dst, as_Address(src));
1952 } else {
1953 lea(rscratch1, src);
1954 Assembler::divsd(dst, Address(rscratch1, 0));
1955 }
1956}
1957
1958void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
1959 if (reachable(src)) {
1960 Assembler::divss(dst, as_Address(src));
1961 } else {
1962 lea(rscratch1, src);
1963 Assembler::divss(dst, Address(rscratch1, 0));
1964 }
1965}
1966
1967void MacroAssembler::enter() {
1968 push(rbp);
1969 mov(rbp, rsp);
1970}
1971
1972// A 5 byte nop that is safe for patching (see patch_verified_entry)
1973void MacroAssembler::fat_nop() {
1974 if (UseAddressNop) {
1975 addr_nop_5();
1976 } else {
1977 emit_int8(0x26); // es:
1978 emit_int8(0x2e); // cs:
1979 emit_int8(0x64); // fs:
1980 emit_int8(0x65); // gs:
1981 emit_int8((unsigned char)0x90);
1982 }
1983}
1984
1985#ifndef _LP641
1986void MacroAssembler::fcmp(Register tmp) {
1987 fcmp(tmp, 1, true, true);
1988}
1989
1990void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
1991 assert(!pop_right || pop_left, "usage error")do { if (!(!pop_right || pop_left)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1991, "assert(" "!pop_right || pop_left" ") failed", "usage error"
); ::breakpoint(); } } while (0)
;
1992 if (VM_Version::supports_cmov()) {
1993 assert(tmp == noreg, "unneeded temp")do { if (!(tmp == noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 1993, "assert(" "tmp == noreg" ") failed", "unneeded temp")
; ::breakpoint(); } } while (0)
;
1994 if (pop_left) {
1995 fucomip(index);
1996 } else {
1997 fucomi(index);
1998 }
1999 if (pop_right) {
2000 fpop();
2001 }
2002 } else {
2003 assert(tmp != noreg, "need temp")do { if (!(tmp != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2003, "assert(" "tmp != noreg" ") failed", "need temp"); ::
breakpoint(); } } while (0)
;
2004 if (pop_left) {
2005 if (pop_right) {
2006 fcompp();
2007 } else {
2008 fcomp(index);
2009 }
2010 } else {
2011 fcom(index);
2012 }
2013 // convert FPU condition into eflags condition via rax,
2014 save_rax(tmp);
2015 fwait(); fnstsw_ax();
2016 sahf();
2017 restore_rax(tmp);
2018 }
2019 // condition codes set as follows:
2020 //
2021 // CF (corresponds to C0) if x < y
2022 // PF (corresponds to C2) if unordered
2023 // ZF (corresponds to C3) if x = y
2024}
2025
2026void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
2027 fcmp2int(dst, unordered_is_less, 1, true, true);
2028}
2029
2030void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
2031 fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
2032 Label L;
2033 if (unordered_is_less) {
2034 movl(dst, -1);
2035 jcc(Assembler::parity, L);
2036 jcc(Assembler::below , L);
2037 movl(dst, 0);
2038 jcc(Assembler::equal , L);
2039 increment(dst);
2040 } else { // unordered is greater
2041 movl(dst, 1);
2042 jcc(Assembler::parity, L);
2043 jcc(Assembler::above , L);
2044 movl(dst, 0);
2045 jcc(Assembler::equal , L);
2046 decrementl(dst);
2047 }
2048 bind(L);
2049}
2050
2051void MacroAssembler::fld_d(AddressLiteral src) {
2052 fld_d(as_Address(src));
2053}
2054
2055void MacroAssembler::fld_s(AddressLiteral src) {
2056 fld_s(as_Address(src));
2057}
2058
2059void MacroAssembler::fldcw(AddressLiteral src) {
2060 Assembler::fldcw(as_Address(src));
2061}
2062
2063void MacroAssembler::fpop() {
2064 ffree();
2065 fincstp();
2066}
2067
2068void MacroAssembler::fremr(Register tmp) {
2069 save_rax(tmp);
2070 { Label L;
2071 bind(L);
2072 fprem();
2073 fwait(); fnstsw_ax();
2074 sahf();
2075 jcc(Assembler::parity, L);
2076 }
2077 restore_rax(tmp);
2078 // Result is in ST0.
2079 // Note: fxch & fpop to get rid of ST1
2080 // (otherwise FPU stack could overflow eventually)
2081 fxch(1);
2082 fpop();
2083}
2084
2085void MacroAssembler::empty_FPU_stack() {
2086 if (VM_Version::supports_mmx()) {
2087 emms();
2088 } else {
2089 for (int i = 8; i-- > 0; ) ffree(i);
2090 }
2091}
2092#endif // !LP64
2093
2094void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
2095 if (reachable(src)) {
2096 Assembler::mulpd(dst, as_Address(src));
2097 } else {
2098 lea(rscratch1, src);
2099 Assembler::mulpd(dst, Address(rscratch1, 0));
2100 }
2101}
2102
2103void MacroAssembler::load_float(Address src) {
2104#ifdef _LP641
2105 movflt(xmm0, src);
2106#else
2107 if (UseSSE >= 1) {
2108 movflt(xmm0, src);
2109 } else {
2110 fld_s(src);
2111 }
2112#endif // LP64
2113}
2114
2115void MacroAssembler::store_float(Address dst) {
2116#ifdef _LP641
2117 movflt(dst, xmm0);
2118#else
2119 if (UseSSE >= 1) {
2120 movflt(dst, xmm0);
2121 } else {
2122 fstp_s(dst);
2123 }
2124#endif // LP64
2125}
2126
2127void MacroAssembler::load_double(Address src) {
2128#ifdef _LP641
2129 movdbl(xmm0, src);
2130#else
2131 if (UseSSE >= 2) {
2132 movdbl(xmm0, src);
2133 } else {
2134 fld_d(src);
2135 }
2136#endif // LP64
2137}
2138
2139void MacroAssembler::store_double(Address dst) {
2140#ifdef _LP641
2141 movdbl(dst, xmm0);
2142#else
2143 if (UseSSE >= 2) {
2144 movdbl(dst, xmm0);
2145 } else {
2146 fstp_d(dst);
2147 }
2148#endif // LP64
2149}
2150
2151// dst = c = a * b + c
2152void MacroAssembler::fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) {
2153 Assembler::vfmadd231sd(c, a, b);
2154 if (dst != c) {
2155 movdbl(dst, c);
2156 }
2157}
2158
2159// dst = c = a * b + c
2160void MacroAssembler::fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) {
2161 Assembler::vfmadd231ss(c, a, b);
2162 if (dst != c) {
2163 movflt(dst, c);
2164 }
2165}
2166
2167// dst = c = a * b + c
2168void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) {
2169 Assembler::vfmadd231pd(c, a, b, vector_len);
2170 if (dst != c) {
2171 vmovdqu(dst, c);
2172 }
2173}
2174
2175// dst = c = a * b + c
2176void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) {
2177 Assembler::vfmadd231ps(c, a, b, vector_len);
2178 if (dst != c) {
2179 vmovdqu(dst, c);
2180 }
2181}
2182
2183// dst = c = a * b + c
2184void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) {
2185 Assembler::vfmadd231pd(c, a, b, vector_len);
2186 if (dst != c) {
2187 vmovdqu(dst, c);
2188 }
2189}
2190
2191// dst = c = a * b + c
2192void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) {
2193 Assembler::vfmadd231ps(c, a, b, vector_len);
2194 if (dst != c) {
2195 vmovdqu(dst, c);
2196 }
2197}
2198
2199void MacroAssembler::incrementl(AddressLiteral dst) {
2200 if (reachable(dst)) {
2201 incrementl(as_Address(dst));
2202 } else {
2203 lea(rscratch1, dst);
2204 incrementl(Address(rscratch1, 0));
2205 }
2206}
2207
2208void MacroAssembler::incrementl(ArrayAddress dst) {
2209 incrementl(as_Address(dst));
2210}
2211
2212void MacroAssembler::incrementl(Register reg, int value) {
2213 if (value == min_jint) {addl(reg, value) ; return; }
2214 if (value < 0) { decrementl(reg, -value); return; }
2215 if (value == 0) { ; return; }
2216 if (value == 1 && UseIncDec) { incl(reg) ; return; }
2217 /* else */ { addl(reg, value) ; return; }
2218}
2219
2220void MacroAssembler::incrementl(Address dst, int value) {
2221 if (value == min_jint) {addl(dst, value) ; return; }
2222 if (value < 0) { decrementl(dst, -value); return; }
2223 if (value == 0) { ; return; }
2224 if (value == 1 && UseIncDec) { incl(dst) ; return; }
2225 /* else */ { addl(dst, value) ; return; }
2226}
2227
2228void MacroAssembler::jump(AddressLiteral dst) {
2229 if (reachable(dst)) {
2230 jmp_literal(dst.target(), dst.rspec());
2231 } else {
2232 lea(rscratch1, dst);
2233 jmp(rscratch1);
2234 }
2235}
2236
2237void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
2238 if (reachable(dst)) {
2239 InstructionMark im(this);
2240 relocate(dst.reloc());
2241 const int short_size = 2;
2242 const int long_size = 6;
2243 int offs = (intptr_t)dst.target() - ((intptr_t)pc());
2244 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
2245 // 0111 tttn #8-bit disp
2246 emit_int8(0x70 | cc);
2247 emit_int8((offs - short_size) & 0xFF);
2248 } else {
2249 // 0000 1111 1000 tttn #32-bit disp
2250 emit_int8(0x0F);
2251 emit_int8((unsigned char)(0x80 | cc));
2252 emit_int32(offs - long_size);
2253 }
2254 } else {
2255#ifdef ASSERT1
2256 warning("reversing conditional branch");
2257#endif /* ASSERT */
2258 Label skip;
2259 jccb(reverse[cc], skip)jccb_0(reverse[cc], skip, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2259)
;
2260 lea(rscratch1, dst);
2261 Assembler::jmp(rscratch1);
2262 bind(skip);
2263 }
2264}
2265
2266void MacroAssembler::fld_x(AddressLiteral src) {
2267 Assembler::fld_x(as_Address(src));
2268}
2269
2270void MacroAssembler::ldmxcsr(AddressLiteral src) {
2271 if (reachable(src)) {
2272 Assembler::ldmxcsr(as_Address(src));
2273 } else {
2274 lea(rscratch1, src);
2275 Assembler::ldmxcsr(Address(rscratch1, 0));
2276 }
2277}
2278
2279int MacroAssembler::load_signed_byte(Register dst, Address src) {
2280 int off;
2281 if (LP64_ONLY(true ||)true || VM_Version::is_P6()) {
2282 off = offset();
2283 movsbl(dst, src); // movsxb
2284 } else {
2285 off = load_unsigned_byte(dst, src);
2286 shll(dst, 24);
2287 sarl(dst, 24);
2288 }
2289 return off;
2290}
2291
2292// Note: load_signed_short used to be called load_signed_word.
2293// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
2294// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
2295// The term "word" in HotSpot means a 32- or 64-bit machine word.
2296int MacroAssembler::load_signed_short(Register dst, Address src) {
2297 int off;
2298 if (LP64_ONLY(true ||)true || VM_Version::is_P6()) {
2299 // This is dubious to me since it seems safe to do a signed 16 => 64 bit
2300 // version but this is what 64bit has always done. This seems to imply
2301 // that users are only using 32bits worth.
2302 off = offset();
2303 movswl(dst, src); // movsxw
2304 } else {
2305 off = load_unsigned_short(dst, src);
2306 shll(dst, 16);
2307 sarl(dst, 16);
2308 }
2309 return off;
2310}
2311
2312int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
2313 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
2314 // and "3.9 Partial Register Penalties", p. 22).
2315 int off;
2316 if (LP64_ONLY(true || )true || VM_Version::is_P6() || src.uses(dst)) {
2317 off = offset();
2318 movzbl(dst, src); // movzxb
2319 } else {
2320 xorl(dst, dst);
2321 off = offset();
2322 movb(dst, src);
2323 }
2324 return off;
2325}
2326
2327// Note: load_unsigned_short used to be called load_unsigned_word.
2328int MacroAssembler::load_unsigned_short(Register dst, Address src) {
2329 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
2330 // and "3.9 Partial Register Penalties", p. 22).
2331 int off;
2332 if (LP64_ONLY(true ||)true || VM_Version::is_P6() || src.uses(dst)) {
2333 off = offset();
2334 movzwl(dst, src); // movzxw
2335 } else {
2336 xorl(dst, dst);
2337 off = offset();
2338 movw(dst, src);
2339 }
2340 return off;
2341}
2342
2343void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
2344 switch (size_in_bytes) {
2345#ifndef _LP641
2346 case 8:
2347 assert(dst2 != noreg, "second dest register required")do { if (!(dst2 != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2347, "assert(" "dst2 != noreg" ") failed", "second dest register required"
); ::breakpoint(); } } while (0)
;
2348 movl(dst, src);
2349 movl(dst2, src.plus_disp(BytesPerInt));
2350 break;
2351#else
2352 case 8: movq(dst, src); break;
2353#endif
2354 case 4: movl(dst, src); break;
2355 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
2356 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
2357 default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2357); ::breakpoint(); } while (0)
;
2358 }
2359}
2360
2361void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
2362 switch (size_in_bytes) {
2363#ifndef _LP641
2364 case 8:
2365 assert(src2 != noreg, "second source register required")do { if (!(src2 != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2365, "assert(" "src2 != noreg" ") failed", "second source register required"
); ::breakpoint(); } } while (0)
;
2366 movl(dst, src);
2367 movl(dst.plus_disp(BytesPerInt), src2);
2368 break;
2369#else
2370 case 8: movq(dst, src); break;
2371#endif
2372 case 4: movl(dst, src); break;
2373 case 2: movw(dst, src); break;
2374 case 1: movb(dst, src); break;
2375 default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2375); ::breakpoint(); } while (0)
;
2376 }
2377}
2378
2379void MacroAssembler::mov32(AddressLiteral dst, Register src) {
2380 if (reachable(dst)) {
2381 movl(as_Address(dst), src);
2382 } else {
2383 lea(rscratch1, dst);
2384 movl(Address(rscratch1, 0), src);
2385 }
2386}
2387
2388void MacroAssembler::mov32(Register dst, AddressLiteral src) {
2389 if (reachable(src)) {
2390 movl(dst, as_Address(src));
2391 } else {
2392 lea(rscratch1, src);
2393 movl(dst, Address(rscratch1, 0));
2394 }
2395}
2396
2397// C++ bool manipulation
2398
2399void MacroAssembler::movbool(Register dst, Address src) {
2400 if(sizeof(bool) == 1)
2401 movb(dst, src);
2402 else if(sizeof(bool) == 2)
2403 movw(dst, src);
2404 else if(sizeof(bool) == 4)
2405 movl(dst, src);
2406 else
2407 // unsupported
2408 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2408); ::breakpoint(); } while (0)
;
2409}
2410
2411void MacroAssembler::movbool(Address dst, bool boolconst) {
2412 if(sizeof(bool) == 1)
2413 movb(dst, (int) boolconst);
2414 else if(sizeof(bool) == 2)
2415 movw(dst, (int) boolconst);
2416 else if(sizeof(bool) == 4)
2417 movl(dst, (int) boolconst);
2418 else
2419 // unsupported
2420 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2420); ::breakpoint(); } while (0)
;
2421}
2422
2423void MacroAssembler::movbool(Address dst, Register src) {
2424 if(sizeof(bool) == 1)
2425 movb(dst, src);
2426 else if(sizeof(bool) == 2)
2427 movw(dst, src);
2428 else if(sizeof(bool) == 4)
2429 movl(dst, src);
2430 else
2431 // unsupported
2432 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2432); ::breakpoint(); } while (0)
;
2433}
2434
2435void MacroAssembler::movbyte(ArrayAddress dst, int src) {
2436 movb(as_Address(dst), src);
2437}
2438
2439void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
2440 if (reachable(src)) {
2441 movdl(dst, as_Address(src));
2442 } else {
2443 lea(rscratch1, src);
2444 movdl(dst, Address(rscratch1, 0));
2445 }
2446}
2447
2448void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
2449 if (reachable(src)) {
2450 movq(dst, as_Address(src));
2451 } else {
2452 lea(rscratch1, src);
2453 movq(dst, Address(rscratch1, 0));
2454 }
2455}
2456
2457void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
2458 if (reachable(src)) {
2459 if (UseXmmLoadAndClearUpper) {
2460 movsd (dst, as_Address(src));
2461 } else {
2462 movlpd(dst, as_Address(src));
2463 }
2464 } else {
2465 lea(rscratch1, src);
2466 if (UseXmmLoadAndClearUpper) {
2467 movsd (dst, Address(rscratch1, 0));
2468 } else {
2469 movlpd(dst, Address(rscratch1, 0));
2470 }
2471 }
2472}
2473
2474void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
2475 if (reachable(src)) {
2476 movss(dst, as_Address(src));
2477 } else {
2478 lea(rscratch1, src);
2479 movss(dst, Address(rscratch1, 0));
2480 }
2481}
2482
2483void MacroAssembler::movptr(Register dst, Register src) {
2484 LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src));
2485}
2486
2487void MacroAssembler::movptr(Register dst, Address src) {
2488 LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src));
2489}
2490
2491// src should NEVER be a real pointer. Use AddressLiteral for true pointers
2492void MacroAssembler::movptr(Register dst, intptr_t src) {
2493 LP64_ONLY(mov64(dst, src))mov64(dst, src) NOT_LP64(movl(dst, src));
2494}
2495
2496void MacroAssembler::movptr(Address dst, Register src) {
2497 LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src));
2498}
2499
2500void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2501 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((src->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2501, "assert(" "((src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2502 Assembler::movdqu(dst, src);
2503}
2504
2505void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2506 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2506, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2507 Assembler::movdqu(dst, src);
2508}
2509
2510void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2511 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vl()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2511, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2512 Assembler::movdqu(dst, src);
2513}
2514
2515void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2516 if (reachable(src)) {
2517 movdqu(dst, as_Address(src));
2518 } else {
2519 lea(scratchReg, src);
2520 movdqu(dst, Address(scratchReg, 0));
2521 }
2522}
2523
2524void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2525 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((src->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2525, "assert(" "((src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2526 Assembler::vmovdqu(dst, src);
2527}
2528
2529void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2530 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2530, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2531 Assembler::vmovdqu(dst, src);
2532}
2533
2534void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2535 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vl()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2535, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2536 Assembler::vmovdqu(dst, src);
2537}
2538
2539void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2540 if (reachable(src)) {
2541 vmovdqu(dst, as_Address(src));
2542 }
2543 else {
2544 lea(scratch_reg, src);
2545 vmovdqu(dst, Address(scratch_reg, 0));
2546 }
2547}
2548
2549void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) {
2550 assert(vector_len <= AVX_256bit, "AVX2 vector length")do { if (!(vector_len <= AVX_256bit)) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2550, "assert(" "vector_len <= AVX_256bit" ") failed", "AVX2 vector length"
); ::breakpoint(); } } while (0)
;
2551 if (vector_len == AVX_256bit) {
2552 vmovdqu(dst, src, scratch_reg);
2553 } else {
2554 movdqu(dst, src, scratch_reg);
2555 }
2556}
2557
2558void MacroAssembler::kmov(KRegister dst, Address src) {
2559 if (VM_Version::supports_avx512bw()) {
2560 kmovql(dst, src);
2561 } else {
2562 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2562, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2563 kmovwl(dst, src);
2564 }
2565}
2566
2567void MacroAssembler::kmov(Address dst, KRegister src) {
2568 if (VM_Version::supports_avx512bw()) {
2569 kmovql(dst, src);
2570 } else {
2571 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2571, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2572 kmovwl(dst, src);
2573 }
2574}
2575
2576void MacroAssembler::kmov(KRegister dst, KRegister src) {
2577 if (VM_Version::supports_avx512bw()) {
2578 kmovql(dst, src);
2579 } else {
2580 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2580, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2581 kmovwl(dst, src);
2582 }
2583}
2584
2585void MacroAssembler::kmov(Register dst, KRegister src) {
2586 if (VM_Version::supports_avx512bw()) {
2587 kmovql(dst, src);
2588 } else {
2589 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2589, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2590 kmovwl(dst, src);
2591 }
2592}
2593
2594void MacroAssembler::kmov(KRegister dst, Register src) {
2595 if (VM_Version::supports_avx512bw()) {
2596 kmovql(dst, src);
2597 } else {
2598 assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison)
= 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2598, "assert(" "VM_Version::supports_evex()" ") failed", ""
); ::breakpoint(); } } while (0)
;
2599 kmovwl(dst, src);
2600 }
2601}
2602
2603void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_reg) {
2604 if (reachable(src)) {
2605 kmovql(dst, as_Address(src));
2606 } else {
2607 lea(scratch_reg, src);
2608 kmovql(dst, Address(scratch_reg, 0));
2609 }
2610}
2611
2612void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
2613 if (reachable(src)) {
2614 kmovwl(dst, as_Address(src));
2615 } else {
2616 lea(scratch_reg, src);
2617 kmovwl(dst, Address(scratch_reg, 0));
2618 }
2619}
2620
2621void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2622 int vector_len, Register scratch_reg) {
2623 if (reachable(src)) {
2624 if (mask == k0) {
2625 Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
2626 } else {
2627 Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
2628 }
2629 } else {
2630 lea(scratch_reg, src);
2631 if (mask == k0) {
2632 Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
2633 } else {
2634 Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2635 }
2636 }
2637}
2638
2639void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2640 int vector_len, Register scratch_reg) {
2641 if (reachable(src)) {
2642 Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
2643 } else {
2644 lea(scratch_reg, src);
2645 Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2646 }
2647}
2648
2649void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2650 int vector_len, Register scratch_reg) {
2651 if (reachable(src)) {
2652 Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
2653 } else {
2654 lea(scratch_reg, src);
2655 Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2656 }
2657}
2658
2659void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2660 int vector_len, Register scratch_reg) {
2661 if (reachable(src)) {
2662 Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
2663 } else {
2664 lea(scratch_reg, src);
2665 Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2666 }
2667}
2668
2669void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2670 if (reachable(src)) {
2671 Assembler::evmovdquq(dst, as_Address(src), vector_len);
2672 } else {
2673 lea(rscratch, src);
2674 Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2675 }
2676}
2677
2678void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2679 if (reachable(src)) {
2680 Assembler::movdqa(dst, as_Address(src));
2681 } else {
2682 lea(rscratch1, src);
2683 Assembler::movdqa(dst, Address(rscratch1, 0));
2684 }
2685}
2686
2687void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2688 if (reachable(src)) {
2689 Assembler::movsd(dst, as_Address(src));
2690 } else {
2691 lea(rscratch1, src);
2692 Assembler::movsd(dst, Address(rscratch1, 0));
2693 }
2694}
2695
2696void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
2697 if (reachable(src)) {
2698 Assembler::movss(dst, as_Address(src));
2699 } else {
2700 lea(rscratch1, src);
2701 Assembler::movss(dst, Address(rscratch1, 0));
2702 }
2703}
2704
2705void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
2706 if (reachable(src)) {
2707 Assembler::mulsd(dst, as_Address(src));
2708 } else {
2709 lea(rscratch1, src);
2710 Assembler::mulsd(dst, Address(rscratch1, 0));
2711 }
2712}
2713
2714void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
2715 if (reachable(src)) {
2716 Assembler::mulss(dst, as_Address(src));
2717 } else {
2718 lea(rscratch1, src);
2719 Assembler::mulss(dst, Address(rscratch1, 0));
2720 }
2721}
2722
2723void MacroAssembler::null_check(Register reg, int offset) {
2724 if (needs_explicit_null_check(offset)) {
2725 // provoke OS NULL exception if reg = NULL by
2726 // accessing M[reg] w/o changing any (non-CC) registers
2727 // NOTE: cmpl is plenty here to provoke a segv
2728 cmpptr(rax, Address(reg, 0));
2729 // Note: should probably use testl(rax, Address(reg, 0));
2730 // may be shorter code (however, this version of
2731 // testl needs to be implemented first)
2732 } else {
2733 // nothing to do, (later) access of M[reg + offset]
2734 // will provoke OS NULL exception if reg = NULL
2735 }
2736}
2737
2738void MacroAssembler::os_breakpoint() {
2739 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
2740 // (e.g., MSVC can't call ps() otherwise)
2741 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)((address)((address_word)(os::breakpoint)))));
2742}
2743
2744void MacroAssembler::unimplemented(const char* what) {
2745 const char* buf = NULL__null;
2746 {
2747 ResourceMark rm;
2748 stringStream ss;
2749 ss.print("unimplemented: %s", what);
2750 buf = code_string(ss.as_string());
2751 }
2752 stop(buf);
2753}
2754
2755#ifdef _LP641
2756#define XSTATE_BV0x200 0x200
2757#endif
2758
2759void MacroAssembler::pop_CPU_state() {
2760 pop_FPU_state();
2761 pop_IU_state();
2762}
2763
2764void MacroAssembler::pop_FPU_state() {
2765#ifndef _LP641
2766 frstor(Address(rsp, 0));
2767#else
2768 fxrstor(Address(rsp, 0));
2769#endif
2770 addptr(rsp, FPUStateSizeInWords * wordSize);
2771}
2772
2773void MacroAssembler::pop_IU_state() {
2774 popa();
2775 LP64_ONLY(addq(rsp, 8))addq(rsp, 8);
2776 popf();
2777}
2778
2779// Save Integer and Float state
2780// Warning: Stack must be 16 byte aligned (64bit)
2781void MacroAssembler::push_CPU_state() {
2782 push_IU_state();
2783 push_FPU_state();
2784}
2785
2786void MacroAssembler::push_FPU_state() {
2787 subptr(rsp, FPUStateSizeInWords * wordSize);
2788#ifndef _LP641
2789 fnsave(Address(rsp, 0));
2790 fwait();
2791#else
2792 fxsave(Address(rsp, 0));
2793#endif // LP64
2794}
2795
2796void MacroAssembler::push_IU_state() {
2797 // Push flags first because pusha kills them
2798 pushf();
2799 // Make sure rsp stays 16-byte aligned
2800 LP64_ONLY(subq(rsp, 8))subq(rsp, 8);
2801 pusha();
2802}
2803
2804void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { // determine java_thread register
2805 if (!java_thread->is_valid()) {
2806 java_thread = rdi;
2807 get_thread(java_thread);
2808 }
2809 // we must set sp to zero to clear frame
2810 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD0L);
2811 // must clear fp, so that compiled frames are not confused; it is
2812 // possible that we need it only for debugging
2813 if (clear_fp) {
2814 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD0L);
2815 }
2816 // Always clear the pc because it could have been set by make_walkable()
2817 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD0L);
2818 vzeroupper();
2819}
2820
2821void MacroAssembler::restore_rax(Register tmp) {
2822 if (tmp == noreg) pop(rax);
2823 else if (tmp != rax) mov(rax, tmp);
2824}
2825
2826void MacroAssembler::round_to(Register reg, int modulus) {
2827 addptr(reg, modulus - 1);
2828 andptr(reg, -modulus);
2829}
2830
2831void MacroAssembler::save_rax(Register tmp) {
2832 if (tmp == noreg) push(rax);
2833 else if (tmp != rax) mov(tmp, rax);
2834}
2835
2836void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod) {
2837 if (at_return) {
2838 // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
2839 // we may safely use rsp instead to perform the stack watermark check.
2840 cmpptr(in_nmethod ? rsp : rbp, Address(thread_reg, JavaThread::polling_word_offset()));
2841 jcc(Assembler::above, slow_path);
2842 return;
2843 }
2844 testb(Address(thread_reg, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit());
2845 jcc(Assembler::notZero, slow_path); // handshake bit set implies poll
2846}
2847
2848// Calls to C land
2849//
2850// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
2851// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
2852// has to be reset to 0. This is required to allow proper stack traversal.
2853void MacroAssembler::set_last_Java_frame(Register java_thread,
2854 Register last_java_sp,
2855 Register last_java_fp,
2856 address last_java_pc) {
2857 vzeroupper();
2858 // determine java_thread register
2859 if (!java_thread->is_valid()) {
2860 java_thread = rdi;
2861 get_thread(java_thread);
2862 }
2863 // determine last_java_sp register
2864 if (!last_java_sp->is_valid()) {
2865 last_java_sp = rsp;
2866 }
2867
2868 // last_java_fp is optional
2869
2870 if (last_java_fp->is_valid()) {
2871 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
2872 }
2873
2874 // last_java_pc is optional
2875
2876 if (last_java_pc != NULL__null) {
2877 lea(Address(java_thread,
2878 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
2879 InternalAddress(last_java_pc));
2880
2881 }
2882 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
2883}
2884
2885void MacroAssembler::shlptr(Register dst, int imm8) {
2886 LP64_ONLY(shlq(dst, imm8))shlq(dst, imm8) NOT_LP64(shll(dst, imm8));
2887}
2888
2889void MacroAssembler::shrptr(Register dst, int imm8) {
2890 LP64_ONLY(shrq(dst, imm8))shrq(dst, imm8) NOT_LP64(shrl(dst, imm8));
2891}
2892
2893void MacroAssembler::sign_extend_byte(Register reg) {
2894 if (LP64_ONLY(true ||)true || (VM_Version::is_P6() && reg->has_byte_register())) {
2895 movsbl(reg, reg); // movsxb
2896 } else {
2897 shll(reg, 24);
2898 sarl(reg, 24);
2899 }
2900}
2901
2902void MacroAssembler::sign_extend_short(Register reg) {
2903 if (LP64_ONLY(true ||)true || VM_Version::is_P6()) {
2904 movswl(reg, reg); // movsxw
2905 } else {
2906 shll(reg, 16);
2907 sarl(reg, 16);
2908 }
2909}
2910
2911void MacroAssembler::testl(Register dst, AddressLiteral src) {
2912 assert(reachable(src), "Address should be reachable")do { if (!(reachable(src))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2912, "assert(" "reachable(src)" ") failed", "Address should be reachable"
); ::breakpoint(); } } while (0)
;
2913 testl(dst, as_Address(src));
2914}
2915
2916void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
2917 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2917, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2918 Assembler::pcmpeqb(dst, src);
2919}
2920
2921void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
2922 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2922, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2923 Assembler::pcmpeqw(dst, src);
2924}
2925
2926void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
2927 assert((dst->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2927, "assert(" "(dst->encoding() < 16)" ") failed", "XMM register should be 0-15"
); ::breakpoint(); } } while (0)
;
2928 Assembler::pcmpestri(dst, src, imm8);
2929}
2930
2931void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
2932 assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding
() < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2932, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2933 Assembler::pcmpestri(dst, src, imm8);
2934}
2935
2936void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
2937 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2937, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2938 Assembler::pmovzxbw(dst, src);
2939}
2940
2941void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
2942 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2942, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2943 Assembler::pmovzxbw(dst, src);
2944}
2945
2946void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
2947 assert((src->encoding() < 16),"XMM register should be 0-15")do { if (!((src->encoding() < 16))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2947, "assert(" "(src->encoding() < 16)" ") failed", "XMM register should be 0-15"
); ::breakpoint(); } } while (0)
;
2948 Assembler::pmovmskb(dst, src);
2949}
2950
2951void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
2952 assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding
() < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 2952, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
2953 Assembler::ptest(dst, src);
2954}
2955
2956void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
2957 if (reachable(src)) {
2958 Assembler::sqrtsd(dst, as_Address(src));
2959 } else {
2960 lea(rscratch1, src);
2961 Assembler::sqrtsd(dst, Address(rscratch1, 0));
2962 }
2963}
2964
2965void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
2966 if (reachable(src)) {
2967 Assembler::sqrtss(dst, as_Address(src));
2968 } else {
2969 lea(rscratch1, src);
2970 Assembler::sqrtss(dst, Address(rscratch1, 0));
2971 }
2972}
2973
2974void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
2975 if (reachable(src)) {
2976 Assembler::subsd(dst, as_Address(src));
2977 } else {
2978 lea(rscratch1, src);
2979 Assembler::subsd(dst, Address(rscratch1, 0));
2980 }
2981}
2982
2983void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg) {
2984 if (reachable(src)) {
2985 Assembler::roundsd(dst, as_Address(src), rmode);
2986 } else {
2987 lea(scratch_reg, src);
2988 Assembler::roundsd(dst, Address(scratch_reg, 0), rmode);
2989 }
2990}
2991
2992void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
2993 if (reachable(src)) {
2994 Assembler::subss(dst, as_Address(src));
2995 } else {
2996 lea(rscratch1, src);
2997 Assembler::subss(dst, Address(rscratch1, 0));
2998 }
2999}
3000
3001void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
3002 if (reachable(src)) {
3003 Assembler::ucomisd(dst, as_Address(src));
3004 } else {
3005 lea(rscratch1, src);
3006 Assembler::ucomisd(dst, Address(rscratch1, 0));
3007 }
3008}
3009
3010void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
3011 if (reachable(src)) {
3012 Assembler::ucomiss(dst, as_Address(src));
3013 } else {
3014 lea(rscratch1, src);
3015 Assembler::ucomiss(dst, Address(rscratch1, 0));
3016 }
3017}
3018
3019void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
3020 // Used in sign-bit flipping with aligned address.
3021 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3021, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
3022 if (reachable(src)) {
3023 Assembler::xorpd(dst, as_Address(src));
3024 } else {
3025 lea(scratch_reg, src);
3026 Assembler::xorpd(dst, Address(scratch_reg, 0));
3027 }
3028}
3029
3030void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) {
3031 if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
3032 Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
3033 }
3034 else {
3035 Assembler::xorpd(dst, src);
3036 }
3037}
3038
3039void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) {
3040 if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) {
3041 Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit);
3042 } else {
3043 Assembler::xorps(dst, src);
3044 }
3045}
3046
3047void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
3048 // Used in sign-bit flipping with aligned address.
3049 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15
) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3049, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)"
") failed", "SSE mode requires address alignment 16 bytes");
::breakpoint(); } } while (0)
;
3050 if (reachable(src)) {
3051 Assembler::xorps(dst, as_Address(src));
3052 } else {
3053 lea(scratch_reg, src);
3054 Assembler::xorps(dst, Address(scratch_reg, 0));
3055 }
3056}
3057
3058void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
3059 // Used in sign-bit flipping with aligned address.
3060 bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
3061 assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || aligned_adr)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3061, "assert(" "(UseAVX > 0) || aligned_adr" ") failed"
, "SSE mode requires address alignment 16 bytes"); ::breakpoint
(); } } while (0)
;
3062 if (reachable(src)) {
3063 Assembler::pshufb(dst, as_Address(src));
3064 } else {
3065 lea(rscratch1, src);
3066 Assembler::pshufb(dst, Address(rscratch1, 0));
3067 }
3068}
3069
3070// AVX 3-operand instructions
3071
3072void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3073 if (reachable(src)) {
3074 vaddsd(dst, nds, as_Address(src));
3075 } else {
3076 lea(rscratch1, src);
3077 vaddsd(dst, nds, Address(rscratch1, 0));
3078 }
3079}
3080
3081void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3082 if (reachable(src)) {
3083 vaddss(dst, nds, as_Address(src));
3084 } else {
3085 lea(rscratch1, src);
3086 vaddss(dst, nds, Address(rscratch1, 0));
3087 }
3088}
3089
3090void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
3091 assert(UseAVX > 0, "requires some form of AVX")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3091, "assert(" "UseAVX > 0" ") failed", "requires some form of AVX"
); ::breakpoint(); } } while (0)
;
3092 if (reachable(src)) {
3093 Assembler::vpaddb(dst, nds, as_Address(src), vector_len);
3094 } else {
3095 lea(rscratch, src);
3096 Assembler::vpaddb(dst, nds, Address(rscratch, 0), vector_len);
3097 }
3098}
3099
3100void MacroAssembler::vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
3101 assert(UseAVX > 0, "requires some form of AVX")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3101, "assert(" "UseAVX > 0" ") failed", "requires some form of AVX"
); ::breakpoint(); } } while (0)
;
3102 if (reachable(src)) {
3103 Assembler::vpaddd(dst, nds, as_Address(src), vector_len);
3104 } else {
3105 lea(rscratch, src);
3106 Assembler::vpaddd(dst, nds, Address(rscratch, 0), vector_len);
3107 }
3108}
3109
3110void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
3111 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vldq()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3111, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3112 vandps(dst, nds, negate_field, vector_len);
3113}
3114
3115void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
3116 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vldq()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3116, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3117 vandpd(dst, nds, negate_field, vector_len);
3118}
3119
3120void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3121 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3121, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3122 Assembler::vpaddb(dst, nds, src, vector_len);
3123}
3124
3125void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3126 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3126, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3127 Assembler::vpaddb(dst, nds, src, vector_len);
3128}
3129
3130void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3131 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3131, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3132 Assembler::vpaddw(dst, nds, src, vector_len);
3133}
3134
3135void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3136 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3136, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3137 Assembler::vpaddw(dst, nds, src, vector_len);
3138}
3139
3140void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3141 if (reachable(src)) {
3142 Assembler::vpand(dst, nds, as_Address(src), vector_len);
3143 } else {
3144 lea(scratch_reg, src);
3145 Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3146 }
3147}
3148
3149void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3150 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3150, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3151 Assembler::vpbroadcastw(dst, src, vector_len);
3152}
3153
3154void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3155 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3155, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3156 Assembler::vpcmpeqb(dst, nds, src, vector_len);
3157}
3158
3159void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3160 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3160, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3161 Assembler::vpcmpeqw(dst, nds, src, vector_len);
3162}
3163
3164void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
3165 AddressLiteral src, int vector_len, Register scratch_reg) {
3166 if (reachable(src)) {
3167 Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
3168 } else {
3169 lea(scratch_reg, src);
3170 Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
3171 }
3172}
3173
3174void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3175 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3176 if (reachable(src)) {
3177 Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3178 } else {
3179 lea(scratch_reg, src);
3180 Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3181 }
3182}
3183
3184void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3185 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3186 if (reachable(src)) {
3187 Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3188 } else {
3189 lea(scratch_reg, src);
3190 Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3191 }
3192}
3193
3194void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3195 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3196 if (reachable(src)) {
3197 Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3198 } else {
3199 lea(scratch_reg, src);
3200 Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3201 }
3202}
3203
3204void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3205 int comparison, bool is_signed, int vector_len, Register scratch_reg) {
3206 if (reachable(src)) {
3207 Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len);
3208 } else {
3209 lea(scratch_reg, src);
3210 Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len);
3211 }
3212}
3213
3214void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
3215 if (width == Assembler::Q) {
3216 Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
3217 } else {
3218 Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
3219 }
3220}
3221
3222void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
3223 int eq_cond_enc = 0x29;
3224 int gt_cond_enc = 0x37;
3225 if (width != Assembler::Q) {
3226 eq_cond_enc = 0x74 + width;
3227 gt_cond_enc = 0x64 + width;
3228 }
3229 switch (cond) {
3230 case eq:
3231 vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3232 break;
3233 case neq:
3234 vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3235 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3236 break;
3237 case le:
3238 vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3239 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3240 break;
3241 case nlt:
3242 vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3243 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3244 break;
3245 case lt:
3246 vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3247 break;
3248 case nle:
3249 vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3250 break;
3251 default:
3252 assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3252, "assert(" "false" ") failed", "Should not reach here"
); ::breakpoint(); } } while (0)
;
3253 }
3254}
3255
3256void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3257 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3257, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3258 Assembler::vpmovzxbw(dst, src, vector_len);
3259}
3260
3261void MacroAssembler::vpmovmskb(Register dst, XMMRegister src, int vector_len) {
3262 assert((src->encoding() < 16),"XMM register should be 0-15")do { if (!((src->encoding() < 16))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3262, "assert(" "(src->encoding() < 16)" ") failed", "XMM register should be 0-15"
); ::breakpoint(); } } while (0)
;
3263 Assembler::vpmovmskb(dst, src, vector_len);
3264}
3265
3266void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3267 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3267, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3268 Assembler::vpmullw(dst, nds, src, vector_len);
3269}
3270
3271void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3272 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3272, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3273 Assembler::vpmullw(dst, nds, src, vector_len);
3274}
3275
3276void MacroAssembler::vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3277 assert((UseAVX > 0), "AVX support is needed")do { if (!((UseAVX > 0))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3277, "assert(" "(UseAVX > 0)" ") failed", "AVX support is needed"
); ::breakpoint(); } } while (0)
;
3278 if (reachable(src)) {
3279 Assembler::vpmulld(dst, nds, as_Address(src), vector_len);
3280 } else {
3281 lea(scratch_reg, src);
3282 Assembler::vpmulld(dst, nds, Address(scratch_reg, 0), vector_len);
3283 }
3284}
3285
3286void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3287 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3287, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3288 Assembler::vpsubb(dst, nds, src, vector_len);
3289}
3290
3291void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3292 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3292, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3293 Assembler::vpsubb(dst, nds, src, vector_len);
3294}
3295
3296void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3297 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3297, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3298 Assembler::vpsubw(dst, nds, src, vector_len);
3299}
3300
3301void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3302 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3302, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3303 Assembler::vpsubw(dst, nds, src, vector_len);
3304}
3305
3306void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3307 assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift->
encoding() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3307, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3308 Assembler::vpsraw(dst, nds, shift, vector_len);
3309}
3310
3311void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3312 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3312, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3313 Assembler::vpsraw(dst, nds, shift, vector_len);
3314}
3315
3316void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3317 assert(UseAVX > 2,"")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3317, "assert(" "UseAVX > 2" ") failed", ""); ::breakpoint
(); } } while (0)
;
3318 if (!VM_Version::supports_avx512vl() && vector_len < 2) {
3319 vector_len = 2;
3320 }
3321 Assembler::evpsraq(dst, nds, shift, vector_len);
3322}
3323
3324void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3325 assert(UseAVX > 2,"")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3325, "assert(" "UseAVX > 2" ") failed", ""); ::breakpoint
(); } } while (0)
;
3326 if (!VM_Version::supports_avx512vl() && vector_len < 2) {
3327 vector_len = 2;
3328 }
3329 Assembler::evpsraq(dst, nds, shift, vector_len);
3330}
3331
3332void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3333 assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift->
encoding() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3333, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3334 Assembler::vpsrlw(dst, nds, shift, vector_len);
3335}
3336
3337void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3338 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3338, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3339 Assembler::vpsrlw(dst, nds, shift, vector_len);
3340}
3341
3342void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
3343 assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift->
encoding() < 16 && nds->encoding() < 16) || VM_Version
::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3343, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3344 Assembler::vpsllw(dst, nds, shift, vector_len);
3345}
3346
3347void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
3348 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3348, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3349 Assembler::vpsllw(dst, nds, shift, vector_len);
3350}
3351
3352void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
3353 assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding
() < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3353, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3354 Assembler::vptest(dst, src);
3355}
3356
3357void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
3358 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3358, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3359 Assembler::punpcklbw(dst, src);
3360}
3361
3362void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) {
3363 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl
()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3363, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3364 Assembler::pshufd(dst, src, mode);
3365}
3366
3367void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
3368 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding
() < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3368, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3369 Assembler::pshuflw(dst, src, mode);
3370}
3371
3372void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3373 if (reachable(src)) {
3374 vandpd(dst, nds, as_Address(src), vector_len);
3375 } else {
3376 lea(scratch_reg, src);
3377 vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3378 }
3379}
3380
3381void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3382 if (reachable(src)) {
3383 vandps(dst, nds, as_Address(src), vector_len);
3384 } else {
3385 lea(scratch_reg, src);
3386 vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3387 }
3388}
3389
3390void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
3391 bool merge, int vector_len, Register scratch_reg) {
3392 if (reachable(src)) {
3393 Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
3394 } else {
3395 lea(scratch_reg, src);
3396 Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
3397 }
3398}
3399
3400void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3401 if (reachable(src)) {
3402 vdivsd(dst, nds, as_Address(src));
3403 } else {
3404 lea(rscratch1, src);
3405 vdivsd(dst, nds, Address(rscratch1, 0));
3406 }
3407}
3408
3409void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3410 if (reachable(src)) {
3411 vdivss(dst, nds, as_Address(src));
3412 } else {
3413 lea(rscratch1, src);
3414 vdivss(dst, nds, Address(rscratch1, 0));
3415 }
3416}
3417
3418void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3419 if (reachable(src)) {
3420 vmulsd(dst, nds, as_Address(src));
3421 } else {
3422 lea(rscratch1, src);
3423 vmulsd(dst, nds, Address(rscratch1, 0));
3424 }
3425}
3426
3427void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3428 if (reachable(src)) {
3429 vmulss(dst, nds, as_Address(src));
3430 } else {
3431 lea(rscratch1, src);
3432 vmulss(dst, nds, Address(rscratch1, 0));
3433 }
3434}
3435
3436void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3437 if (reachable(src)) {
3438 vsubsd(dst, nds, as_Address(src));
3439 } else {
3440 lea(rscratch1, src);
3441 vsubsd(dst, nds, Address(rscratch1, 0));
3442 }
3443}
3444
3445void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3446 if (reachable(src)) {
3447 vsubss(dst, nds, as_Address(src));
3448 } else {
3449 lea(rscratch1, src);
3450 vsubss(dst, nds, Address(rscratch1, 0));
3451 }
3452}
3453
3454void MacroAssembler::vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3455 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vldq()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3455, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3456 vxorps(dst, nds, src, Assembler::AVX_128bit);
3457}
3458
3459void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3460 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding
() < 16) || VM_Version::supports_avx512vldq()))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3460, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())"
") failed", "XMM register should be 0-15"); ::breakpoint(); }
} while (0)
;
3461 vxorpd(dst, nds, src, Assembler::AVX_128bit);
3462}
3463
3464void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3465 if (reachable(src)) {
3466 vxorpd(dst, nds, as_Address(src), vector_len);
3467 } else {
3468 lea(scratch_reg, src);
3469 vxorpd(dst, nds, Address(scratch_reg, 0), vector_len);
3470 }
3471}
3472
3473void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3474 if (reachable(src)) {
3475 vxorps(dst, nds, as_Address(src), vector_len);
3476 } else {
3477 lea(scratch_reg, src);
3478 vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3479 }
3480}
3481
3482void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3483 if (UseAVX > 1 || (vector_len < 1)) {
3484 if (reachable(src)) {
3485 Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3486 } else {
3487 lea(scratch_reg, src);
3488 Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3489 }
3490 }
3491 else {
3492 MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3493 }
3494}
3495
3496void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3497 if (reachable(src)) {
3498 Assembler::vpermd(dst, nds, as_Address(src), vector_len);
3499 } else {
3500 lea(scratch_reg, src);
3501 Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
3502 }
3503}
3504
3505void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3506 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3507 STATIC_ASSERT(inverted_jweak_mask == -2)static_assert((inverted_jweak_mask == -2), "inverted_jweak_mask == -2"
)
; // otherwise check this code
3508 // The inverted mask is sign-extended
3509 andptr(possibly_jweak, inverted_jweak_mask);
3510}
3511
3512void MacroAssembler::resolve_jobject(Register value,
3513 Register thread,
3514 Register tmp) {
3515 assert_different_registers(value, thread, tmp);
3516 Label done, not_weak;
3517 testptr(value, value);
3518 jcc(Assembler::zero, done); // Use NULL as-is.
3519 testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3520 jcc(Assembler::zero, not_weak);
3521 // Resolve jweak.
3522 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3523 value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
3524 verify_oop(value)_verify_oop_checked(value, "broken oop " "value", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3524)
;
3525 jmp(done);
3526 bind(not_weak);
3527 // Resolve (untagged) jobject.
3528 access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
3529 verify_oop(value)_verify_oop_checked(value, "broken oop " "value", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3529)
;
3530 bind(done);
3531}
3532
3533void MacroAssembler::subptr(Register dst, int32_t imm32) {
3534 LP64_ONLY(subq(dst, imm32))subq(dst, imm32) NOT_LP64(subl(dst, imm32));
3535}
3536
3537// Force generation of a 4 byte immediate value even if it fits into 8bit
3538void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
3539 LP64_ONLY(subq_imm32(dst, imm32))subq_imm32(dst, imm32) NOT_LP64(subl_imm32(dst, imm32));
3540}
3541
3542void MacroAssembler::subptr(Register dst, Register src) {
3543 LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src));
3544}
3545
3546// C++ bool manipulation
3547void MacroAssembler::testbool(Register dst) {
3548 if(sizeof(bool) == 1)
3549 testb(dst, 0xff);
3550 else if(sizeof(bool) == 2) {
3551 // testw implementation needed for two byte bools
3552 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3552); ::breakpoint(); } while (0)
;
3553 } else if(sizeof(bool) == 4)
3554 testl(dst, dst);
3555 else
3556 // unsupported
3557 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3557); ::breakpoint(); } while (0)
;
3558}
3559
3560void MacroAssembler::testptr(Register dst, Register src) {
3561 LP64_ONLY(testq(dst, src))testq(dst, src) NOT_LP64(testl(dst, src));
3562}
3563
3564// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
3565void MacroAssembler::tlab_allocate(Register thread, Register obj,
3566 Register var_size_in_bytes,
3567 int con_size_in_bytes,
3568 Register t1,
3569 Register t2,
3570 Label& slow_case) {
3571 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3572 bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
3573}
3574
3575// Defines obj, preserves var_size_in_bytes
3576void MacroAssembler::eden_allocate(Register thread, Register obj,
3577 Register var_size_in_bytes,
3578 int con_size_in_bytes,
3579 Register t1,
3580 Label& slow_case) {
3581 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
3582 bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
3583}
3584
3585// Preserves the contents of address, destroys the contents length_in_bytes and temp.
3586void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) {
3587 assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different")do { if (!(address != length_in_bytes && address != temp
&& temp != length_in_bytes)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3587, "assert(" "address != length_in_bytes && address != temp && temp != length_in_bytes"
") failed", "registers must be different"); ::breakpoint(); }
} while (0)
;
3588 assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord")do { if (!((offset_in_bytes & (BytesPerWord - 1)) == 0)) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3588, "assert(" "(offset_in_bytes & (BytesPerWord - 1)) == 0"
") failed", "offset must be a multiple of BytesPerWord"); ::
breakpoint(); } } while (0)
;
3589 Label done;
3590
3591 testptr(length_in_bytes, length_in_bytes);
3592 jcc(Assembler::zero, done);
3593
3594 // initialize topmost word, divide index by 2, check if odd and test if zero
3595 // note: for the remaining code to work, index must be a multiple of BytesPerWord
3596#ifdef ASSERT1
3597 {
3598 Label L;
3599 testptr(length_in_bytes, BytesPerWord - 1);
3600 jcc(Assembler::zero, L);
3601 stop("length must be a multiple of BytesPerWord");
3602 bind(L);
3603 }
3604#endif
3605 Register index = length_in_bytes;
3606 xorptr(temp, temp); // use _zero reg to clear memory (shorter code)
3607 if (UseIncDec) {
3608 shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set
3609 } else {
3610 shrptr(index, 2); // use 2 instructions to avoid partial flag stall
3611 shrptr(index, 1);
3612 }
3613#ifndef _LP641
3614 // index could have not been a multiple of 8 (i.e., bit 2 was set)
3615 {
3616 Label even;
3617 // note: if index was a multiple of 8, then it cannot
3618 // be 0 now otherwise it must have been 0 before
3619 // => if it is even, we don't need to check for 0 again
3620 jcc(Assembler::carryClear, even);
3621 // clear topmost word (no jump would be needed if conditional assignment worked here)
3622 movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp);
3623 // index could be 0 now, must check again
3624 jcc(Assembler::zero, done);
3625 bind(even);
3626 }
3627#endif // !_LP64
3628 // initialize remaining object fields: index is a multiple of 2 now
3629 {
3630 Label loop;
3631 bind(loop);
3632 movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp);
3633 NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);)
3634 decrement(index);
3635 jcc(Assembler::notZero, loop);
3636 }
3637
3638 bind(done);
3639}
3640
3641// Look up the method for a megamorphic invokeinterface call.
3642// The target method is determined by <intf_klass, itable_index>.
3643// The receiver klass is in recv_klass.
3644// On success, the result will be in method_result, and execution falls through.
3645// On failure, execution transfers to the given label.
3646void MacroAssembler::lookup_interface_method(Register recv_klass,
3647 Register intf_klass,
3648 RegisterOrConstant itable_index,
3649 Register method_result,
3650 Register scan_temp,
3651 Label& L_no_such_interface,
3652 bool return_method) {
3653 assert_different_registers(recv_klass, intf_klass, scan_temp);
3654 assert_different_registers(method_result, intf_klass, scan_temp);
3655 assert(recv_klass != method_result || !return_method,do { if (!(recv_klass != method_result || !return_method)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3656, "assert(" "recv_klass != method_result || !return_method"
") failed", "recv_klass can be destroyed when method isn't needed"
); ::breakpoint(); } } while (0)
3656 "recv_klass can be destroyed when method isn't needed")do { if (!(recv_klass != method_result || !return_method)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3656, "assert(" "recv_klass != method_result || !return_method"
") failed", "recv_klass can be destroyed when method isn't needed"
); ::breakpoint(); } } while (0)
;
3657
3658 assert(itable_index.is_constant() || itable_index.as_register() == method_result,do { if (!(itable_index.is_constant() || itable_index.as_register
() == method_result)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3659, "assert(" "itable_index.is_constant() || itable_index.as_register() == method_result"
") failed", "caller must use same register for non-constant itable index as for method"
); ::breakpoint(); } } while (0)
3659 "caller must use same register for non-constant itable index as for method")do { if (!(itable_index.is_constant() || itable_index.as_register
() == method_result)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3659, "assert(" "itable_index.is_constant() || itable_index.as_register() == method_result"
") failed", "caller must use same register for non-constant itable index as for method"
); ::breakpoint(); } } while (0)
;
3660
3661 // Compute start of first itableOffsetEntry (which is at the end of the vtable)
3662 int vtable_base = in_bytes(Klass::vtable_start_offset());
3663 int itentry_off = itableMethodEntry::method_offset_in_bytes();
3664 int scan_step = itableOffsetEntry::size() * wordSize;
3665 int vte_size = vtableEntry::size_in_bytes();
3666 Address::ScaleFactor times_vte_scale = Address::times_ptr;
3667 assert(vte_size == wordSize, "else adjust times_vte_scale")do { if (!(vte_size == wordSize)) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3667, "assert(" "vte_size == wordSize" ") failed", "else adjust times_vte_scale"
); ::breakpoint(); } } while (0)
;
3668
3669 movl(scan_temp, Address(recv_klass, Klass::vtable_length_offset()));
3670
3671 // %%% Could store the aligned, prescaled offset in the klassoop.
3672 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
3673
3674 if (return_method) {
3675 // Adjust recv_klass by scaled itable_index, so we can free itable_index.
3676 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below")do { if (!(itableMethodEntry::size() * wordSize == wordSize))
{ (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3676, "assert(" "itableMethodEntry::size() * wordSize == wordSize"
") failed", "adjust the scaling in the code below"); ::breakpoint
(); } } while (0)
;
3677 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
3678 }
3679
3680 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
3681 // if (scan->interface() == intf) {
3682 // result = (klass + scan->offset() + itable_index);
3683 // }
3684 // }
3685 Label search, found_method;
3686
3687 for (int peel = 1; peel >= 0; peel--) {
3688 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
3689 cmpptr(intf_klass, method_result);
3690
3691 if (peel) {
3692 jccb(Assembler::equal, found_method)jccb_0(Assembler::equal, found_method, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3692)
;
3693 } else {
3694 jccb(Assembler::notEqual, search)jccb_0(Assembler::notEqual, search, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3694)
;
3695 // (invert the test to fall through to found_method...)
3696 }
3697
3698 if (!peel) break;
3699
3700 bind(search);
3701
3702 // Check that the previous entry is non-null. A null entry means that
3703 // the receiver class doesn't implement the interface, and wasn't the
3704 // same as when the caller was compiled.
3705 testptr(method_result, method_result);
3706 jcc(Assembler::zero, L_no_such_interface);
3707 addptr(scan_temp, scan_step);
3708 }
3709
3710 bind(found_method);
3711
3712 if (return_method) {
3713 // Got a hit.
3714 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
3715 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
3716 }
3717}
3718
3719
3720// virtual method calling
3721void MacroAssembler::lookup_virtual_method(Register recv_klass,
3722 RegisterOrConstant vtable_index,
3723 Register method_result) {
3724 const int base = in_bytes(Klass::vtable_start_offset());
3725 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below")do { if (!(vtableEntry::size() * wordSize == wordSize)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3725, "assert(" "vtableEntry::size() * wordSize == wordSize"
") failed", "else adjust the scaling in the code below"); ::
breakpoint(); } } while (0)
;
3726 Address vtable_entry_addr(recv_klass,
3727 vtable_index, Address::times_ptr,
3728 base + vtableEntry::method_offset_in_bytes());
3729 movptr(method_result, vtable_entry_addr);
3730}
3731
3732
3733void MacroAssembler::check_klass_subtype(Register sub_klass,
3734 Register super_klass,
3735 Register temp_reg,
3736 Label& L_success) {
3737 Label L_failure;
3738 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL__null);
3739 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL__null);
3740 bind(L_failure);
3741}
3742
3743
3744void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
3745 Register super_klass,
3746 Register temp_reg,
3747 Label* L_success,
3748 Label* L_failure,
3749 Label* L_slow_path,
3750 RegisterOrConstant super_check_offset) {
3751 assert_different_registers(sub_klass, super_klass, temp_reg);
3752 bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
3753 if (super_check_offset.is_register()) {
3754 assert_different_registers(sub_klass, super_klass,
3755 super_check_offset.as_register());
3756 } else if (must_load_sco) {
3757 assert(temp_reg != noreg, "supply either a temp or a register offset")do { if (!(temp_reg != noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3757, "assert(" "temp_reg != noreg" ") failed", "supply either a temp or a register offset"
); ::breakpoint(); } } while (0)
;
3758 }
3759
3760 Label L_fallthrough;
3761 int label_nulls = 0;
3762 if (L_success == NULL__null) { L_success = &L_fallthrough; label_nulls++; }
3763 if (L_failure == NULL__null) { L_failure = &L_fallthrough; label_nulls++; }
3764 if (L_slow_path == NULL__null) { L_slow_path = &L_fallthrough; label_nulls++; }
3765 assert(label_nulls <= 1, "at most one NULL in the batch")do { if (!(label_nulls <= 1)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3765, "assert(" "label_nulls <= 1" ") failed", "at most one NULL in the batch"
); ::breakpoint(); } } while (0)
;
3766
3767 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3768 int sco_offset = in_bytes(Klass::super_check_offset_offset());
3769 Address super_check_offset_addr(super_klass, sco_offset);
3770
3771 // Hacked jcc, which "knows" that L_fallthrough, at least, is in
3772 // range of a jccb. If this routine grows larger, reconsider at
3773 // least some of these.
3774#define local_jcc(assembler_cond, label) \
3775 if (&(label) == &L_fallthrough) jccb(assembler_cond, label)jccb_0(assembler_cond, label, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3775)
; \
3776 else jcc( assembler_cond, label) /*omit semi*/
3777
3778 // Hacked jmp, which may only be used just before L_fallthrough.
3779#define final_jmp(label) \
3780 if (&(label) == &L_fallthrough) { /*do nothing*/ } \
3781 else jmp(label) /*omit semi*/
3782
3783 // If the pointers are equal, we are done (e.g., String[] elements).
3784 // This self-check enables sharing of secondary supertype arrays among
3785 // non-primary types such as array-of-interface. Otherwise, each such
3786 // type would need its own customized SSA.
3787 // We move this check to the front of the fast path because many
3788 // type checks are in fact trivially successful in this manner,
3789 // so we get a nicely predicted branch right at the start of the check.
3790 cmpptr(sub_klass, super_klass);
3791 local_jcc(Assembler::equal, *L_success);
3792
3793 // Check the supertype display:
3794 if (must_load_sco) {
3795 // Positive movl does right thing on LP64.
3796 movl(temp_reg, super_check_offset_addr);
3797 super_check_offset = RegisterOrConstant(temp_reg);
3798 }
3799 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
3800 cmpptr(super_klass, super_check_addr); // load displayed supertype
3801
3802 // This check has worked decisively for primary supers.
3803 // Secondary supers are sought in the super_cache ('super_cache_addr').
3804 // (Secondary supers are interfaces and very deeply nested subtypes.)
3805 // This works in the same check above because of a tricky aliasing
3806 // between the super_cache and the primary super display elements.
3807 // (The 'super_check_addr' can address either, as the case requires.)
3808 // Note that the cache is updated below if it does not help us find
3809 // what we need immediately.
3810 // So if it was a primary super, we can just fail immediately.
3811 // Otherwise, it's the slow path for us (no success at this point).
3812
3813 if (super_check_offset.is_register()) {
3814 local_jcc(Assembler::equal, *L_success);
3815 cmpl(super_check_offset.as_register(), sc_offset);
3816 if (L_failure == &L_fallthrough) {
3817 local_jcc(Assembler::equal, *L_slow_path);
3818 } else {
3819 local_jcc(Assembler::notEqual, *L_failure);
3820 final_jmp(*L_slow_path);
3821 }
3822 } else if (super_check_offset.as_constant() == sc_offset) {
3823 // Need a slow path; fast failure is impossible.
3824 if (L_slow_path == &L_fallthrough) {
3825 local_jcc(Assembler::equal, *L_success);
3826 } else {
3827 local_jcc(Assembler::notEqual, *L_slow_path);
3828 final_jmp(*L_success);
3829 }
3830 } else {
3831 // No slow path; it's a fast decision.
3832 if (L_failure == &L_fallthrough) {
3833 local_jcc(Assembler::equal, *L_success);
3834 } else {
3835 local_jcc(Assembler::notEqual, *L_failure);
3836 final_jmp(*L_success);
3837 }
3838 }
3839
3840 bind(L_fallthrough);
3841
3842#undef local_jcc
3843#undef final_jmp
3844}
3845
3846
3847void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
3848 Register super_klass,
3849 Register temp_reg,
3850 Register temp2_reg,
3851 Label* L_success,
3852 Label* L_failure,
3853 bool set_cond_codes) {
3854 assert_different_registers(sub_klass, super_klass, temp_reg);
3855 if (temp2_reg != noreg)
3856 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
3857#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
3858
3859 Label L_fallthrough;
3860 int label_nulls = 0;
3861 if (L_success == NULL__null) { L_success = &L_fallthrough; label_nulls++; }
3862 if (L_failure == NULL__null) { L_failure = &L_fallthrough; label_nulls++; }
3863 assert(label_nulls <= 1, "at most one NULL in the batch")do { if (!(label_nulls <= 1)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3863, "assert(" "label_nulls <= 1" ") failed", "at most one NULL in the batch"
); ::breakpoint(); } } while (0)
;
3864
3865 // a couple of useful fields in sub_klass:
3866 int ss_offset = in_bytes(Klass::secondary_supers_offset());
3867 int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
3868 Address secondary_supers_addr(sub_klass, ss_offset);
3869 Address super_cache_addr( sub_klass, sc_offset);
3870
3871 // Do a linear scan of the secondary super-klass chain.
3872 // This code is rarely used, so simplicity is a virtue here.
3873 // The repne_scan instruction uses fixed registers, which we must spill.
3874 // Don't worry too much about pre-existing connections with the input regs.
3875
3876 assert(sub_klass != rax, "killed reg")do { if (!(sub_klass != rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3876, "assert(" "sub_klass != rax" ") failed", "killed reg"
); ::breakpoint(); } } while (0)
; // killed by mov(rax, super)
3877 assert(sub_klass != rcx, "killed reg")do { if (!(sub_klass != rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3877, "assert(" "sub_klass != rcx" ") failed", "killed reg"
); ::breakpoint(); } } while (0)
; // killed by lea(rcx, &pst_counter)
3878
3879 // Get super_klass value into rax (even if it was in rdi or rcx).
3880 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
3881 if (super_klass != rax || UseCompressedOops) {
3882 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
3883 mov(rax, super_klass);
3884 }
3885 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
3886 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
3887
3888#ifndef PRODUCT
3889 int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
3890 ExternalAddress pst_counter_addr((address) pst_counter);
3891 NOT_LP64( incrementl(pst_counter_addr) );
3892 LP64_ONLY( lea(rcx, pst_counter_addr) )lea(rcx, pst_counter_addr);
3893 LP64_ONLY( incrementl(Address(rcx, 0)) )incrementl(Address(rcx, 0));
3894#endif //PRODUCT
3895
3896 // We will consult the secondary-super array.
3897 movptr(rdi, secondary_supers_addr);
3898 // Load the array length. (Positive movl does right thing on LP64.)
3899 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
3900 // Skip to start of data.
3901 addptr(rdi, Array<Klass*>::base_offset_in_bytes());
3902
3903 // Scan RCX words at [RDI] for an occurrence of RAX.
3904 // Set NZ/Z based on last compare.
3905 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
3906 // not change flags (only scas instruction which is repeated sets flags).
3907 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
3908
3909 testptr(rax,rax); // Set Z = 0
3910 repne_scan();
3911
3912 // Unspill the temp. registers:
3913 if (pushed_rdi) pop(rdi);
3914 if (pushed_rcx) pop(rcx);
3915 if (pushed_rax) pop(rax);
3916
3917 if (set_cond_codes) {
3918 // Special hack for the AD files: rdi is guaranteed non-zero.
3919 assert(!pushed_rdi, "rdi must be left non-NULL")do { if (!(!pushed_rdi)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3919, "assert(" "!pushed_rdi" ") failed", "rdi must be left non-NULL"
); ::breakpoint(); } } while (0)
;
3920 // Also, the condition codes are properly set Z/NZ on succeed/failure.
3921 }
3922
3923 if (L_failure == &L_fallthrough)
3924 jccb(Assembler::notEqual, *L_failure)jccb_0(Assembler::notEqual, *L_failure, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3924)
;
3925 else jcc(Assembler::notEqual, *L_failure);
3926
3927 // Success. Cache the super we found and proceed in triumph.
3928 movptr(super_cache_addr, super_klass);
3929
3930 if (L_success != &L_fallthrough) {
3931 jmp(*L_success);
3932 }
3933
3934#undef IS_A_TEMP
3935
3936 bind(L_fallthrough);
3937}
3938
3939void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
3940 assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required")do { if (!(L_fast_path != __null || L_slow_path != __null)) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3940, "assert(" "L_fast_path != __null || L_slow_path != __null"
") failed", "at least one is required"); ::breakpoint(); } }
while (0)
;
3941
3942 Label L_fallthrough;
3943 if (L_fast_path == NULL__null) {
3944 L_fast_path = &L_fallthrough;
3945 } else if (L_slow_path == NULL__null) {
3946 L_slow_path = &L_fallthrough;
3947 }
3948
3949 // Fast path check: class is fully initialized
3950 cmpb(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
3951 jcc(Assembler::equal, *L_fast_path);
3952
3953 // Fast path check: current thread is initializer thread
3954 cmpptr(thread, Address(klass, InstanceKlass::init_thread_offset()));
3955 if (L_slow_path == &L_fallthrough) {
3956 jcc(Assembler::equal, *L_fast_path);
3957 bind(*L_slow_path);
3958 } else if (L_fast_path == &L_fallthrough) {
3959 jcc(Assembler::notEqual, *L_slow_path);
3960 bind(*L_fast_path);
3961 } else {
3962 Unimplemented()do { (*g_assert_poison) = 'X';; report_unimplemented("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3962); ::breakpoint(); } while (0)
;
3963 }
3964}
3965
3966void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
3967 if (VM_Version::supports_cmov()) {
3968 cmovl(cc, dst, src);
3969 } else {
3970 Label L;
3971 jccb(negate_condition(cc), L)jccb_0(negate_condition(cc), L, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3971)
;
3972 movl(dst, src);
3973 bind(L);
3974 }
3975}
3976
3977void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
3978 if (VM_Version::supports_cmov()) {
3979 cmovl(cc, dst, src);
3980 } else {
3981 Label L;
3982 jccb(negate_condition(cc), L)jccb_0(negate_condition(cc), L, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 3982)
;
3983 movl(dst, src);
3984 bind(L);
3985 }
3986}
3987
3988void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
3989 if (!VerifyOops) return;
3990
3991 // Pass register number to verify_oop_subroutine
3992 const char* b = NULL__null;
3993 {
3994 ResourceMark rm;
3995 stringStream ss;
3996 ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
3997 b = code_string(ss.as_string());
3998 }
3999 BLOCK_COMMENT("verify_oop {");
4000#ifdef _LP641
4001 push(rscratch1); // save r10, trashed by movptr()
4002#endif
4003 push(rax); // save rax,
4004 push(reg); // pass register argument
4005 ExternalAddress buffer((address) b);
4006 // avoid using pushptr, as it modifies scratch registers
4007 // and our contract is not to modify anything
4008 movptr(rax, buffer.addr());
4009 push(rax);
4010 // call indirectly to solve generation ordering problem
4011 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4012 call(rax);
4013 // Caller pops the arguments (oop, message) and restores rax, r10
4014 BLOCK_COMMENT("} verify_oop");
4015}
4016
4017void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
4018 if (UseAVX > 2 && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
4019 vpternlogd(dst, 0xFF, dst, dst, vector_len);
4020 } else {
4021 assert(UseAVX > 0, "")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4021, "assert(" "UseAVX > 0" ") failed", ""); ::breakpoint
(); } } while (0)
;
4022 vpcmpeqb(dst, dst, dst, vector_len);
4023 }
4024}
4025
4026Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
4027 int extra_slot_offset) {
4028 // cf. TemplateTable::prepare_invoke(), if (load_receiver).
4029 int stackElementSize = Interpreter::stackElementSize;
4030 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
4031#ifdef ASSERT1
4032 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
4033 assert(offset1 - offset == stackElementSize, "correct arithmetic")do { if (!(offset1 - offset == stackElementSize)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4033, "assert(" "offset1 - offset == stackElementSize" ") failed"
, "correct arithmetic"); ::breakpoint(); } } while (0)
;
4034#endif
4035 Register scale_reg = noreg;
4036 Address::ScaleFactor scale_factor = Address::no_scale;
4037 if (arg_slot.is_constant()) {
4038 offset += arg_slot.as_constant() * stackElementSize;
4039 } else {
4040 scale_reg = arg_slot.as_register();
4041 scale_factor = Address::times(stackElementSize);
4042 }
4043 offset += wordSize; // return PC is on stack
4044 return Address(rsp, scale_reg, scale_factor, offset);
4045}
4046
4047void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
4048 if (!VerifyOops) return;
4049
4050 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
4051 // Pass register number to verify_oop_subroutine
4052 const char* b = NULL__null;
4053 {
4054 ResourceMark rm;
4055 stringStream ss;
4056 ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
4057 b = code_string(ss.as_string());
4058 }
4059#ifdef _LP641
4060 push(rscratch1); // save r10, trashed by movptr()
4061#endif
4062 push(rax); // save rax,
4063 // addr may contain rsp so we will have to adjust it based on the push
4064 // we just did (and on 64 bit we do two pushes)
4065 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
4066 // stores rax into addr which is backwards of what was intended.
4067 if (addr.uses(rsp)) {
4068 lea(rax, addr);
4069 pushptr(Address(rax, LP64_ONLY(2 *)2 * BytesPerWord));
4070 } else {
4071 pushptr(addr);
4072 }
4073
4074 ExternalAddress buffer((address) b);
4075 // pass msg argument
4076 // avoid using pushptr, as it modifies scratch registers
4077 // and our contract is not to modify anything
4078 movptr(rax, buffer.addr());
4079 push(rax);
4080
4081 // call indirectly to solve generation ordering problem
4082 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
4083 call(rax);
4084 // Caller pops the arguments (addr, message) and restores rax, r10.
4085}
4086
4087void MacroAssembler::verify_tlab() {
4088#ifdef ASSERT1
4089 if (UseTLAB && VerifyOops) {
4090 Label next, ok;
4091 Register t1 = rsi;
4092 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread)r15_thread;
4093
4094 push(t1);
4095 NOT_LP64(push(thread_reg));
4096 NOT_LP64(get_thread(thread_reg));
4097
4098 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
4099 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
4100 jcc(Assembler::aboveEqual, next);
4101 STOP("assert(top >= start)")block_comment("assert(top >= start)"); stop("assert(top >= start)"
)
;
4102 should_not_reach_here();
4103
4104 bind(next);
4105 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
4106 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
4107 jcc(Assembler::aboveEqual, ok);
4108 STOP("assert(top <= end)")block_comment("assert(top <= end)"); stop("assert(top <= end)"
)
;
4109 should_not_reach_here();
4110
4111 bind(ok);
4112 NOT_LP64(pop(thread_reg));
4113 pop(t1);
4114 }
4115#endif
4116}
4117
4118class ControlWord {
4119 public:
4120 int32_t _value;
4121
4122 int rounding_control() const { return (_value >> 10) & 3 ; }
4123 int precision_control() const { return (_value >> 8) & 3 ; }
4124 bool precision() const { return ((_value >> 5) & 1) != 0; }
4125 bool underflow() const { return ((_value >> 4) & 1) != 0; }
4126 bool overflow() const { return ((_value >> 3) & 1) != 0; }
4127 bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
4128 bool denormalized() const { return ((_value >> 1) & 1) != 0; }
4129 bool invalid() const { return ((_value >> 0) & 1) != 0; }
4130
4131 void print() const {
4132 // rounding control
4133 const char* rc;
4134 switch (rounding_control()) {
4135 case 0: rc = "round near"; break;
4136 case 1: rc = "round down"; break;
4137 case 2: rc = "round up "; break;
4138 case 3: rc = "chop "; break;
4139 default:
4140 rc = NULL__null; // silence compiler warnings
4141 fatal("Unknown rounding control: %d", rounding_control())do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4141, "Unknown rounding control: %d", rounding_control()); ::
breakpoint(); } while (0)
;
4142 };
4143 // precision control
4144 const char* pc;
4145 switch (precision_control()) {
4146 case 0: pc = "24 bits "; break;
4147 case 1: pc = "reserved"; break;
4148 case 2: pc = "53 bits "; break;
4149 case 3: pc = "64 bits "; break;
4150 default:
4151 pc = NULL__null; // silence compiler warnings
4152 fatal("Unknown precision control: %d", precision_control())do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4152, "Unknown precision control: %d", precision_control())
; ::breakpoint(); } while (0)
;
4153 };
4154 // flags
4155 char f[9];
4156 f[0] = ' ';
4157 f[1] = ' ';
4158 f[2] = (precision ()) ? 'P' : 'p';
4159 f[3] = (underflow ()) ? 'U' : 'u';
4160 f[4] = (overflow ()) ? 'O' : 'o';
4161 f[5] = (zero_divide ()) ? 'Z' : 'z';
4162 f[6] = (denormalized()) ? 'D' : 'd';
4163 f[7] = (invalid ()) ? 'I' : 'i';
4164 f[8] = '\x0';
4165 // output
4166 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
4167 }
4168
4169};
4170
// Bit-field view over a status word stored in the low 16 bits of _value
// (used as FPU_State::_status_word).
class StatusWord {
 public:
  int32_t _value;

  bool busy() const         { return flag(15); }
  bool C3() const           { return flag(14); }
  bool C2() const           { return flag(10); }
  bool C1() const           { return flag(9); }
  bool C0() const           { return flag(8); }
  // Three-bit field at bits 11..13.
  int  top() const          { return (_value >> 11) & 7; }
  bool error_status() const { return flag(7); }
  bool stack_fault() const  { return flag(6); }
  bool precision() const    { return flag(5); }
  bool underflow() const    { return flag(4); }
  bool overflow() const     { return flag(3); }
  bool zero_divide() const  { return flag(2); }
  bool denormalized() const { return flag(1); }
  bool invalid() const      { return flag(0); }

  // Prints the word in hex, then the flag letters ('-' when clear), the
  // condition codes C3..C0 and the top field.
  void print() const {
    // condition codes
    const char c[5] = {
      C3() ? '3' : '-',
      C2() ? '2' : '-',
      C1() ? '1' : '-',
      C0() ? '0' : '-',
      '\x0'
    };
    // flags
    const char f[9] = {
      error_status() ? 'E' : '-',
      stack_fault()  ? 'S' : '-',
      precision()    ? 'P' : '-',
      underflow()    ? 'U' : '-',
      overflow()     ? 'O' : '-',
      zero_divide()  ? 'Z' : '-',
      denormalized() ? 'D' : '-',
      invalid()      ? 'I' : '-',
      '\x0'
    };
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

 private:
  // True when bit 'pos' of _value is set.
  bool flag(int pos) const  { return ((_value >> pos) & 1) != 0; }
};
4214
// Tag word: a sequence of two-bit tags packed into _value, tag i occupying
// bits 2*i and 2*i+1.
class TagWord {
 public:
  int32_t _value;

  // Returns the two-bit tag at index i.
  int tag_at(int i) const {
    const int shift = i * 2;
    return (_value >> shift) & 3;
  }

  // Prints the low 16 bits of the word as four hex digits.
  void print() const {
    printf("%04x", _value & 0xFFFF);
  }
};
4226
// One saved FPU register: two 32-bit words (_m0, _m1) followed by a 16-bit
// word (_ex) whose sign is printed as the register's sign.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True only for the exact pattern _ex == -1, _m1 == 0xC0000000, _m0 == 0.
  bool is_indefinite() const {
    if (_ex != -1)                  return false;
    if (_m1 != (int32_t)0xC0000000) return false;
    return _m0 == 0;
  }

  // Prints sign, _ex and both words in hex, followed by "NaN" when _ex is
  // 0x7FFF or -1.
  void print() const {
    const char sign = (_ex < 0) ? '-' : '+';
    const char* kind;
    if (_ex == 0x7FFF || _ex == (int16_t)-1) {
      kind = "NaN";
    } else {
      kind = " ";
    }
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  }
};
4244
4245class FPU_State {
4246 public:
4247 enum {
4248 register_size = 10,
4249 number_of_registers = 8,
4250 register_mask = 7
4251 };
4252
4253 ControlWord _control_word;
4254 StatusWord _status_word;
4255 TagWord _tag_word;
4256 int32_t _error_offset;
4257 int32_t _error_selector;
4258 int32_t _data_offset;
4259 int32_t _data_selector;
4260 int8_t _register[register_size * number_of_registers];
4261
4262 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
4263 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }
4264
4265 const char* tag_as_string(int tag) const {
4266 switch (tag) {
4267 case 0: return "valid";
4268 case 1: return "zero";
4269 case 2: return "special";
4270 case 3: return "empty";
4271 }
4272 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4272); ::breakpoint(); } while (0)
;
4273 return NULL__null;
4274 }
4275
4276 void print() const {
4277 // print computation registers
4278 { int t = _status_word.top();
4279 for (int i = 0; i < number_of_registers; i++) {
4280 int j = (i - t) & register_mask;
4281 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
4282 st(j)->print();
4283 printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
4284 }
4285 }
4286 printf("\n");
4287 // print control registers
4288 printf("ctrl = "); _control_word.print(); printf("\n");
4289 printf("stat = "); _status_word .print(); printf("\n");
4290 printf("tags = "); _tag_word .print(); printf("\n");
4291 }
4292
4293};
4294
// Bit-field view over a saved flags register value.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const        { return flag(11); }
  bool direction() const       { return flag(10); }
  bool sign() const            { return flag(7); }
  bool zero() const            { return flag(6); }
  bool auxiliary_carry() const { return flag(4); }
  bool parity() const          { return flag(2); }
  bool carry() const           { return flag(0); }

  // Prints the raw value in hex followed by one letter per flag ('-' when
  // the flag is clear).
  void print() const {
    // flags
    const char f[8] = {
      overflow()        ? 'O' : '-',
      direction()       ? 'D' : '-',
      sign()            ? 'S' : '-',
      zero()            ? 'Z' : '-',
      auxiliary_carry() ? 'A' : '-',
      parity()          ? 'P' : '-',
      carry()           ? 'C' : '-',
      '\x0'
    };
    // output
    printf("%08x flags = %s", _value, f);
  }

 private:
  // True when bit 'pos' of _value is set.
  bool flag(int pos) const     { return ((_value >> pos) & 1) != 0; }
};
4323
// A saved 32-bit integer register value.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value as 8 hex digits followed by its decimal form.
  void print() const {
    const int32_t v = _value;
    printf("%08x %11d", v, v);
  }
};
4333
4334class IU_State {
4335 public:
4336 Flag_Register _eflags;
4337 IU_Register _rdi;
4338 IU_Register _rsi;
4339 IU_Register _rbp;
4340 IU_Register _rsp;
4341 IU_Register _rbx;
4342 IU_Register _rdx;
4343 IU_Register _rcx;
4344 IU_Register _rax;
4345
4346 void print() const {
4347 // computation registers
4348 printf("rax, = "); _rax.print(); printf("\n");
4349 printf("rbx, = "); _rbx.print(); printf("\n");
4350 printf("rcx = "); _rcx.print(); printf("\n");
4351 printf("rdx = "); _rdx.print(); printf("\n");
4352 printf("rdi = "); _rdi.print(); printf("\n");
4353 printf("rsi = "); _rsi.print(); printf("\n");
4354 printf("rbp, = "); _rbp.print(); printf("\n");
4355 printf("rsp = "); _rsp.print(); printf("\n");
4356 printf("\n");
4357 // control registers
4358 printf("flgs = "); _eflags.print(); printf("\n");
4359 }
4360};
4361
4362
4363class CPU_State {
4364 public:
4365 FPU_State _fpu_state;
4366 IU_State _iu_state;
4367
4368 void print() const {
4369 printf("--------------------------------------------------\n");
4370 _iu_state .print();
4371 printf("\n");
4372 _fpu_state.print();
4373 printf("--------------------------------------------------\n");
4374 }
4375
4376};
4377
4378
4379static void _print_CPU_state(CPU_State* state) {
4380 state->print();
4381};
4382
4383
4384void MacroAssembler::print_CPU_state() {
4385 push_CPU_state();
4386 push(rsp); // pass CPU state
4387 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)((address)((address_word)(_print_CPU_state)))));
4388 addptr(rsp, wordSize); // discard argument
4389 pop_CPU_state();
4390}
4391
4392
4393#ifndef _LP641
4394static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
4395 static int counter = 0;
4396 FPU_State* fs = &state->_fpu_state;
4397 counter++;
4398 // For leaf calls, only verify that the top few elements remain empty.
4399 // We only need 1 empty at the top for C2 code.
4400 if( stack_depth < 0 ) {
4401 if( fs->tag_for_st(7) != 3 ) {
4402 printf("FPR7 not empty\n");
4403 state->print();
4404 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4404, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4405 return false;
4406 }
4407 return true; // All other stack states do not matter
4408 }
4409
4410 assert((fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std(),do { if (!((fs->_control_word._value & 0xffff) == StubRoutines
::x86::fpu_cntrl_wrd_std())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4411, "assert(" "(fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std()"
") failed", "bad FPU control word"); ::breakpoint(); } } while
(0)
4411 "bad FPU control word")do { if (!((fs->_control_word._value & 0xffff) == StubRoutines
::x86::fpu_cntrl_wrd_std())) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4411, "assert(" "(fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std()"
") failed", "bad FPU control word"); ::breakpoint(); } } while
(0)
;
4412
4413 // compute stack depth
4414 int i = 0;
4415 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
4416 int d = i;
4417 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
4418 // verify findings
4419 if (i != FPU_State::number_of_registers) {
4420 // stack not contiguous
4421 printf("%s: stack not contiguous at ST%d\n", s, i);
4422 state->print();
4423 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4423, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4424 return false;
4425 }
4426 // check if computed stack depth corresponds to expected stack depth
4427 if (stack_depth < 0) {
4428 // expected stack depth is -stack_depth or less
4429 if (d > -stack_depth) {
4430 // too many elements on the stack
4431 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
4432 state->print();
4433 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4433, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4434 return false;
4435 }
4436 } else {
4437 // expected stack depth is stack_depth
4438 if (d != stack_depth) {
4439 // wrong stack depth
4440 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
4441 state->print();
4442 assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4442, "assert(" "false" ") failed", "error"); ::breakpoint(
); } } while (0)
;
4443 return false;
4444 }
4445 }
4446 // everything is cool
4447 return true;
4448}
4449
4450void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
4451 if (!VerifyFPU) return;
4452 push_CPU_state();
4453 push(rsp); // pass CPU state
4454 ExternalAddress msg((address) s);
4455 // pass message string s
4456 pushptr(msg.addr());
4457 push(stack_depth); // pass stack depth
4458 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)((address)((address_word)(_verify_FPU)))));
4459 addptr(rsp, 3 * wordSize); // discard arguments
4460 // check for error
4461 { Label L;
4462 testl(rax, rax);
4463 jcc(Assembler::notZero, L);
4464 int3(); // break if error condition
4465 bind(L);
4466 }
4467 pop_CPU_state();
4468}
4469#endif // _LP64
4470
4471void MacroAssembler::restore_cpu_control_state_after_jni() {
4472 // Either restore the MXCSR register after returning from the JNI Call
4473 // or verify that it wasn't changed (with -Xcheck:jni flag).
4474 if (VM_Version::supports_sse()) {
4475 if (RestoreMXCSROnJNICalls) {
4476 ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()));
4477 } else if (CheckJNICalls) {
4478 call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry()));
4479 }
4480 }
4481 // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
4482 vzeroupper();
4483 // Reset k1 to 0xffff.
4484
4485#ifdef COMPILER21
4486 if (PostLoopMultiversioning && VM_Version::supports_evex()) {
4487 push(rcx);
4488 movl(rcx, 0xffff);
4489 kmovwl(k1, rcx);
4490 pop(rcx);
4491 }
4492#endif // COMPILER2
4493
4494#ifndef _LP641
4495 // Either restore the x87 floating pointer control word after returning
4496 // from the JNI call or verify that it wasn't changed.
4497 if (CheckJNICalls) {
4498 call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry()));
4499 }
4500#endif // _LP64
4501}
4502
4503// ((OopHandle)result).resolve();
4504void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
4505 assert_different_registers(result, tmp);
4506
4507 // Only 64 bit platforms support GCs that require a tmp register
4508 // Only IN_HEAP loads require a thread_tmp register
4509 // OopHandle::resolve is an indirection like jobject.
4510 access_load_at(T_OBJECT, IN_NATIVE,
4511 result, Address(result, 0), tmp, /*tmp_thread*/noreg);
4512}
4513
4514// ((WeakHandle)result).resolve();
4515void MacroAssembler::resolve_weak_handle(Register rresult, Register rtmp) {
4516 assert_different_registers(rresult, rtmp);
4517 Label resolved;
4518
4519 // A null weak handle resolves to null.
4520 cmpptr(rresult, 0);
4521 jcc(Assembler::equal, resolved);
4522
4523 // Only 64 bit platforms support GCs that require a tmp register
4524 // Only IN_HEAP loads require a thread_tmp register
4525 // WeakHandle::resolve is an indirection like jweak.
4526 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
4527 rresult, Address(rresult, 0), rtmp, /*tmp_thread*/noreg);
4528 bind(resolved);
4529}
4530
4531void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
4532 // get mirror
4533 const int mirror_offset = in_bytes(Klass::java_mirror_offset());
4534 load_method_holder(mirror, method);
4535 movptr(mirror, Address(mirror, mirror_offset));
4536 resolve_oop_handle(mirror, tmp);
4537}
4538
4539void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) {
4540 load_method_holder(rresult, rmethod);
4541 movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset()));
4542}
4543
4544void MacroAssembler::load_method_holder(Register holder, Register method) {
4545 movptr(holder, Address(method, Method::const_offset())); // ConstMethod*
4546 movptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool*
4547 movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
4548}
4549
4550void MacroAssembler::load_klass(Register dst, Register src, Register tmp) {
4551 assert_different_registers(src, tmp);
4552 assert_different_registers(dst, tmp);
4553#ifdef _LP641
4554 if (UseCompressedClassPointers) {
4555 movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4556 decode_klass_not_null(dst, tmp);
4557 } else
4558#endif
4559 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
4560}
4561
4562void MacroAssembler::store_klass(Register dst, Register src, Register tmp) {
4563 assert_different_registers(src, tmp);
4564 assert_different_registers(dst, tmp);
4565#ifdef _LP641
4566 if (UseCompressedClassPointers) {
4567 encode_klass_not_null(src, tmp);
4568 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4569 } else
4570#endif
4571 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
4572}
4573
4574void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
4575 Register tmp1, Register thread_tmp) {
4576 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4577 decorators = AccessInternal::decorator_fixup(decorators);
4578 bool as_raw = (decorators & AS_RAW) != 0;
4579 if (as_raw) {
4580 bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4581 } else {
4582 bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
4583 }
4584}
4585
4586void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
4587 Register tmp1, Register tmp2) {
4588 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
4589 decorators = AccessInternal::decorator_fixup(decorators);
4590 bool as_raw = (decorators & AS_RAW) != 0;
4591 if (as_raw) {
4592 bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2);
4593 } else {
4594 bs->store_at(this, decorators, type, dst, src, tmp1, tmp2);
4595 }
4596}
4597
4598void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
4599 Register thread_tmp, DecoratorSet decorators) {
4600 access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
4601}
4602
// Doesn't do verification; generates fixed-size code.
4604void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
4605 Register thread_tmp, DecoratorSet decorators) {
4606 access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp);
4607}
4608
4609void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
4610 Register tmp2, DecoratorSet decorators) {
4611 access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
4612}
4613
4614// Used for storing NULLs.
4615void MacroAssembler::store_heap_oop_null(Address dst) {
4616 access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
4617}
4618
4619#ifdef _LP641
4620void MacroAssembler::store_klass_gap(Register dst, Register src) {
4621 if (UseCompressedClassPointers) {
4622 // Store to klass gap in destination
4623 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
4624 }
4625}
4626
4627#ifdef ASSERT1
4628void MacroAssembler::verify_heapbase(const char* msg) {
4629 assert (UseCompressedOops, "should be compressed")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4629, "assert(" "UseCompressedOops" ") failed", "should be compressed"
); ::breakpoint(); } } while (0)
;
4630 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4630, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4631 if (CheckCompressedOops) {
4632 Label ok;
4633 push(rscratch1); // cmpptr trashes rscratch1
4634 cmpptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
4635 jcc(Assembler::equal, ok);
4636 STOP(msg)block_comment(msg); stop(msg);
4637 bind(ok);
4638 pop(rscratch1);
4639 }
4640}
4641#endif
4642
4643// Algorithm must match oop.inline.hpp encode_heap_oop.
4644void MacroAssembler::encode_heap_oop(Register r) {
4645#ifdef ASSERT1
4646 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
4647#endif
4648 verify_oop_msg(r, "broken oop in encode_heap_oop")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in encode_heap_oop\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4648)
;
4649 if (CompressedOops::base() == NULL__null) {
4650 if (CompressedOops::shift() != 0) {
4651 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4651, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4652 shrq(r, LogMinObjAlignmentInBytes);
4653 }
4654 return;
4655 }
4656 testq(r, r);
4657 cmovq(Assembler::equal, r, r12_heapbase);
4658 subq(r, r12_heapbase);
4659 shrq(r, LogMinObjAlignmentInBytes);
4660}
4661
4662void MacroAssembler::encode_heap_oop_not_null(Register r) {
4663#ifdef ASSERT1
4664 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
4665 if (CheckCompressedOops) {
4666 Label ok;
4667 testq(r, r);
4668 jcc(Assembler::notEqual, ok);
4669 STOP("null oop passed to encode_heap_oop_not_null")block_comment("null oop passed to encode_heap_oop_not_null");
stop("null oop passed to encode_heap_oop_not_null")
;
4670 bind(ok);
4671 }
4672#endif
4673 verify_oop_msg(r, "broken oop in encode_heap_oop_not_null")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in encode_heap_oop_not_null\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4673)
;
4674 if (CompressedOops::base() != NULL__null) {
4675 subq(r, r12_heapbase);
4676 }
4677 if (CompressedOops::shift() != 0) {
4678 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4678, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4679 shrq(r, LogMinObjAlignmentInBytes);
4680 }
4681}
4682
4683void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
4684#ifdef ASSERT1
4685 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
4686 if (CheckCompressedOops) {
4687 Label ok;
4688 testq(src, src);
4689 jcc(Assembler::notEqual, ok);
4690 STOP("null oop passed to encode_heap_oop_not_null2")block_comment("null oop passed to encode_heap_oop_not_null2")
; stop("null oop passed to encode_heap_oop_not_null2")
;
4691 bind(ok);
4692 }
4693#endif
4694 verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2")_verify_oop_checked(src, "broken oop " "src" ", " "\"broken oop in encode_heap_oop_not_null2\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4694)
;
4695 if (dst != src) {
4696 movq(dst, src);
4697 }
4698 if (CompressedOops::base() != NULL__null) {
4699 subq(dst, r12_heapbase);
4700 }
4701 if (CompressedOops::shift() != 0) {
4702 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4702, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4703 shrq(dst, LogMinObjAlignmentInBytes);
4704 }
4705}
4706
4707void MacroAssembler::decode_heap_oop(Register r) {
4708#ifdef ASSERT1
4709 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
4710#endif
4711 if (CompressedOops::base() == NULL__null) {
4712 if (CompressedOops::shift() != 0) {
4713 assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4713, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4714 shlq(r, LogMinObjAlignmentInBytes);
4715 }
4716 } else {
4717 Label done;
4718 shlq(r, LogMinObjAlignmentInBytes);
4719 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4719)
;
4720 addq(r, r12_heapbase);
4721 bind(done);
4722 }
4723 verify_oop_msg(r, "broken oop in decode_heap_oop")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in decode_heap_oop\""
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4723)
;
4724}
4725
4726void MacroAssembler::decode_heap_oop_not_null(Register r) {
4727 // Note: it will change flags
4728 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4728, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4729 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4729, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4730 // Cannot assert, unverified entry point counts instructions (see .ad file)
4731 // vtableStubs also counts instructions in pd_code_size_limit.
4732 // Also do not verify_oop as this is called by verify_oop.
4733 if (CompressedOops::shift() != 0) {
4734 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4734, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4735 shlq(r, LogMinObjAlignmentInBytes);
4736 if (CompressedOops::base() != NULL__null) {
4737 addq(r, r12_heapbase);
4738 }
4739 } else {
4740 assert (CompressedOops::base() == NULL, "sanity")do { if (!(CompressedOops::base() == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4740, "assert(" "CompressedOops::base() == __null" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
4741 }
4742}
4743
4744void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
4745 // Note: it will change flags
4746 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4746, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4747 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4747, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4748 // Cannot assert, unverified entry point counts instructions (see .ad file)
4749 // vtableStubs also counts instructions in pd_code_size_limit.
4750 // Also do not verify_oop as this is called by verify_oop.
4751 if (CompressedOops::shift() != 0) {
4752 assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4752, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4753 if (LogMinObjAlignmentInBytes == Address::times_8) {
4754 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
4755 } else {
4756 if (dst != src) {
4757 movq(dst, src);
4758 }
4759 shlq(dst, LogMinObjAlignmentInBytes);
4760 if (CompressedOops::base() != NULL__null) {
4761 addq(dst, r12_heapbase);
4762 }
4763 }
4764 } else {
4765 assert (CompressedOops::base() == NULL, "sanity")do { if (!(CompressedOops::base() == __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4765, "assert(" "CompressedOops::base() == __null" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
4766 if (dst != src) {
4767 movq(dst, src);
4768 }
4769 }
4770}
4771
// Emits code that encodes the Klass pointer held in r, in place:
//   - if CompressedKlassPointers::base() is non-NULL, the base is loaded into
//     tmp with mov64 and subtracted from r;
//   - if CompressedKlassPointers::shift() is non-zero, r is then shifted right
//     by LogKlassAlignmentInBytes (asserted to equal the shift).
// r and tmp must be different registers (checked by
// assert_different_registers). tmp is written only on the non-NULL-base path.
4772void MacroAssembler::encode_klass_not_null(Register r, Register tmp) {
4773 assert_different_registers(r, tmp);
4774 if (CompressedKlassPointers::base() != NULL__null) {
     // r -= base, using tmp to hold the 64-bit base constant.
4775 mov64(tmp, (int64_t)CompressedKlassPointers::base());
4776 subq(r, tmp);
4777 }
4778 if (CompressedKlassPointers::shift() != 0) {
4779 assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4779, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4780 shrq(r, LogKlassAlignmentInBytes);
4781 }
4782}
4783
// Emits code that writes the encoded form of the Klass pointer in src to dst.
// Every instruction emitted here has dst as its destination operand; src is
// only used as a source operand.
//   - non-NULL base: dst is loaded with the negated base and src is added,
//     giving src - base without needing a temporary register;
//   - NULL base: src is moved to dst with movptr;
//   - non-zero shift: dst is then shifted right by LogKlassAlignmentInBytes
//     (asserted to equal the shift).
// src and dst must be different registers (checked by
// assert_different_registers).
4784void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) {
4785 assert_different_registers(src, dst);
4786 if (CompressedKlassPointers::base() != NULL__null) {
4787 mov64(dst, -(int64_t)CompressedKlassPointers::base());
4788 addq(dst, src);
4789 } else {
4790 movptr(dst, src);
4791 }
4792 if (CompressedKlassPointers::shift() != 0) {
4793 assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4793, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4794 shrq(dst, LogKlassAlignmentInBytes);
4795 }
4796}
4797
// Emits code that decodes the narrow klass value held in r, in place:
//   - if CompressedKlassPointers::shift() is non-zero, r is shifted left by
//     LogKlassAlignmentInBytes (asserted to equal the shift);
//   - if CompressedKlassPointers::base() is non-NULL, the base is loaded into
//     tmp with mov64 and added to r.
// Requires UseCompressedClassPointers (asserted) and r != tmp (checked by
// assert_different_registers). tmp is written only on the non-NULL-base path.
4798void MacroAssembler::decode_klass_not_null(Register r, Register tmp) {
4799 assert_different_registers(r, tmp);
4800 // Note: it will change flags
4801 assert(UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4801, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4802 // Cannot assert, unverified entry point counts instructions (see .ad file)
4803 // vtableStubs also counts instructions in pd_code_size_limit.
4804 // Also do not verify_oop as this is called by verify_oop.
4805 if (CompressedKlassPointers::shift() != 0) {
4806 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4806, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4807 shlq(r, LogKlassAlignmentInBytes);
4808 }
4809 if (CompressedKlassPointers::base() != NULL__null) {
     // r += base, using tmp to hold the 64-bit base constant.
4810 mov64(tmp, (int64_t)CompressedKlassPointers::base());
4811 addq(r, tmp);
4812 }
4813}
4814
// Emits code that decodes the narrow klass value in src into dst. Every
// instruction emitted here has dst as its destination operand.
//   - NULL base and zero shift: a single movl(dst, src);
//   - otherwise dst is first set to the base (mov64) or cleared (xorq), then
//       * non-zero shift: leaq computes dst + src * 8 (the shift is asserted
//         to equal LogKlassAlignmentInBytes, which in turn is asserted to
//         equal Address::times_8);
//       * zero shift: src is added to dst.
// Requires UseCompressedClassPointers (asserted) and src != dst (checked by
// assert_different_registers).
4815void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) {
4816 assert_different_registers(src, dst);
4817 // Note: it will change flags
4818 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4818, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4819 // Cannot assert, unverified entry point counts instructions (see .ad file)
4820 // vtableStubs also counts instructions in pd_code_size_limit.
4821 // Also do not verify_oop as this is called by verify_oop.
4822
4823 if (CompressedKlassPointers::base() == NULL__null &&
4824 CompressedKlassPointers::shift() == 0) {
4825 // The best case scenario is that there is no base or shift. Then it is already
4826 // a pointer that needs nothing but a register rename.
4827 movl(dst, src);
4828 } else {
4829 if (CompressedKlassPointers::base() != NULL__null) {
4830 mov64(dst, (int64_t)CompressedKlassPointers::base());
4831 } else {
4832 xorq(dst, dst);
4833 }
4834 if (CompressedKlassPointers::shift() != 0) {
4835 assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers
::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4835, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()"
") failed", "decode alg wrong"); ::breakpoint(); } } while (
0)
;
4836 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?")do { if (!(LogKlassAlignmentInBytes == Address::times_8)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4836, "assert(" "LogKlassAlignmentInBytes == Address::times_8"
") failed", "klass not aligned on 64bits?"); ::breakpoint();
} } while (0)
;
     // dst = dst (base or 0) + src * 8, done with one address computation.
4837 leaq(dst, Address(dst, src, Address::times_8, 0));
4838 } else {
4839 addq(dst, src);
4840 }
4841 }
4842}
4843
// Register-destination overload. Looks up obj in this assembler's
// OopRecorder (find_index), builds an oop relocation spec for the resulting
// index, and passes dst, the index and the spec to mov_narrow_oop.
// Asserts that UseCompressedOops is set, that Universe::heap() is non-NULL
// and that oop_recorder() is non-NULL.
4844void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
4845 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4845, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4846 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4846, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4847 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4847, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4848 int oop_index = oop_recorder()->find_index(obj);
4849 RelocationHolder rspec = oop_Relocation::spec(oop_index);
     // The recorder index itself is what gets passed as the value operand.
4850 mov_narrow_oop(dst, oop_index, rspec);
4851}
4852
// Address-destination overload. Looks up obj in this assembler's OopRecorder
// (find_index), builds an oop relocation spec for the resulting index, and
// passes dst, the index and the spec to mov_narrow_oop.
// Asserts that UseCompressedOops is set, that Universe::heap() is non-NULL
// and that oop_recorder() is non-NULL.
4853void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
4854 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4854, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4855 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4855, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4856 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4856, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4857 int oop_index = oop_recorder()->find_index(obj);
4858 RelocationHolder rspec = oop_Relocation::spec(oop_index);
4859 mov_narrow_oop(dst, oop_index, rspec);
4860}
4861
// Register-destination overload. Looks up k in this assembler's OopRecorder
// (find_index) and builds a metadata relocation spec for the resulting index.
// The value passed to mov_narrow_oop is CompressedKlassPointers::encode(k);
// the recorder index is used only to build the relocation spec.
// Asserts that UseCompressedClassPointers is set and that oop_recorder() is
// non-NULL.
4862void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
4863 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4863, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4864 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4864, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4865 int klass_index = oop_recorder()->find_index(k);
4866 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4867 mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4868}
4869
// Address-destination overload. Looks up k in this assembler's OopRecorder
// (find_index) and builds a metadata relocation spec for the resulting index.
// The value passed to mov_narrow_oop is CompressedKlassPointers::encode(k);
// the recorder index is used only to build the relocation spec.
// Asserts that UseCompressedClassPointers is set and that oop_recorder() is
// non-NULL.
4870void MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
4871 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4871, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4872 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4872, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4873 int klass_index = oop_recorder()->find_index(k);
4874 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4875 mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4876}
4877
// Register-operand overload. Looks up obj in this assembler's OopRecorder
// (find_index), builds an oop relocation spec for the resulting index, and
// forwards dst, the index and the spec to Assembler::cmp_narrow_oop.
// Asserts that UseCompressedOops is set, that Universe::heap() is non-NULL
// and that oop_recorder() is non-NULL.
4878void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
4879 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4879, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4880 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4880, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4881 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4881, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4882 int oop_index = oop_recorder()->find_index(obj);
4883 RelocationHolder rspec = oop_Relocation::spec(oop_index);
4884 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
4885}
4886
// Address-operand overload. Looks up obj in this assembler's OopRecorder
// (find_index), builds an oop relocation spec for the resulting index, and
// forwards dst, the index and the spec to Assembler::cmp_narrow_oop.
// Asserts that UseCompressedOops is set, that Universe::heap() is non-NULL
// and that oop_recorder() is non-NULL.
4887void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
4888 assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4888, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4889 assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4889, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized"
); ::breakpoint(); } } while (0)
;
4890 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4890, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4891 int oop_index = oop_recorder()->find_index(obj);
4892 RelocationHolder rspec = oop_Relocation::spec(oop_index);
4893 Assembler::cmp_narrow_oop(dst, oop_index, rspec);
4894}
4895
// Register-operand overload. Looks up k in this assembler's OopRecorder
// (find_index) and builds a metadata relocation spec for the resulting index.
// The value forwarded to Assembler::cmp_narrow_oop is
// CompressedKlassPointers::encode(k); the recorder index is used only to
// build the relocation spec.
// Asserts that UseCompressedClassPointers is set and that oop_recorder() is
// non-NULL.
4896void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
4897 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4897, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4898 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4898, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4899 int klass_index = oop_recorder()->find_index(k);
4900 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4901 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4902}
4903
// Address-operand overload. Looks up k in this assembler's OopRecorder
// (find_index) and builds a metadata relocation spec for the resulting index.
// The value forwarded to Assembler::cmp_narrow_oop is
// CompressedKlassPointers::encode(k); the recorder index is used only to
// build the relocation spec.
// Asserts that UseCompressedClassPointers is set and that oop_recorder() is
// non-NULL.
4904void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
4905 assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4905, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers"
); ::breakpoint(); } } while (0)
;
4906 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4906, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder"
); ::breakpoint(); } } while (0)
;
4907 int klass_index = oop_recorder()->find_index(k);
4908 RelocationHolder rspec = metadata_Relocation::spec(klass_index);
4909 Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec);
4910}
4911
// Emits code that (re)loads r12_heapbase. Does nothing unless
// UseCompressedOops is set.
//   - Universe::heap() non-NULL and CompressedOops::base() NULL:
//     r12_heapbase is cleared by xor-ing it with itself;
//   - Universe::heap() non-NULL and base non-NULL: the current value of
//     CompressedOops::ptrs_base() is embedded as a 64-bit immediate;
//   - Universe::heap() NULL: a movptr from the external address
//     CompressedOops::ptrs_base_addr() is emitted instead of an immediate
//     (presumably so the value is read when the generated code runs, since it
//     is not known yet -- confirm against CompressedOops).
4912void MacroAssembler::reinit_heapbase() {
4913 if (UseCompressedOops) {
4914 if (Universe::heap() != NULL__null) {
4915 if (CompressedOops::base() == NULL__null) {
4916 MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
4917 } else {
4918 mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base());
4919 }
4920 } else {
4921 movptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()));
4922 }
4923 }
4924}
4925
4926#endif // _LP64
4927
4928 // C2 compiled method's prolog code.
//
// framesize       - frame size including the return-address word; must be a
//                   multiple of StackAlignmentInBytes (asserted). It is
//                   reduced locally as the return-address word, the rbp slot
//                   and the optional cookie slot are accounted for.
// stack_bang_size - must be >= framesize or <= 0 (asserted). After the
//                   return-address word is subtracted, a value > 0 selects the
//                   path that calls generate_stack_overflow_check() and
//                   pushes rbp; otherwise the frame is allocated first with
//                   subptr_imm32 and rbp is stored into it with movptr.
// fp_mode_24b     - read only inside the `#ifndef _LP64` section, where it
//                   triggers an fldcw of
//                   StubRoutines::x86::addr_fpu_cntrl_wrd_24().
// is_stub         - when false, the barrier set assembler's
//                   nmethod_entry_barrier(this) is called last.
4929void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
4930
4931 // WARNING: Initial instruction MUST be 5 bytes or longer so that
4932 // NativeJump::patch_verified_entry will be able to patch out the entry
4933 // code safely. The push to verify stack depth is ok at 5 bytes,
4934 // the frame allocation can be either 3 or 6 bytes. So if we don't do
4935 // stack bang then we must use the 6 byte frame allocation even if
4936 // we have no frame. :-(
4937 assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect")do { if (!(stack_bang_size >= framesize || stack_bang_size
<= 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4937, "assert(" "stack_bang_size >= framesize || stack_bang_size <= 0"
") failed", "stack bang size incorrect"); ::breakpoint(); } }
while (0)
;
4938
4939 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned")do { if (!((framesize & (StackAlignmentInBytes-1)) == 0))
{ (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 4939, "assert(" "(framesize & (StackAlignmentInBytes-1)) == 0"
") failed", "frame size not aligned"); ::breakpoint(); } } while
(0)
;
4940 // Remove word for return addr
4941 framesize -= wordSize;
4942 stack_bang_size -= wordSize;
4943
4944 // Calls to C2R adapters often do not accept exceptional returns.
4945 // We require that their callers must bang for them. But be careful, because
4946 // some VM calls (such as call site linkage) can use several kilobytes of
4947 // stack. But the stack safety zone should account for that.
4948 // See bugs 4446381, 4468289, 4497237.
4949 if (stack_bang_size > 0) {
4950 generate_stack_overflow_check(stack_bang_size);
4951
4952 // We always push rbp, so that on return to interpreter rbp, will be
4953 // restored correctly and we can correct the stack.
4954 push(rbp);
4955 // Save caller's stack pointer into RBP if the frame pointer is preserved.
4956 if (PreserveFramePointer) {
4957 mov(rbp, rsp);
4958 }
4959 // Remove word for ebp
4960 framesize -= wordSize;
4961
4962 // Create frame
4963 if (framesize) {
4964 subptr(rsp, framesize);
4965 }
4966 } else {
4967 // Create frame (force generation of a 4 byte immediate value)
4968 subptr_imm32(rsp, framesize);
4969
4970 // Save RBP register now.
4971 framesize -= wordSize;
4972 movptr(Address(rsp, framesize), rbp);
4973 // Save caller's stack pointer into RBP if the frame pointer is preserved.
4974 if (PreserveFramePointer) {
4975 movptr(rbp, rsp);
4976 if (framesize > 0) {
4977 addptr(rbp, framesize);
4978 }
4979 }
4980 }
4981
4982 if (VerifyStackAtCalls) { // Magic cookie to verify stack depth
     // The cookie is stored in the slot directly below the saved rbp slot.
4983 framesize -= wordSize;
4984 movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
4985 }
4986
4987#ifndef _LP641
4988 // If method sets FPU control word do it now
4989 if (fp_mode_24b) {
4990 fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
4991 }
4992 if (UseSSE >= 2 && VerifyFPU) {
4993 verify_FPU(0, "FPU stack must be clean on entry");
4994 }
4995#endif
4996
4997#ifdef ASSERT1
4998 if (VerifyStackAtCalls) {
     // Debug check: rsp modulo StackAlignmentInBytes must equal
     // StackAlignmentInBytes - wordSize at this point; rax is saved and
     // restored around the computation.
4999 Label L;
5000 push(rax);
5001 mov(rax, rsp);
5002 andptr(rax, StackAlignmentInBytes-1);
5003 cmpptr(rax, StackAlignmentInBytes-wordSize);
5004 pop(rax);
5005 jcc(Assembler::equal, L);
5006 STOP("Stack is not properly aligned!")block_comment("Stack is not properly aligned!"); stop("Stack is not properly aligned!"
)
;
5007 bind(L);
5008 }
5009#endif
5010
5011 if (!is_stub) {
5012 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
5013 bs->nmethod_entry_barrier(this);
5014 }
5015}
5016
5017#if COMPILER2_OR_JVMCI1
5018
5019// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
//
// base - start address register; advanced by the emitted code.
// cnt  - qword count register; decremented/adjusted by the emitted code.
// rtmp - passed through to fill64_masked / fill32_masked only.
// xtmp - zeroed first (vpxor or pxor) and used as the store source.
// mask - passed through to fill64_masked / fill32_masked only.
//
// Shape of the emitted code:
//   1. a loop that clears 64 bytes per iteration while at least 8 qwords
//      remain;
//   2. when the 64-byte vector form is selected (MaxVectorSize == 64 and
//      VM_Version::avx3_threshold() == 0), the remainder is handled by a
//      single fill64_masked and the code jumps to the end;
//   3. otherwise an optional 32-byte store, then either fill32_masked (when
//      UseAVX > 2, MaxVectorSize >= 32 and avx512vl is supported) or a loop
//      storing one qword at a time.
5020void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5021 // cnt - number of qwords (8-byte words).
5022 // base - start address, qword aligned.
5023 Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
5024 bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
5025 if (use64byteVector) {
5026 vpxor(xtmp, xtmp, xtmp, AVX_512bit);
5027 } else if (MaxVectorSize >= 32) {
5028 vpxor(xtmp, xtmp, xtmp, AVX_256bit);
5029 } else {
5030 pxor(xtmp, xtmp);
5031 }
     // Enter the loop at its condition check so a count below 8 skips the body.
5032 jmp(L_zero_64_bytes);
5033
5034 BIND(L_loop);
5035 if (MaxVectorSize >= 32) {
5036 fill64(base, 0, xtmp, use64byteVector);
5037 } else {
5038 movdqu(Address(base, 0), xtmp);
5039 movdqu(Address(base, 16), xtmp);
5040 movdqu(Address(base, 32), xtmp);
5041 movdqu(Address(base, 48), xtmp);
5042 }
5043 addptr(base, 64);
5044
5045 BIND(L_zero_64_bytes);
5046 subptr(cnt, 8);
5047 jccb(Assembler::greaterEqual, L_loop)jccb_0(Assembler::greaterEqual, L_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5047)
;
5048
5049 // Clear the trailing part: fewer than 8 qwords remain once the loop exits
5050 if (use64byteVector) {
     // Undo the last subtraction; a zero remainder means nothing is left.
5051 addptr(cnt, 8);
5052 jccb(Assembler::equal, L_end)jccb_0(Assembler::equal, L_end, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5052)
;
5053 fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true);
5054 jmp(L_end);
5055 } else {
     // cnt currently holds remainder - 8; adding 4 tests for >= 4 qwords left.
5056 addptr(cnt, 4);
5057 jccb(Assembler::less, L_tail)jccb_0(Assembler::less, L_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5057)
;
5058 if (MaxVectorSize >= 32) {
5059 vmovdqu(Address(base, 0), xtmp);
5060 } else {
5061 movdqu(Address(base, 0), xtmp);
5062 movdqu(Address(base, 16), xtmp);
5063 }
5064 }
5065 addptr(base, 32);
5066 subptr(cnt, 4);
5067
5068 BIND(L_tail);
5069 addptr(cnt, 4);
5070 jccb(Assembler::lessEqual, L_end)jccb_0(Assembler::lessEqual, L_end, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5070)
;
5071 if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
5072 fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp);
5073 } else {
5074 decrement(cnt);
5075
     // One qword per iteration until cnt goes negative.
5076 BIND(L_sloop);
5077 movq(Address(base, 0), xtmp);
5078 addptr(base, 8);
5079 decrement(cnt);
5080 jccb(Assembler::greaterEqual, L_sloop)jccb_0(Assembler::greaterEqual, L_sloop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5080)
;
5081 }
5082 BIND(L_end);
5083}
5084
5085// Clearing constant sized memory using YMM/ZMM registers.
//
// base - start address register; never modified here, all stores use
//        Address(base, displacement).
// cnt  - count known at code-generation time, in qwords: cnt >> 3 full
//        64-byte chunks are cleared with fill64, and the remaining cnt & 7
//        qwords are handled by the switch below.
// rtmp - scratch register used to materialise the opmask bits (tail cases 3
//        and 7, plus 5 and 6 when the 64-byte vector form is used).
// xtmp - zeroed with vpxor and used as the store source.
// mask - opmask register loaded from rtmp in those same tail cases.
//
// Requires UseAVX > 2 and VM_Version::supports_avx512vlbw() (asserted).
5086void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
5087 assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "")do { if (!(UseAVX > 2 && VM_Version::supports_avx512vlbw
())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5087, "assert(" "UseAVX > 2 && VM_Version::supports_avx512vlbw()"
") failed", ""); ::breakpoint(); } } while (0)
;
5088 bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);
5089
5090 int vector64_count = (cnt & (~0x7)) >> 3;
5091 cnt = cnt & 0x7;
5092
5093 // 64 byte initialization loop.
5094 vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit);
5095 for (int i = 0; i < vector64_count; i++) {
5096 fill64(base, i * 64, xtmp, use64byteVector);
5097 }
5098
5099 // Clear remaining 64 byte tail.
5100 int disp = vector64_count * 64;
5101 if (cnt) {
     // cnt is now 1..7 qwords; each case emits the stores covering exactly
     // that many qwords starting at displacement disp.
5102 switch (cnt) {
5103 case 1:
5104 movq(Address(base, disp), xtmp);
5105 break;
5106 case 2:
5107 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_128bit);
5108 break;
5109 case 3:
5110 movl(rtmp, 0x7);
5111 kmovwl(mask, rtmp);
5112 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_256bit);
5113 break;
5114 case 4:
5115 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5116 break;
5117 case 5:
5118 if (use64byteVector) {
5119 movl(rtmp, 0x1F);
5120 kmovwl(mask, rtmp);
5121 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5122 } else {
5123 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5124 movq(Address(base, disp + 32), xtmp);
5125 }
5126 break;
5127 case 6:
5128 if (use64byteVector) {
5129 movl(rtmp, 0x3F);
5130 kmovwl(mask, rtmp);
5131 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5132 } else {
5133 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5134 evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, Assembler::AVX_128bit);
5135 }
5136 break;
5137 case 7:
5138 if (use64byteVector) {
5139 movl(rtmp, 0x7F);
5140 kmovwl(mask, rtmp);
5141 evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
5142 } else {
5143 evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
5144 movl(rtmp, 0x7);
5145 kmovwl(mask, rtmp);
5146 evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
5147 }
5148 break;
5149 default:
5150 fatal("Unexpected length : %d\n",cnt)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5150, "Unexpected length : %d\n",cnt); ::breakpoint(); } while
(0)
;
5151 break;
5152 }
5153 }
5154}
5155
// Emits code that clears 'cnt' qwords starting at 'base', where the count is
// only known at run time (it lives in a register).
//
// base, tmp and cnt must be rdi, rax and rcx respectively (asserted, per the
// assert messages because of rep stos). xtmp and mask are only passed on to
// xmm_clear_mem.
//
// Shape of the emitted code:
//   - tmp is zeroed unless both is_large and UseXMMForObjInit are set;
//   - when !is_large, counts not greater than InitArrayShortSize/BytesPerLong
//     are cleared by a loop of pointer-sized stores indexed by cnt counting
//     down, then control jumps to DONE; larger counts branch to LONG;
//   - the long path uses rep_stosb (UseFastStosb, after converting cnt to
//     bytes), else xmm_clear_mem (UseXMMForObjInit), else rep_stos.
5156void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
5157 bool is_large, KRegister mask) {
5158 // cnt - number of qwords (8-byte words).
5159 // base - start address, qword aligned.
5160 // is_large - if optimizers know cnt is larger than InitArrayShortSize
5161 assert(base==rdi, "base register must be edi for rep stos")do { if (!(base==rdi)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5161, "assert(" "base==rdi" ") failed", "base register must be edi for rep stos"
); ::breakpoint(); } } while (0)
;
5162 assert(tmp==rax, "tmp register must be eax for rep stos")do { if (!(tmp==rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5162, "assert(" "tmp==rax" ") failed", "tmp register must be eax for rep stos"
); ::breakpoint(); } } while (0)
;
5163 assert(cnt==rcx, "cnt register must be ecx for rep stos")do { if (!(cnt==rcx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5163, "assert(" "cnt==rcx" ") failed", "cnt register must be ecx for rep stos"
); ::breakpoint(); } } while (0)
;
5164 assert(InitArrayShortSize % BytesPerLong == 0,do { if (!(InitArrayShortSize % BytesPerLong == 0)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5165, "assert(" "InitArrayShortSize % BytesPerLong == 0" ") failed"
, "InitArrayShortSize should be the multiple of BytesPerLong"
); ::breakpoint(); } } while (0)
5165 "InitArrayShortSize should be the multiple of BytesPerLong")do { if (!(InitArrayShortSize % BytesPerLong == 0)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5165, "assert(" "InitArrayShortSize % BytesPerLong == 0" ") failed"
, "InitArrayShortSize should be the multiple of BytesPerLong"
); ::breakpoint(); } } while (0)
;
5166
5167 Label DONE;
     // tmp is the zero source for the short loop and for rep stos/stosb; it is
     // skipped only when the large path is known to go through xmm_clear_mem.
5168 if (!is_large || !UseXMMForObjInit) {
5169 xorptr(tmp, tmp);
5170 }
5171
5172 if (!is_large) {
5173 Label LOOP, LONG;
5174 cmpptr(cnt, InitArrayShortSize/BytesPerLong);
5175 jccb(Assembler::greater, LONG)jccb_0(Assembler::greater, LONG, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5175)
;
5176
5177 NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5178
5179 decrement(cnt);
5180 jccb(Assembler::negative, DONE)jccb_0(Assembler::negative, DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5180)
; // Zero length
5181
5182 // Use individual pointer-sized stores for small counts:
5183 BIND(LOOP);
5184 movptr(Address(base, cnt, Address::times_ptr), tmp);
5185 decrement(cnt);
5186 jccb(Assembler::greaterEqual, LOOP)jccb_0(Assembler::greaterEqual, LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5186)
;
5187 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5187)
;
5188
5189 BIND(LONG);
5190 }
5191
5192 // Use longer rep-prefixed ops for non-small counts:
5193 if (UseFastStosb) {
5194 shlptr(cnt, 3); // convert to number of bytes
5195 rep_stosb();
5196 } else if (UseXMMForObjInit) {
5197 xmm_clear_mem(base, cnt, tmp, xtmp, mask);
5198 } else {
5199 NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
5200 rep_stos();
5201 }
5202
5203 BIND(DONE);
5204}
5205
5206#endif //COMPILER2_OR_JVMCI
5207
5208
5209void MacroAssembler::generate_fill(BasicType t, bool aligned,
5210 Register to, Register value, Register count,
5211 Register rtmp, XMMRegister xtmp) {
5212 ShortBranchVerifier sbv(this);
5213 assert_different_registers(to, value, count, rtmp);
5214 Label L_exit;
5215 Label L_fill_2_bytes, L_fill_4_bytes;
5216
5217#if defined(COMPILER21) && defined(_LP641)
5218 if(MaxVectorSize >=32 &&
5219 VM_Version::supports_avx512vlbw() &&
5220 VM_Version::supports_bmi2()) {
5221 generate_fill_avx3(t, to, value, count, rtmp, xtmp);
5222 return;
5223 }
5224#endif
5225
5226 int shift = -1;
5227 switch (t) {
5228 case T_BYTE:
5229 shift = 2;
5230 break;
5231 case T_SHORT:
5232 shift = 1;
5233 break;
5234 case T_INT:
5235 shift = 0;
5236 break;
5237 default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5237); ::breakpoint(); } while (0)
;
5238 }
5239
5240 if (t == T_BYTE) {
5241 andl(value, 0xff);
5242 movl(rtmp, value);
5243 shll(rtmp, 8);
5244 orl(value, rtmp);
5245 }
5246 if (t == T_SHORT) {
5247 andl(value, 0xffff);
5248 }
5249 if (t == T_BYTE || t == T_SHORT) {
5250 movl(rtmp, value);
5251 shll(rtmp, 16);
5252 orl(value, rtmp);
5253 }
5254
5255 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
5256 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
5257 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
5258 Label L_skip_align2;
5259 // align source address at 4 bytes address boundary
5260 if (t == T_BYTE) {
5261 Label L_skip_align1;
5262 // One byte misalignment happens only for byte arrays
5263 testptr(to, 1);
5264 jccb(Assembler::zero, L_skip_align1)jccb_0(Assembler::zero, L_skip_align1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5264)
;
5265 movb(Address(to, 0), value);
5266 increment(to);
5267 decrement(count);
5268 BIND(L_skip_align1);
5269 }
5270 // Two bytes misalignment happens only for byte and short (char) arrays
5271 testptr(to, 2);
5272 jccb(Assembler::zero, L_skip_align2)jccb_0(Assembler::zero, L_skip_align2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5272)
;
5273 movw(Address(to, 0), value);
5274 addptr(to, 2);
5275 subl(count, 1<<(shift-1));
5276 BIND(L_skip_align2);
5277 }
5278 if (UseSSE < 2) {
5279 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
5280 // Fill 32-byte chunks
5281 subl(count, 8 << shift);
5282 jcc(Assembler::less, L_check_fill_8_bytes);
5283 align(16);
5284
5285 BIND(L_fill_32_bytes_loop);
5286
5287 for (int i = 0; i < 32; i += 4) {
5288 movl(Address(to, i), value);
5289 }
5290
5291 addptr(to, 32);
5292 subl(count, 8 << shift);
5293 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
5294 BIND(L_check_fill_8_bytes);
5295 addl(count, 8 << shift);
5296 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5296)
;
5297 jmpb(L_fill_8_bytes)jmpb_0(L_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5297)
;
5298
5299 //
5300 // length is too short, just fill qwords
5301 //
5302 BIND(L_fill_8_bytes_loop);
5303 movl(Address(to, 0), value);
5304 movl(Address(to, 4), value);
5305 addptr(to, 8);
5306 BIND(L_fill_8_bytes);
5307 subl(count, 1 << (shift + 1));
5308 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
5309 // fall through to fill 4 bytes
5310 } else {
5311 Label L_fill_32_bytes;
5312 if (!UseUnalignedLoadStores) {
5313 // align to 8 bytes, we know we are 4 byte aligned to start
5314 testptr(to, 4);
5315 jccb(Assembler::zero, L_fill_32_bytes)jccb_0(Assembler::zero, L_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5315)
;
5316 movl(Address(to, 0), value);
5317 addptr(to, 4);
5318 subl(count, 1<<shift);
5319 }
5320 BIND(L_fill_32_bytes);
5321 {
5322 assert( UseSSE >= 2, "supported cpu only" )do { if (!(UseSSE >= 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5322, "assert(" "UseSSE >= 2" ") failed", "supported cpu only"
); ::breakpoint(); } } while (0)
;
5323 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
5324 movdl(xtmp, value);
5325 if (UseAVX >= 2 && UseUnalignedLoadStores) {
5326 Label L_check_fill_32_bytes;
5327 if (UseAVX > 2) {
5328 // Fill 64-byte chunks
5329 Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2;
5330
5331 // If number of bytes to fill < VM_Version::avx3_threshold(), perform fill using AVX2
5332 cmpl(count, VM_Version::avx3_threshold());
5333 jccb(Assembler::below, L_check_fill_64_bytes_avx2)jccb_0(Assembler::below, L_check_fill_64_bytes_avx2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5333)
;
5334
5335 vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
5336
5337 subl(count, 16 << shift);
5338 jccb(Assembler::less, L_check_fill_32_bytes)jccb_0(Assembler::less, L_check_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5338)
;
5339 align(16);
5340
5341 BIND(L_fill_64_bytes_loop_avx3);
5342 evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit);
5343 addptr(to, 64);
5344 subl(count, 16 << shift);
5345 jcc(Assembler::greaterEqual, L_fill_64_bytes_loop_avx3);
5346 jmpb(L_check_fill_32_bytes)jmpb_0(L_check_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5346)
;
5347
5348 BIND(L_check_fill_64_bytes_avx2);
5349 }
5350 // Fill 64-byte chunks
5351 Label L_fill_64_bytes_loop;
5352 vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit);
5353
5354 subl(count, 16 << shift);
5355 jcc(Assembler::less, L_check_fill_32_bytes);
5356 align(16);
5357
5358 BIND(L_fill_64_bytes_loop);
5359 vmovdqu(Address(to, 0), xtmp);
5360 vmovdqu(Address(to, 32), xtmp);
5361 addptr(to, 64);
5362 subl(count, 16 << shift);
5363 jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
5364
5365 BIND(L_check_fill_32_bytes);
5366 addl(count, 8 << shift);
5367 jccb(Assembler::less, L_check_fill_8_bytes)jccb_0(Assembler::less, L_check_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5367)
;
5368 vmovdqu(Address(to, 0), xtmp);
5369 addptr(to, 32);
5370 subl(count, 8 << shift);
5371
5372 BIND(L_check_fill_8_bytes);
5373 // clean upper bits of YMM registers
5374 movdl(xtmp, value);
5375 pshufd(xtmp, xtmp, 0);
5376 } else {
5377 // Fill 32-byte chunks
5378 pshufd(xtmp, xtmp, 0);
5379
5380 subl(count, 8 << shift);
5381 jcc(Assembler::less, L_check_fill_8_bytes);
5382 align(16);
5383
5384 BIND(L_fill_32_bytes_loop);
5385
5386 if (UseUnalignedLoadStores) {
5387 movdqu(Address(to, 0), xtmp);
5388 movdqu(Address(to, 16), xtmp);
5389 } else {
5390 movq(Address(to, 0), xtmp);
5391 movq(Address(to, 8), xtmp);
5392 movq(Address(to, 16), xtmp);
5393 movq(Address(to, 24), xtmp);
5394 }
5395
5396 addptr(to, 32);
5397 subl(count, 8 << shift);
5398 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
5399
5400 BIND(L_check_fill_8_bytes);
5401 }
5402 addl(count, 8 << shift);
5403 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5403)
;
5404 jmpb(L_fill_8_bytes)jmpb_0(L_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5404)
;
5405
5406 //
5407 // length is too short, just fill qwords
5408 //
5409 BIND(L_fill_8_bytes_loop);
5410 movq(Address(to, 0), xtmp);
5411 addptr(to, 8);
5412 BIND(L_fill_8_bytes);
5413 subl(count, 1 << (shift + 1));
5414 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
5415 }
5416 }
5417 // fill trailing 4 bytes
5418 BIND(L_fill_4_bytes);
5419 testl(count, 1<<shift);
5420 jccb(Assembler::zero, L_fill_2_bytes)jccb_0(Assembler::zero, L_fill_2_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5420)
;
5421 movl(Address(to, 0), value);
5422 if (t == T_BYTE || t == T_SHORT) {
5423 Label L_fill_byte;
5424 addptr(to, 4);
5425 BIND(L_fill_2_bytes);
5426 // fill trailing 2 bytes
5427 testl(count, 1<<(shift-1));
5428 jccb(Assembler::zero, L_fill_byte)jccb_0(Assembler::zero, L_fill_byte, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5428)
;
5429 movw(Address(to, 0), value);
5430 if (t == T_BYTE) {
5431 addptr(to, 2);
5432 BIND(L_fill_byte);
5433 // fill trailing byte
5434 testl(count, 1);
5435 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5435)
;
5436 movb(Address(to, 0), value);
5437 } else {
5438 BIND(L_fill_byte);
5439 }
5440 } else {
5441 BIND(L_fill_2_bytes);
5442 }
5443 BIND(L_exit);
5444}
5445
5446void MacroAssembler::evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len) {
5447 switch(type) {
5448 case T_BYTE:
5449 case T_BOOLEAN:
5450 evpbroadcastb(dst, src, vector_len);
5451 break;
5452 case T_SHORT:
5453 case T_CHAR:
5454 evpbroadcastw(dst, src, vector_len);
5455 break;
5456 case T_INT:
5457 case T_FLOAT:
5458 evpbroadcastd(dst, src, vector_len);
5459 break;
5460 case T_LONG:
5461 case T_DOUBLE:
5462 evpbroadcastq(dst, src, vector_len);
5463 break;
5464 default:
5465 fatal("Unhandled type : %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5465, "Unhandled type : %s", type2name(type)); ::breakpoint
(); } while (0)
;
5466 break;
5467 }
5468}
5469
5470// encode char[] to byte[] in ISO_8859_1 or ASCII
5471 //@IntrinsicCandidate
5472 //private static int implEncodeISOArray(byte[] sa, int sp,
5473 //byte[] da, int dp, int len) {
5474 // int i = 0;
5475 // for (; i < len; i++) {
5476 // char c = StringUTF16.getChar(sa, sp++);
5477 // if (c > '\u00FF')
5478 // break;
5479 // da[dp++] = (byte)c;
5480 // }
5481 // return i;
5482 //}
5483 //
5484 //@IntrinsicCandidate
5485 //private static int implEncodeAsciiArray(char[] sa, int sp,
5486 // byte[] da, int dp, int len) {
5487 // int i = 0;
5488 // for (; i < len; i++) {
5489 // char c = sa[sp++];
5490 // if (c >= '\u0080')
5491 // break;
5492 // da[dp++] = (byte)c;
5493 // }
5494 // return i;
5495 //}
// Emits code that narrows 16-bit chars read from src into bytes written at dst.
// Copying stops at the first char that has any bit of the test mask set
// (0xff80 when `ascii` is true, 0xff00 otherwise). On exit `result` holds the
// number of chars that were copied (0 when len is 0).
// The emitted code overwrites src, dst, len, and may overwrite tmp5 and the
// four XMM temporaries.
5496void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
5497 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
5498 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
5499 Register tmp5, Register result, bool ascii) {
5500
5501 // rsi: src
5502 // rdi: dst
5503 // rdx: len
5504 // rcx: tmp5
5505 // rax: result
5506 ShortBranchVerifier sbv(this);
5507 assert_different_registers(src, dst, len, tmp5, result);
5508 Label L_done, L_copy_1_char, L_copy_1_char_exit;
5509
     // `mask` covers two adjacent chars (used for the vector tests after being
     // broadcast); `short_mask` covers a single char (scalar tail loop).
5510 int mask = ascii ? 0xff80ff80 : 0xff00ff00;
5511 int short_mask = ascii ? 0xff80 : 0xff00;
5512
5513 // set result
5514 xorl(result, result);
5515 // check for zero length
5516 testl(len, len);
5517 jcc(Assembler::zero, L_done);
5518
     // Optimistically assume every char is copied; corrected at L_copy_1_char_exit.
5519 movl(result, len);
5520
5521 // Setup pointers
     // src/dst are moved to the END of their arrays and len is negated, so
     // (base + len * scale) walks forward while len counts up towards zero.
5522 lea(src, Address(src, len, Address::times_2)); // char[]
5523 lea(dst, Address(dst, len, Address::times_1)); // byte[]
5524 negptr(len);
5525
5526 if (UseSSE42Intrinsics || UseAVX >= 2) {
5527 Label L_copy_8_chars, L_copy_8_chars_exit;
5528 Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
5529
5530 if (UseAVX >= 2) {
5531 Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
5532 movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector
5533 movdl(tmp1Reg, tmp5);
5534 vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit);
5535 jmp(L_chars_32_check);
5536
     // 32 chars per iteration: two 32-byte loads are OR-ed together and tested
     // against the mask in one vptest; on success they are packed to 32 bytes.
5537 bind(L_copy_32_chars);
5538 vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
5539 vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
5540 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
5541 vptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector
5542 jccb(Assembler::notZero, L_copy_32_chars_exit)jccb_0(Assembler::notZero, L_copy_32_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5542)
;
5543 vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
     // vpermq with 0xD8 reorders the four quadwords of the packed result
     // before it is stored.
5544 vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
5545 vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
5546
     // len is advanced BEFORE the copy, hence the negative displacements above.
5547 bind(L_chars_32_check);
5548 addptr(len, 32);
5549 jcc(Assembler::lessEqual, L_copy_32_chars);
5550
     // Undo half of the 32-char advance; if fewer than 16 chars remain skip the
     // 16-char step, otherwise fall through into L_copy_16_chars below.
5551 bind(L_copy_32_chars_exit);
5552 subptr(len, 16);
5553 jccb(Assembler::greater, L_copy_16_chars_exit)jccb_0(Assembler::greater, L_copy_16_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5553)
;
5554
5555 } else if (UseSSE42Intrinsics) {
5556 movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector
5557 movdl(tmp1Reg, tmp5);
5558 pshufd(tmp1Reg, tmp1Reg, 0);
5559 jmpb(L_chars_16_check)jmpb_0(L_chars_16_check, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5559)
;
5560 }
5561
     // 16 chars per step. With AVX2 this is a single 32-byte load; otherwise
     // two 16-byte loads are OR-ed and tested together.
5562 bind(L_copy_16_chars);
5563 if (UseAVX >= 2) {
5564 vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
5565 vptest(tmp2Reg, tmp1Reg);
5566 jcc(Assembler::notZero, L_copy_16_chars_exit);
5567 vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector_len */ 1);
5568 vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector_len */ 1);
5569 } else {
5570 if (UseAVX > 0) {
5571 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
5572 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
5573 vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 0);
5574 } else {
     // NOTE(review): on this path tmp2Reg is OR-ed into without having been
     // written earlier in this function, so its incoming contents take part in
     // the ptest below. Presumably this can only cause an early (conservative)
     // exit to the narrower loops - confirm.
5575 movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
5576 por(tmp2Reg, tmp3Reg);
5577 movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
5578 por(tmp2Reg, tmp4Reg);
5579 }
5580 ptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector
5581 jccb(Assembler::notZero, L_copy_16_chars_exit)jccb_0(Assembler::notZero, L_copy_16_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5581)
;
5582 packuswb(tmp3Reg, tmp4Reg);
5583 }
     // Both branches leave the 16 packed bytes in the low half of tmp3Reg.
5584 movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);
5585
5586 bind(L_chars_16_check);
5587 addptr(len, 16);
5588 jcc(Assembler::lessEqual, L_copy_16_chars);
5589
5590 bind(L_copy_16_chars_exit);
5591 if (UseAVX >= 2) {
5592 // clean upper bits of YMM registers
5593 vpxor(tmp2Reg, tmp2Reg);
5594 vpxor(tmp3Reg, tmp3Reg);
5595 vpxor(tmp4Reg, tmp4Reg);
     // Rebuild the mask in tmp1Reg from tmp5 (still holding `mask`).
5596 movdl(tmp1Reg, tmp5);
5597 pshufd(tmp1Reg, tmp1Reg, 0);
5598 }
     // Undo half of the 16-char advance; skip the 8-char step if fewer than
     // 8 chars remain.
5599 subptr(len, 8);
5600 jccb(Assembler::greater, L_copy_8_chars_exit)jccb_0(Assembler::greater, L_copy_8_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5600)
;
5601
     // 8 chars per iteration: one 16-byte load, packed and stored as 8 bytes.
5602 bind(L_copy_8_chars);
5603 movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
5604 ptest(tmp3Reg, tmp1Reg);
5605 jccb(Assembler::notZero, L_copy_8_chars_exit)jccb_0(Assembler::notZero, L_copy_8_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5605)
;
5606 packuswb(tmp3Reg, tmp1Reg);
5607 movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
5608 addptr(len, 8);
5609 jccb(Assembler::lessEqual, L_copy_8_chars)jccb_0(Assembler::lessEqual, L_copy_8_chars, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5609)
;
5610
     // Remove the look-ahead of 8; a zero len means every char was consumed.
5611 bind(L_copy_8_chars_exit);
5612 subptr(len, 8);
5613 jccb(Assembler::zero, L_done)jccb_0(Assembler::zero, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5613)
;
5614 }
5615
     // Scalar tail: one char per iteration until len reaches zero or a char
     // fails the short_mask test.
5616 bind(L_copy_1_char);
5617 load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
5618 testl(tmp5, short_mask); // check if Unicode or non-ASCII char
5619 jccb(Assembler::notZero, L_copy_1_char_exit)jccb_0(Assembler::notZero, L_copy_1_char_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5619)
;
5620 movb(Address(dst, len, Address::times_1, 0), tmp5);
5621 addptr(len, 1);
5622 jccb(Assembler::less, L_copy_1_char)jccb_0(Assembler::less, L_copy_1_char, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5622)
;
5623
5624 bind(L_copy_1_char_exit);
5625 addptr(result, len); // len is negative count of not processed elements
5626
5627 bind(L_done);
5628}
5629
5630#ifdef _LP641
5631/**
5632 * Helper for multiply_to_len().
5633 */
// Emits four instructions that add src1 and then src2 into dest_lo, folding
// the carry-out of each addition into dest_hi (dest_hi:dest_lo += src1 + src2).
// The instruction order matters: each adcq consumes the carry flag produced
// by the addq directly before it.
5634void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
5635 addq(dest_lo, src1);
     // adcq with immediate 0 adds only the carry flag.
5636 adcq(dest_hi, 0);
5637 addq(dest_lo, src2);
5638 adcq(dest_hi, 0);
5639}
5640
5641/**
5642 * Multiply 64 bit by 64 bit first loop.
5643 */
// Emits the first multiplication loop: one 64-bit chunk of x (loaded into
// x_xstart) is multiplied by successive 64-bit chunks of y, walking idx and
// kdx downwards, and the low 64 bits of each product-plus-carry are stored
// into z as two 32-bit words. The final carry is left in `carry`; this
// routine does not store it. Per the inline comments `product` is expected
// to be rax, and rdx is overwritten by mulq.
5644void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
5645 Register y, Register y_idx, Register z,
5646 Register carry, Register product,
5647 Register idx, Register kdx) {
5648 //
5649 // jlong carry, x[], y[], z[];
5650 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5651 // huge_128 product = y[idx] * x[xstart] + carry;
5652 // z[kdx] = (jlong)product;
5653 // carry = (jlong)(product >>> 64);
5654 // }
5655 // z[xstart] = carry;
5656 //
5657
5658 Label L_first_loop, L_first_loop_exit;
5659 Label L_one_x, L_one_y, L_multiply;
5660
     // If xstart goes negative here only a single 32-bit word of x is
     // available; L_one_x loads just that word.
5661 decrementl(xstart);
5662 jcc(Assembler::negative, L_one_x);
5663
     // Load two adjacent 32-bit words of x as one 64-bit value and swap the halves.
5664 movq(x_xstart, Address(x, xstart, Address::times_4, 0));
5665 rorq(x_xstart, 32); // convert big-endian to little-endian
5666
5667 bind(L_first_loop);
     // idx is decremented twice per iteration (two 32-bit words of y at a time):
     // negative after the first decrement -> y is exhausted;
     // negative after the second -> exactly one 32-bit word of y is left.
5668 decrementl(idx);
5669 jcc(Assembler::negative, L_first_loop_exit);
5670 decrementl(idx);
5671 jcc(Assembler::negative, L_one_y);
5672 movq(y_idx, Address(y, idx, Address::times_4, 0));
5673 rorq(y_idx, 32); // convert big-endian to little-endian
5674 bind(L_multiply);
5675 movq(product, x_xstart);
5676 mulq(y_idx); // product(rax) * y_idx -> rdx:rax
     // Add the incoming carry to the 128-bit product held in rdx:product.
5677 addq(product, carry);
5678 adcq(rdx, 0);
5679 subl(kdx, 2);
     // Store the low 64 bits as two 32-bit words: low half at z[kdx+1],
     // high half at z[kdx].
5680 movl(Address(z, kdx, Address::times_4, 4), product);
5681 shrq(product, 32);
5682 movl(Address(z, kdx, Address::times_4, 0), product);
     // The high 64 bits become the carry for the next iteration.
5683 movq(carry, rdx);
5684 jmp(L_first_loop);
5685
     // Single leftover 32-bit word of y: movl loads y[0] only.
5686 bind(L_one_y);
5687 movl(y_idx, Address(y, 0));
5688 jmp(L_multiply);
5689
     // Single 32-bit word of x: movl loads x[0] only.
5690 bind(L_one_x);
5691 movl(x_xstart, Address(x, 0));
5692 jmp(L_first_loop);
5693
5694 bind(L_first_loop_exit);
5695}
5696
5697/**
5698 * Multiply 64 bit by 64 bit and add 128 bit.
5699 */
// Emits one multiply-accumulate step: loads a 64-bit chunk of y at
// (idx*4 + offset), multiplies it by x_xstart, adds the matching 64-bit chunk
// of z and `carry`, and writes the low 64 bits of the sum back to z as two
// 32-bit words. The high 64 bits are left in rdx for the caller to pick up.
// yz_idx is used as scratch; per the inline comments `product` is expected
// to be rax.
5700void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, Register z,
5701 Register yz_idx, Register idx,
5702 Register carry, Register product, int offset) {
5703 // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
5704 // z[kdx] = (jlong)product;
5705
5706 movq(yz_idx, Address(y, idx, Address::times_4, offset));
5707 rorq(yz_idx, 32); // convert big-endian to little-endian
5708 movq(product, x_xstart);
5709 mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax)
     // yz_idx is reused: it now holds the z chunk instead of the y chunk.
5710 movq(yz_idx, Address(z, idx, Address::times_4, offset));
5711 rorq(yz_idx, 32); // convert big-endian to little-endian
5712
     // rdx:product += carry + yz_idx
5713 add2_with_carry(rdx, product, carry, yz_idx);
5714
     // Low 32 bits go to the higher address (offset+4), high 32 bits to `offset`.
5715 movl(Address(z, idx, Address::times_4, offset+4), product);
5716 shrq(product, 32);
5717 movl(Address(z, idx, Address::times_4, offset), product);
5718
5719}
5720
5721/**
5722 * Multiply 128 bit by 128 bit. Unrolled inner loop.
5723 */
// Emits the inner ("third") loop without BMI2: x_xstart is multiplied by y and
// accumulated into z, four 32-bit words of y per iteration (two calls to
// multiply_add_128_x_128), followed by tails for two and for one leftover
// word. On exit `carry` holds the outgoing carry. idx, jdx, yz_idx, carry2,
// product and rdx are overwritten by the emitted code.
5724void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, Register y, Register z,
5725 Register yz_idx, Register idx, Register jdx,
5726 Register carry, Register product,
5727 Register carry2) {
5728 // jlong carry, x[], y[], z[];
5729 // int kdx = ystart+1;
5730 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
5731 // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
5732 // z[kdx+idx+1] = (jlong)product;
5733 // jlong carry2 = (jlong)(product >>> 64);
5734 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
5735 // z[kdx+idx] = (jlong)product;
5736 // carry = (jlong)(product >>> 64);
5737 // }
5738 // idx += 2;
5739 // if (idx > 0) {
5740 // product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
5741 // z[kdx+idx] = (jlong)product;
5742 // carry = (jlong)(product >>> 64);
5743 // }
5744 //
5745
5746 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
5747
     // jdx = idx / 4: number of full four-word iterations.
5748 movl(jdx, idx);
5749 andl(jdx, 0xFFFFFFFC);
5750 shrl(jdx, 2);
5751
5752 bind(L_third_loop);
5753 subl(jdx, 1);
5754 jcc(Assembler::negative, L_third_loop_exit);
5755 subl(idx, 4);
5756
     // Upper pair of words (byte offset 8) first; its high half (rdx) feeds
     // the lower pair (byte offset 0) through carry2.
5757 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
5758 movq(carry2, rdx);
5759
5760 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
5761 movq(carry, rdx);
5762 jmp(L_third_loop);
5763
5764 bind (L_third_loop_exit);
5765
     // idx & 3 = number of 32-bit words not covered by the main loop.
5766 andl (idx, 0x3);
5767 jcc(Assembler::zero, L_post_third_loop_done);
5768
     // Two or three words left: process one 64-bit pair.
5769 Label L_check_1;
5770 subl(idx, 2);
5771 jcc(Assembler::negative, L_check_1);
5772
5773 multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
5774 movq(carry, rdx);
5775
     // Restore the leftover count and keep only its low bit; after the subl,
     // idx is 0 when one 32-bit word remains and negative when none does.
5776 bind (L_check_1);
5777 addl (idx, 0x2);
5778 andl (idx, 0x1);
5779 subl(idx, 1);
5780 jcc(Assembler::negative, L_post_third_loop_done);
5781
     // Single 32-bit word: movl loads it without the half-swap used for pairs.
5782 movl(yz_idx, Address(y, idx, Address::times_4, 0));
5783 movq(product, x_xstart);
5784 mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax)
5785 movl(yz_idx, Address(z, idx, Address::times_4, 0));
5786
5787 add2_with_carry(rdx, product, yz_idx, carry);
5788
     // Only 32 bits are stored, so the new carry is (rdx:product) >> 32.
5789 movl(Address(z, idx, Address::times_4, 0), product);
5790 shrq(product, 32);
5791
5792 shlq(rdx, 32);
5793 orq(product, rdx);
5794 movq(carry, product);
5795
5796 bind(L_post_third_loop_done);
5797}
5798
5799/**
5800 * Multiply 128 bit by 128 bit using BMI2. Unrolled inner loop.
5801 *
5802 */
// BMI2 variant of the inner ("third") loop. Per the inline comments the
// multiplicand is taken implicitly from rdx by mulxq; it is multiplied by y
// and accumulated into z, four 32-bit words per iteration, followed by tails
// for two and for one leftover word. On exit `carry` holds the outgoing carry.
// idx, jdx, carry2, yz_idx1, yz_idx2, tmp, tmp3 and tmp4 are overwritten.
5803void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z,
5804 Register carry, Register carry2,
5805 Register idx, Register jdx,
5806 Register yz_idx1, Register yz_idx2,
5807 Register tmp, Register tmp3, Register tmp4) {
5808 assert(UseBMI2Instructions, "should be used only when BMI2 is available")do { if (!(UseBMI2Instructions)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 5808, "assert(" "UseBMI2Instructions" ") failed", "should be used only when BMI2 is available"
); ::breakpoint(); } } while (0)
;
5809
5810 // jlong carry, x[], y[], z[];
5811 // int kdx = ystart+1;
5812 // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
5813 // huge_128 tmp3 = (y[idx+1] * rdx) + z[kdx+idx+1] + carry;
5814 // jlong carry2 = (jlong)(tmp3 >>> 64);
5815 // huge_128 tmp4 = (y[idx] * rdx) + z[kdx+idx] + carry2;
5816 // carry = (jlong)(tmp4 >>> 64);
5817 // z[kdx+idx+1] = (jlong)tmp3;
5818 // z[kdx+idx] = (jlong)tmp4;
5819 // }
5820 // idx += 2;
5821 // if (idx > 0) {
5822 // yz_idx1 = (y[idx] * rdx) + z[kdx+idx] + carry;
5823 // z[kdx+idx] = (jlong)yz_idx1;
5824 // carry = (jlong)(yz_idx1 >>> 64);
5825 // }
5826 //
5827
5828 Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
5829
     // jdx = idx / 4: number of full four-word iterations.
5830 movl(jdx, idx);
5831 andl(jdx, 0xFFFFFFFC);
5832 shrl(jdx, 2);
5833
5834 bind(L_third_loop);
5835 subl(jdx, 1);
5836 jcc(Assembler::negative, L_third_loop_exit);
5837 subl(idx, 4);
5838
     // Load both 64-bit chunks of y for this iteration (upper pair at byte
     // offset 8, lower pair at byte offset 0) and swap their 32-bit halves.
5839 movq(yz_idx1, Address(y, idx, Address::times_4, 8));
5840 rorxq(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
5841 movq(yz_idx2, Address(y, idx, Address::times_4, 0));
5842 rorxq(yz_idx2, yz_idx2, 32);
5843
5844 mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3
5845 mulxq(carry2, tmp, yz_idx2); // yz_idx2 * rdx -> carry2:tmp
5846
     // yz_idx1/yz_idx2 are reused for the matching chunks of z.
5847 movq(yz_idx1, Address(z, idx, Address::times_4, 8));
5848 rorxq(yz_idx1, yz_idx1, 32);
5849 movq(yz_idx2, Address(z, idx, Address::times_4, 0));
5850 rorxq(yz_idx2, yz_idx2, 32);
5851
5852 if (VM_Version::supports_adx()) {
     // ADX path: adcxq and adoxq form two interleaved add-with-carry chains.
     // NOTE(review): this sequence appears to rely on the carry and overflow
     // flags being clear on entry - presumably guaranteed by subl(idx, 4)
     // above, with no flag-writing instruction in between; confirm.
5853 adcxq(tmp3, carry);
5854 adoxq(tmp3, yz_idx1);
5855
5856 adcxq(tmp4, tmp);
5857 adoxq(tmp4, yz_idx2);
5858
5859 movl(carry, 0); // does not affect flags
5860 adcxq(carry2, carry);
5861 adoxq(carry2, carry);
5862 } else {
     // tmp4:tmp3 += carry + yz_idx1, then carry2:tmp4 += tmp + yz_idx2.
5863 add2_with_carry(tmp4, tmp3, carry, yz_idx1);
5864 add2_with_carry(carry2, tmp4, tmp, yz_idx2);
5865 }
5866 movq(carry, carry2);
5867
     // Write back four 32-bit words: tmp3 -> offsets 12 (low half) and 8 (high
     // half), tmp4 -> offsets 4 (low half) and 0 (high half).
5868 movl(Address(z, idx, Address::times_4, 12), tmp3);
5869 shrq(tmp3, 32);
5870 movl(Address(z, idx, Address::times_4, 8), tmp3);
5871
5872 movl(Address(z, idx, Address::times_4, 4), tmp4);
5873 shrq(tmp4, 32);
5874 movl(Address(z, idx, Address::times_4, 0), tmp4);
5875
5876 jmp(L_third_loop);
5877
5878 bind (L_third_loop_exit);
5879
     // idx & 3 = number of 32-bit words not covered by the main loop.
5880 andl (idx, 0x3);
5881 jcc(Assembler::zero, L_post_third_loop_done);
5882
     // Two or three words left: process one 64-bit pair.
5883 Label L_check_1;
5884 subl(idx, 2);
5885 jcc(Assembler::negative, L_check_1);
5886
5887 movq(yz_idx1, Address(y, idx, Address::times_4, 0));
5888 rorxq(yz_idx1, yz_idx1, 32);
5889 mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3
5890 movq(yz_idx2, Address(z, idx, Address::times_4, 0));
5891 rorxq(yz_idx2, yz_idx2, 32);
5892
5893 add2_with_carry(tmp4, tmp3, carry, yz_idx2);
5894
5895 movl(Address(z, idx, Address::times_4, 4), tmp3);
5896 shrq(tmp3, 32);
5897 movl(Address(z, idx, Address::times_4, 0), tmp3);
5898 movq(carry, tmp4);
5899
     // Restore the leftover count and keep only its low bit; after the subl,
     // idx is 0 when one 32-bit word remains and negative when none does.
5900 bind (L_check_1);
5901 addl (idx, 0x2);
5902 andl (idx, 0x1);
5903 subl(idx, 1);
5904 jcc(Assembler::negative, L_post_third_loop_done);
     // Single 32-bit word: movl loads it without the half-swap used for pairs.
5905 movl(tmp4, Address(y, idx, Address::times_4, 0));
5906 mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3
5907 movl(tmp4, Address(z, idx, Address::times_4, 0));
5908
5909 add2_with_carry(carry2, tmp3, tmp4, carry);
5910
     // Only 32 bits are stored, so the new carry is (carry2:tmp3) >> 32.
5911 movl(Address(z, idx, Address::times_4, 0), tmp3);
5912 shrq(tmp3, 32);
5913
5914 shlq(carry2, 32);
5915 orq(tmp3, carry2);
5916 movq(carry, tmp3);
5917
5918 bind(L_post_third_loop_done);
5919}
5920
5921/**
5922 * Code for BigInteger::multiplyToLen() intrinsic.
5923 *
5924 * rdi: x
5925 * rax: xlen
5926 * rsi: y
5927 * rcx: ylen
5928 * r8: z
5929 * r11: zlen
5930 * r12: tmp1
5931 * r13: tmp2
5932 * r14: tmp3
5933 * r15: tmp4
5934 * rbx: tmp5
5935 *
5936 */
// Emits the multi-word multiplication z = x * y over arrays of 32-bit words,
// following the Java pseudo-code quoted in the comments below. The work is
// split into a first loop (multiply_64_x_64_loop) and a second loop whose
// body is the "third" loop (BMI2 or plain variant). tmp1..tmp5, xlen and zlen
// are saved on the stack at entry and restored at L_done. rdx must differ
// from every argument register (asserted below).
5937void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
5938 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
5939 ShortBranchVerifier sbv(this);
5940 assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx);
5941
5942 push(tmp1);
5943 push(tmp2);
5944 push(tmp3);
5945 push(tmp4);
5946 push(tmp5);
5947
     // xlen and zlen are saved because they are reused below as `product`
     // and `x_xstart`.
5948 push(xlen);
5949 push(zlen);
5950
5951 const Register idx = tmp1;
5952 const Register kdx = tmp2;
5953 const Register xstart = tmp3;
5954
5955 const Register y_idx = tmp4;
5956 const Register carry = tmp5;
5957 const Register product = xlen;
5958 const Register x_xstart = zlen; // reuse register
5959
5960 // First Loop.
5961 //
5962 // final static long LONG_MASK = 0xffffffffL;
5963 // int xstart = xlen - 1;
5964 // int ystart = ylen - 1;
5965 // long carry = 0;
5966 // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
5967 // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
5968 // z[kdx] = (int)product;
5969 // carry = product >>> 32;
5970 // }
5971 // z[xstart] = (int)carry;
5972 //
5973
5974 movl(idx, ylen); // idx = ylen;
5975 movl(kdx, zlen); // kdx = xlen+ylen;
5976 xorq(carry, carry); // carry = 0;
5977
5978 Label L_done;
5979
     // xstart = xlen - 1; a negative value means x is empty.
5980 movl(xstart, xlen);
5981 decrementl(xstart);
5982 jcc(Assembler::negative, L_done);
5983
5984 multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
5985
     // Store the carry left in `carry` by the first loop into the remaining
     // low-index word(s) of z: nothing if kdx == 0, one 32-bit word if
     // kdx == 1, otherwise two words (low half first, then the high half).
5986 Label L_second_loop;
5987 testl(kdx, kdx);
5988 jcc(Assembler::zero, L_second_loop);
5989
5990 Label L_carry;
5991 subl(kdx, 1);
5992 jcc(Assembler::zero, L_carry);
5993
5994 movl(Address(z, kdx, Address::times_4, 0), carry);
5995 shrq(carry, 32);
5996 subl(kdx, 1);
5997
5998 bind(L_carry);
5999 movl(Address(z, kdx, Address::times_4, 0), carry);
6000
6001 // Second and third (nested) loops.
6002 //
6003 // for (int i = xstart-1; i >= 0; i--) { // Second loop
6004 // carry = 0;
6005 // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
6006 // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
6007 // (z[k] & LONG_MASK) + carry;
6008 // z[k] = (int)product;
6009 // carry = product >>> 32;
6010 // }
6011 // z[i] = (int)carry;
6012 // }
6013 //
6014 // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx
6015
     // jdx aliases tmp1, the same register as idx above.
6016 const Register jdx = tmp1;
6017
6018 bind(L_second_loop);
6019 xorl(carry, carry); // carry = 0;
6020 movl(jdx, ylen); // j = ystart+1
6021
6022 subl(xstart, 1); // i = xstart-1;
6023 jcc(Assembler::negative, L_done);
6024
     // z is saved because it is temporarily re-based for the third loop;
     // the matching pop(z) follows the loop call.
6025 push (z);
6026
6027 Label L_last_x;
6028 lea(z, Address(z, xstart, Address::times_4, 4)); // z = z + k - j
6029 subl(xstart, 1); // i = xstart-1;
     // A negative xstart here means only one 32-bit word of x is left;
     // L_last_x loads just x[0].
6030 jcc(Assembler::negative, L_last_x);
6031
     // Load two 32-bit words of x as one 64-bit multiplicand: into rdx for the
     // BMI2 loop, into x_xstart otherwise.
6032 if (UseBMI2Instructions) {
6033 movq(rdx, Address(x, xstart, Address::times_4, 0));
6034 rorxq(rdx, rdx, 32); // convert big-endian to little-endian
6035 } else {
6036 movq(x_xstart, Address(x, xstart, Address::times_4, 0));
6037 rorq(x_xstart, 32); // convert big-endian to little-endian
6038 }
6039
6040 Label L_third_loop_prologue;
6041 bind(L_third_loop_prologue);
6042
     // x, xstart and ylen are saved because the third loop uses x as carry2,
     // ylen as its jdx counter and (BMI2 variant) tmp3 == xstart as scratch.
6043 push (x);
6044 push (xstart);
6045 push (ylen);
6046
6047
6048 if (UseBMI2Instructions) {
6049 multiply_128_x_128_bmi2_loop(y, z, carry, x, jdx, ylen, product, tmp2, x_xstart, tmp3, tmp4);
6050 } else { // !UseBMI2Instructions
6051 multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);
6052 }
6053
     // The value pushed from xstart is popped into xlen here (not into
     // xstart); it is copied into tmp3 (== xstart) just below.
6054 pop(ylen);
6055 pop(xlen);
6056 pop(x);
6057 pop(z);
6058
     // Store the carry of this row: low 32 bits at z[xstart+1], then, unless
     // the index went negative, high 32 bits at z[xstart].
6059 movl(tmp3, xlen);
6060 addl(tmp3, 1);
6061 movl(Address(z, tmp3, Address::times_4, 0), carry);
6062 subl(tmp3, 1);
6063 jccb(Assembler::negative, L_done)jccb_0(Assembler::negative, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6063)
;
6064
6065 shrq(carry, 32);
6066 movl(Address(z, tmp3, Address::times_4, 0), carry);
6067 jmp(L_second_loop);
6068
6069 // Next infrequent code is moved outside loops.
6070 bind(L_last_x);
6071 if (UseBMI2Instructions) {
6072 movl(rdx, Address(x, 0));
6073 } else {
6074 movl(x_xstart, Address(x, 0));
6075 }
6076 jmp(L_third_loop_prologue);
6077
6078 bind(L_done);
6079
     // Restore the registers saved at entry, in reverse push order.
6080 pop(zlen);
6081 pop(xlen);
6082
6083 pop(tmp5);
6084 pop(tmp4);
6085 pop(tmp3);
6086 pop(tmp2);
6087 pop(tmp1);
6088}
6089
6090void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
6091 Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){
6092 assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6092, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 must be enabled."
); ::breakpoint(); } } while (0)
;
6093 Label VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
6094 Label VECTOR8_TAIL, VECTOR4_TAIL;
6095 Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
6096 Label SAME_TILL_END, DONE;
6097 Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL;
6098
6099 //scale is in rcx in both Win64 and Unix
6100 ShortBranchVerifier sbv(this);
6101
6102 shlq(length);
6103 xorq(result, result);
6104
6105 if ((AVX3Threshold == 0) && (UseAVX > 2) &&
6106 VM_Version::supports_avx512vlbw()) {
6107 Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
6108
6109 cmpq(length, 64);
6110 jcc(Assembler::less, VECTOR32_TAIL);
6111
6112 movq(tmp1, length);
6113 andq(tmp1, 0x3F); // tail count
6114 andq(length, ~(0x3F)); //vector count
6115
6116 bind(VECTOR64_LOOP);
6117 // AVX512 code to compare 64 byte vectors.
6118 evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
6119 evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
6120 kortestql(k7, k7);
6121 jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
6122 addq(result, 64);
6123 subq(length, 64);
6124 jccb(Assembler::notZero, VECTOR64_LOOP)jccb_0(Assembler::notZero, VECTOR64_LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6124)
;
6125
6126 //bind(VECTOR64_TAIL);
6127 testq(tmp1, tmp1);
6128 jcc(Assembler::zero, SAME_TILL_END);
6129
6130 //bind(VECTOR64_TAIL);
6131 // AVX512 code to compare upto 63 byte vectors.
6132 mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
6133 shlxq(tmp2, tmp2, tmp1);
6134 notq(tmp2);
6135 kmovql(k3, tmp2);
6136
6137 evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
6138 evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
6139
6140 ktestql(k7, k3);
6141 jcc(Assembler::below, SAME_TILL_END); // not mismatch
6142
6143 bind(VECTOR64_NOT_EQUAL);
6144 kmovql(tmp1, k7);
6145 notq(tmp1);
6146 tzcntq(tmp1, tmp1);
6147 addq(result, tmp1);
6148 shrq(result);
6149 jmp(DONE);
6150 bind(VECTOR32_TAIL);
6151 }
6152
6153 cmpq(length, 8);
6154 jcc(Assembler::equal, VECTOR8_LOOP);
6155 jcc(Assembler::less, VECTOR4_TAIL);
6156
6157 if (UseAVX >= 2) {
6158 Label VECTOR16_TAIL, VECTOR32_LOOP;
6159
6160 cmpq(length, 16);
6161 jcc(Assembler::equal, VECTOR16_LOOP);
6162 jcc(Assembler::less, VECTOR8_LOOP);
6163
6164 cmpq(length, 32);
6165 jccb(Assembler::less, VECTOR16_TAIL)jccb_0(Assembler::less, VECTOR16_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6165)
;
6166
6167 subq(length, 32);
6168 bind(VECTOR32_LOOP);
6169 vmovdqu(rymm0, Address(obja, result));
6170 vmovdqu(rymm1, Address(objb, result));
6171 vpxor(rymm2, rymm0, rymm1, Assembler::AVX_256bit);
6172 vptest(rymm2, rymm2);
6173 jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found
6174 addq(result, 32);
6175 subq(length, 32);
6176 jcc(Assembler::greaterEqual, VECTOR32_LOOP);
6177 addq(length, 32);
6178 jcc(Assembler::equal, SAME_TILL_END);
6179 //falling through if less than 32 bytes left //close the branch here.
6180
6181 bind(VECTOR16_TAIL);
6182 cmpq(length, 16);
6183 jccb(Assembler::less, VECTOR8_TAIL)jccb_0(Assembler::less, VECTOR8_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6183)
;
6184 bind(VECTOR16_LOOP);
6185 movdqu(rymm0, Address(obja, result));
6186 movdqu(rymm1, Address(objb, result));
6187 vpxor(rymm2, rymm0, rymm1, Assembler::AVX_128bit);
6188 ptest(rymm2, rymm2);
6189 jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
6190 addq(result, 16);
6191 subq(length, 16);
6192 jcc(Assembler::equal, SAME_TILL_END);
6193 //falling through if less than 16 bytes left
6194 } else {//regular intrinsics
6195
6196 cmpq(length, 16);
6197 jccb(Assembler::less, VECTOR8_TAIL)jccb_0(Assembler::less, VECTOR8_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6197)
;
6198
6199 subq(length, 16);
6200 bind(VECTOR16_LOOP);
6201 movdqu(rymm0, Address(obja, result));
6202 movdqu(rymm1, Address(objb, result));
6203 pxor(rymm0, rymm1);
6204 ptest(rymm0, rymm0);
6205 jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
6206 addq(result, 16);
6207 subq(length, 16);
6208 jccb(Assembler::greaterEqual, VECTOR16_LOOP)jccb_0(Assembler::greaterEqual, VECTOR16_LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6208)
;
6209 addq(length, 16);
6210 jcc(Assembler::equal, SAME_TILL_END);
6211 //falling through if less than 16 bytes left
6212 }
6213
6214 bind(VECTOR8_TAIL);
6215 cmpq(length, 8);
6216 jccb(Assembler::less, VECTOR4_TAIL)jccb_0(Assembler::less, VECTOR4_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6216)
;
6217 bind(VECTOR8_LOOP);
6218 movq(tmp1, Address(obja, result));
6219 movq(tmp2, Address(objb, result));
6220 xorq(tmp1, tmp2);
6221 testq(tmp1, tmp1);
6222 jcc(Assembler::notZero, VECTOR8_NOT_EQUAL);//mismatch found
6223 addq(result, 8);
6224 subq(length, 8);
6225 jcc(Assembler::equal, SAME_TILL_END);
6226 //falling through if less than 8 bytes left
6227
6228 bind(VECTOR4_TAIL);
6229 cmpq(length, 4);
6230 jccb(Assembler::less, BYTES_TAIL)jccb_0(Assembler::less, BYTES_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6230)
;
6231 bind(VECTOR4_LOOP);
6232 movl(tmp1, Address(obja, result));
6233 xorl(tmp1, Address(objb, result));
6234 testl(tmp1, tmp1);
6235 jcc(Assembler::notZero, VECTOR4_NOT_EQUAL);//mismatch found
6236 addq(result, 4);
6237 subq(length, 4);
6238 jcc(Assembler::equal, SAME_TILL_END);
6239 //falling through if less than 4 bytes left
6240
6241 bind(BYTES_TAIL);
6242 bind(BYTES_LOOP);
6243 load_unsigned_byte(tmp1, Address(obja, result));
6244 load_unsigned_byte(tmp2, Address(objb, result));
6245 xorl(tmp1, tmp2);
6246 testl(tmp1, tmp1);
6247 jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
6248 decq(length);
6249 jcc(Assembler::zero, SAME_TILL_END);
6250 incq(result);
6251 load_unsigned_byte(tmp1, Address(obja, result));
6252 load_unsigned_byte(tmp2, Address(objb, result));
6253 xorl(tmp1, tmp2);
6254 testl(tmp1, tmp1);
6255 jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
6256 decq(length);
6257 jcc(Assembler::zero, SAME_TILL_END);
6258 incq(result);
6259 load_unsigned_byte(tmp1, Address(obja, result));
6260 load_unsigned_byte(tmp2, Address(objb, result));
6261 xorl(tmp1, tmp2);
6262 testl(tmp1, tmp1);
6263 jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
6264 jmp(SAME_TILL_END);
6265
6266 if (UseAVX >= 2) {
6267 bind(VECTOR32_NOT_EQUAL);
6268 vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit);
6269 vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit);
6270 vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit);
6271 vpmovmskb(tmp1, rymm0);
6272 bsfq(tmp1, tmp1);
6273 addq(result, tmp1);
6274 shrq(result);
6275 jmp(DONE);
6276 }
6277
6278 bind(VECTOR16_NOT_EQUAL);
6279 if (UseAVX >= 2) {
6280 vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit);
6281 vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit);
6282 pxor(rymm0, rymm2);
6283 } else {
6284 pcmpeqb(rymm2, rymm2);
6285 pxor(rymm0, rymm1);
6286 pcmpeqb(rymm0, rymm1);
6287 pxor(rymm0, rymm2);
6288 }
6289 pmovmskb(tmp1, rymm0);
6290 bsfq(tmp1, tmp1);
6291 addq(result, tmp1);
6292 shrq(result);
6293 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6293)
;
6294
6295 bind(VECTOR8_NOT_EQUAL);
6296 bind(VECTOR4_NOT_EQUAL);
6297 bsfq(tmp1, tmp1);
6298 shrq(tmp1, 3);
6299 addq(result, tmp1);
6300 bind(BYTES_NOT_EQUAL);
6301 shrq(result);
6302 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6302)
;
6303
6304 bind(SAME_TILL_END);
6305 mov64(result, -1);
6306
6307 bind(DONE);
6308}
6309
6310//Helper functions for square_to_len()
6311
6312/**
6313 * Store the squares of x[], right shifted one bit (divided by 2) into z[]
6314 * Preserves x and z and modifies rest of the registers.
 *
 * Register roles as used by the code emitted below:
 *   x, xlen        - source array base and its element count (xlen is only read here)
 *   z              - destination array base
 *   tmp1           - running index into x
 *   tmp4           - running index into z
 *   tmp5           - carry bit handed from one product to the next
 *   tmp3           - not referenced in this body
 *   rdxReg, raxReg - high/low halves of the product being shifted and stored
 * NOTE(review): mulq(raxReg) is commented below as producing rdx:rax, so callers
 * presumably pass rdx and rax for rdxReg/raxReg -- confirm at the call sites.
6315 */
6316void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6317 // Perform square and right shift by 1
6318 // Handle odd xlen case first, then for even xlen do the following
6319 // jlong carry = 0;
6320 // for (int j=0, i=0; j < xlen; j+=2, i+=4) {
6321 // huge_128 product = x[j:j+1] * x[j:j+1];
6322 // z[i:i+1] = (carry << 63) | (jlong)(product >>> 65);
6323 // z[i+2:i+3] = (jlong)(product >>> 1);
6324 // carry = (jlong)product;
6325 // }
6326
6327 xorq(tmp5, tmp5); // carry
6328 xorq(rdxReg, rdxReg);
6329 xorl(tmp1, tmp1); // index for x
6330 xorl(tmp4, tmp4); // index for z
6331
6332 Label L_first_loop, L_first_loop_exit;
6333
6334 testl(xlen, 1);
6335 jccb(Assembler::zero, L_first_loop)jccb_0(Assembler::zero, L_first_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6335)
; //jump if xlen is even
6336
6337 // Square and right shift by 1 the odd element using 32 bit multiply
6338 movl(raxReg, Address(x, tmp1, Address::times_4, 0));
6339 imulq(raxReg, raxReg);
6340 shrq(raxReg, 1);
 // Capture the bit shifted out by shrq (left in the carry flag) in tmp5.
6341 adcq(tmp5, 0);
6342 movq(Address(z, tmp4, Address::times_4, 0), raxReg);
 // One element of x consumed, two elements of z produced.
6343 incrementl(tmp1);
6344 addl(tmp4, 2);
6345
6346 // Square and right shift by 1 the rest using 64 bit multiply
6347 bind(L_first_loop);
6348 cmpptr(tmp1, xlen);
6349 jccb(Assembler::equal, L_first_loop_exit)jccb_0(Assembler::equal, L_first_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6349)
;
6350
6351 // Square
6352 movq(raxReg, Address(x, tmp1, Address::times_4, 0));
6353 rorq(raxReg, 32); // convert big-endian to little-endian
6354 mulq(raxReg); // 64-bit multiply rax * rax -> rdx:rax
6355
6356 // Right shift by 1 and save carry
6357 shrq(tmp5, 1); // rdx:rax:tmp5 = (tmp5:rdx:rax) >>> 1
6358 rcrq(rdxReg, 1);
6359 rcrq(raxReg, 1);
6360 adcq(tmp5, 0);
6361
6362 // Store result in z
6363 movq(Address(z, tmp4, Address::times_4, 0), rdxReg);
6364 movq(Address(z, tmp4, Address::times_4, 8), raxReg);
6365
6366 // Update indices for x and z
6367 addl(tmp1, 2);
6368 addl(tmp4, 4);
6369 jmp(L_first_loop);
6370
6371 bind(L_first_loop_exit);
6372}
6373
6374
6375/**
6376 * Perform the following multiply add operation using BMI2 instructions
6377 * carry:sum = sum + op1*op2 + carry
6378 * op2 should be in rdx
6379 * op2 is preserved, all other registers are modified
 *
 * sum   - in: addend, out: low 64 bits of the result
 * op1   - in: multiplicand, out: overwritten with the low half of the product
 * op2   - never named in the body below; the multiply reads it implicitly,
 *         which is why it must already be in rdx
 * carry - in: incoming carry, out: high 64 bits of the result
 * tmp2  - scratch, receives the high half of the product
 * NOTE(review): the "assert op2 is rdx" line below is only a comment; no
 * assertion is emitted or checked here.
6380 */
6381void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) {
6382 // assert op2 is rdx
6383 mulxq(tmp2, op1, op1); // op1 * op2 -> tmp2:op1
6384 addq(sum, carry);
 // propagate the carry-out of each 64-bit add into the high half
6385 adcq(tmp2, 0);
6386 addq(sum, op1);
6387 adcq(tmp2, 0);
6388 movq(carry, tmp2);
6389}
6390
6391/**
6392 * Perform the following multiply add operation:
6393 * carry:sum = sum + op1*op2 + carry
6394 * Preserves op1, op2 and modifies rest of registers
 *
 * sum            - in: addend, out: low 64 bits of the result
 * op1, op2       - the two factors (op2 is first copied into raxReg)
 * carry          - in: incoming carry, out: high 64 bits of the result
 * rdxReg, raxReg - hold the product; both are overwritten
 * NOTE(review): the comments below describe the product as rdx:rax, so
 * rdxReg/raxReg are presumably always rdx and rax -- confirm at the call sites.
6395 */
6396void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) {
6397 // rdx:rax = op1 * op2
6398 movq(raxReg, op2);
6399 mulq(op1);
6400
6401 // rdx:rax = sum + carry + rdx:rax
6402 addq(sum, carry);
6403 adcq(rdxReg, 0);
6404 addq(sum, raxReg);
6405 adcq(rdxReg, 0);
6406
6407 // carry:sum = rdx:sum
6408 movq(carry, rdxReg);
6409}
6410
6411/**
6412 * Add 64 bit long carry into z[] with carry propagation.
6413 * Preserves z and carry register values and modifies rest of registers.
6414 *
 * z     - array base
 * zlen  - index into z; decremented by 2 before every add, so it is NOT
 *         preserved across this call
 * carry - 64-bit value added at z[zlen-2]
 * tmp1  - scratch, loaded with the constant 1 used for the propagation adds
 *
 * The loop keeps adding 1 to the next 64-bit chunk for as long as the
 * previous add set the carry flag, and stops when the index goes negative.
6415 */
6416void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) {
6417 Label L_fourth_loop, L_fourth_loop_exit;
6418
6419 movl(tmp1, 1);
6420 subl(zlen, 2);
6421 addq(Address(z, zlen, Address::times_4, 0), carry);
6422
6423 bind(L_fourth_loop);
 // The flag tested here comes from the preceding addq (initial add or loop add).
6424 jccb(Assembler::carryClear, L_fourth_loop_exit)jccb_0(Assembler::carryClear, L_fourth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6424)
;
6425 subl(zlen, 2);
6426 jccb(Assembler::negative, L_fourth_loop_exit)jccb_0(Assembler::negative, L_fourth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6426)
;
6427 addq(Address(z, zlen, Address::times_4, 0), tmp1);
6428 jmp(L_fourth_loop);
6429 bind(L_fourth_loop_exit);
6430}
6431
6432/**
6433 * Shift z[] left by 1 bit.
6434 * Preserves x, len, z and zlen registers and modifies rest of the registers.
6435 *
 * x, len - not referenced in this body
 * z      - array base, shifted in place
 * zlen   - element count of z (copied into tmp3, never modified)
 * tmp1   - carry coming from the previously processed chunk (prev_carry)
 * tmp2   - the 64-bit chunk being shifted (value)
 * tmp3   - running index into z, walking from the end towards 0 (zidx)
 * tmp4   - carry produced by the current chunk (new_carry; non-BMI2 path only)
6436 */
6437void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
6438
6439 Label L_fifth_loop, L_fifth_loop_exit;
6440
6441 // Fifth loop
6442 // Perform primitiveLeftShift(z, zlen, 1)
6443
6444 const Register prev_carry = tmp1;
6445 const Register new_carry = tmp4;
6446 const Register value = tmp2;
6447 const Register zidx = tmp3;
6448
6449 // int zidx, carry;
6450 // long value;
6451 // carry = 0;
6452 // for (zidx = zlen-2; zidx >=0; zidx -= 2) {
6453 // (carry:value) = (z[zidx] << 1) | carry ;
6454 // z[zidx] = value;
6455 // }
6456
6457 movl(zidx, zlen);
6458 xorl(prev_carry, prev_carry); // clear carry flag and prev_carry register
6459
6460 bind(L_fifth_loop);
6461 decl(zidx); // Use decl to preserve carry flag
6462 decl(zidx);
6463 jccb(Assembler::negative, L_fifth_loop_exit)jccb_0(Assembler::negative, L_fifth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6463)
;
6464
6465 if (UseBMI2Instructions) {
 // BMI2 path: the carry between chunks lives in the CPU carry flag
 // (rotated in by rclq); prev_carry/new_carry are not used here.
6466 movq(value, Address(z, zidx, Address::times_4, 0));
6467 rclq(value, 1);
6468 rorxq(value, value, 32);
6469 movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form
6470 }
6471 else {
6472 // clear new_carry
6473 xorl(new_carry, new_carry);
6474
6475 // Shift z[i] by 1, or in previous carry and save new carry
6476 movq(value, Address(z, zidx, Address::times_4, 0));
6477 shlq(value, 1);
6478 adcl(new_carry, 0);
6479
6480 orq(value, prev_carry);
6481 rorq(value, 0x20);
6482 movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form
6483
6484 // Set previous carry = new carry
6485 movl(prev_carry, new_carry);
6486 }
6487 jmp(L_fifth_loop);
6488
6489 bind(L_fifth_loop_exit);
6490}
6491
6492
6493/**
6494 * Code for BigInteger::squareToLen() intrinsic
6495 *
6496 * rdi: x
6497 * rsi: len
6498 * r8: z
6499 * rcx: zlen
6500 * r12: tmp1
6501 * r13: tmp2
6502 * r14: tmp3
6503 * r15: tmp4
6504 * rbx: tmp5
6505 *
 * Emits, in order: square_rshift (first loop), the nested second/third loops
 * that add the off-diagonal products with add_one_64 as the fourth loop,
 * lshift_by_1 (fifth loop), and finally the OR of the low bit of x[len-1]
 * into z[zlen-1].
 *
 * Stack discipline: tmp1..tmp5 are pushed on entry and popped in reverse
 * order on exit. zlen/len are pushed once before the second loop and once
 * more at the top of every iteration; the per-iteration pair is popped
 * after add_one_64, and the exit path pops both the last per-iteration pair
 * and the pair pushed before the loop, restoring the original values.
6506 */
6507void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6508
6509 Label L_second_loop, L_second_loop_exit, L_third_loop, L_third_loop_exit, L_last_x, L_multiply;
6510 push(tmp1);
6511 push(tmp2);
6512 push(tmp3);
6513 push(tmp4);
6514 push(tmp5);
6515
6516 // First loop
6517 // Store the squares, right shifted one bit (i.e., divided by 2).
6518 square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg);
6519
6520 // Add in off-diagonal sums.
6521 //
6522 // Second, third (nested) and fourth loops.
6523 // zlen +=2;
6524 // for (int xidx=len-2,zidx=zlen-4; xidx > 0; xidx-=2,zidx-=4) {
6525 // carry = 0;
6526 // long op2 = x[xidx:xidx+1];
6527 // for (int j=xidx-2,k=zidx; j >= 0; j-=2) {
6528 // k -= 2;
6529 // long op1 = x[j:j+1];
6530 // long sum = z[k:k+1];
6531 // carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs);
6532 // z[k:k+1] = sum;
6533 // }
6534 // add_one_64(z, k, carry, tmp_regs);
6535 // }
6536
6537 const Register carry = tmp5;
6538 const Register sum = tmp3;
6539 const Register op1 = tmp4;
 // op2 is tmp2 by default and is redirected to rdxReg on the BMI2 path below.
6540 Register op2 = tmp2;
6541
 // Save the caller's zlen/len; restored by the last two pops at L_second_loop_exit.
6542 push(zlen);
6543 push(len);
6544 addl(zlen,2);
6545 bind(L_second_loop);
6546 xorq(carry, carry);
6547 subl(zlen, 4);
6548 subl(len, 2);
 // Save this iteration's indices; the inner loop and add_one_64 modify both.
6549 push(zlen);
6550 push(len);
6551 cmpl(len, 0);
6552 jccb(Assembler::lessEqual, L_second_loop_exit)jccb_0(Assembler::lessEqual, L_second_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6552)
;
6553
6554 // Multiply an array by one 64 bit long.
6555 if (UseBMI2Instructions) {
6556 op2 = rdxReg;
6557 movq(op2, Address(x, len, Address::times_4, 0));
6558 rorxq(op2, op2, 32);
6559 }
6560 else {
6561 movq(op2, Address(x, len, Address::times_4, 0));
6562 rorq(op2, 32);
6563 }
6564
6565 bind(L_third_loop);
 // Two single decrements so that exactly one remaining element (negative
 // after the second decrement) can be routed to L_last_x.
6566 decrementl(len);
6567 jccb(Assembler::negative, L_third_loop_exit)jccb_0(Assembler::negative, L_third_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6567)
;
6568 decrementl(len);
6569 jccb(Assembler::negative, L_last_x)jccb_0(Assembler::negative, L_last_x, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6569)
;
6570
6571 movq(op1, Address(x, len, Address::times_4, 0));
6572 rorq(op1, 32);
6573
6574 bind(L_multiply);
6575 subl(zlen, 2);
6576 movq(sum, Address(z, zlen, Address::times_4, 0));
6577
6578 // Multiply 64 bit by 64 bit and add 64 bits lower half and upper 64 bits as carry.
6579 if (UseBMI2Instructions) {
6580 multiply_add_64_bmi2(sum, op1, op2, carry, tmp2);
6581 }
6582 else {
6583 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6584 }
6585
6586 movq(Address(z, zlen, Address::times_4, 0), sum);
6587
6588 jmp(L_third_loop);
6589 bind(L_third_loop_exit);
6590
6591 // Fourth loop
6592 // Add 64 bit long carry into z with carry propagation.
6593 // Uses offsetted zlen.
6594 add_one_64(z, zlen, carry, tmp1);
6595
6596 pop(len);
6597 pop(zlen);
6598 jmp(L_second_loop);
6599
6600 // Next infrequent code is moved outside loops.
6601 bind(L_last_x);
6602 movl(op1, Address(x, 0));
6603 jmp(L_multiply);
6604
6605 bind(L_second_loop_exit);
 // First pair: indices pushed by the final iteration; second pair: the
 // values pushed before L_second_loop.
6606 pop(len);
6607 pop(zlen);
6608 pop(len);
6609 pop(zlen);
6610
6611 // Fifth loop
6612 // Shift z left 1 bit.
6613 lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4);
6614
6615 // z[zlen-1] |= x[len-1] & 1;
6616 movl(tmp3, Address(x, len, Address::times_4, -4));
6617 andl(tmp3, 1);
6618 orl(Address(z, zlen, Address::times_4, -4), tmp3);
6619
6620 pop(tmp5);
6621 pop(tmp4);
6622 pop(tmp3);
6623 pop(tmp2);
6624 pop(tmp1);
6625}
6626
6627/**
6628 * Helper function for mul_add()
6629 * Multiply the in[] by int k and add to out[] starting at offset offs using
6630 * 128 bit by 32 bit multiply and return the carry in tmp5.
6631 * Only quad int aligned length of in[] is operated on in this function.
6632 * k is in rdxReg for BMI2Instructions, for others it is in tmp2.
6633 * This function preserves out, in and k registers.
6634 * len and offset point to the appropriate index in "in" & "out" correspondingly
6635 * tmp5 has the carry.
6636 * other registers are temporary and are modified.
6637 *
 * Per iteration the emitted code lowers len and offset by 4 and processes two
 * 64-bit chunks (displacement 8 first, then displacement 0). tmp1 holds the
 * iteration count, len >> 2. tmp3 is the running sum and tmp4 the operand
 * loaded from in[].
6638 */
6639void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in,
6640 Register offset, Register len, Register tmp1, Register tmp2, Register tmp3,
6641 Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6642
6643 Label L_first_loop, L_first_loop_exit;
6644
6645 movl(tmp1, len);
6646 shrl(tmp1, 2);
6647
6648 bind(L_first_loop);
6649 subl(tmp1, 1);
6650 jccb(Assembler::negative, L_first_loop_exit)jccb_0(Assembler::negative, L_first_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6650)
;
6651
6652 subl(len, 4);
6653 subl(offset, 4);
6654
6655 Register op2 = tmp2;
6656 const Register sum = tmp3;
6657 const Register op1 = tmp4;
6658 const Register carry = tmp5;
6659
6660 if (UseBMI2Instructions) {
6661 op2 = rdxReg;
6662 }
6663
 // Chunk at displacement 8, handled first.
6664 movq(op1, Address(in, len, Address::times_4, 8));
6665 rorq(op1, 32);
6666 movq(sum, Address(out, offset, Address::times_4, 8));
6667 rorq(sum, 32);
6668 if (UseBMI2Instructions) {
6669 multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
6670 }
6671 else {
6672 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6673 }
6674 // Store back in big endian from little endian
6675 rorq(sum, 0x20);
6676 movq(Address(out, offset, Address::times_4, 8), sum);
6677
 // Chunk at displacement 0; the carry from the chunk above flows in via tmp5.
6678 movq(op1, Address(in, len, Address::times_4, 0));
6679 rorq(op1, 32);
6680 movq(sum, Address(out, offset, Address::times_4, 0));
6681 rorq(sum, 32);
6682 if (UseBMI2Instructions) {
6683 multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
6684 }
6685 else {
6686 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6687 }
6688 // Store back in big endian from little endian
6689 rorq(sum, 0x20);
6690 movq(Address(out, offset, Address::times_4, 0), sum);
6691
6692 jmp(L_first_loop);
6693 bind(L_first_loop_exit);
6694}
6695
6696/**
6697 * Code for BigInteger::mulAdd() intrinsic
6698 *
6699 * rdi: out
6700 * rsi: in
6701 * r11: offs (out.length - offset)
6702 * rcx: len
6703 * r8: k
6704 * r12: tmp1
6705 * r13: tmp2
6706 * r14: tmp3
6707 * r15: tmp4
6708 * rbx: tmp5
6709 * Multiply the in[] by word k and add to out[], return the carry in rax
 *
 * Structure of the emitted code: the 4-way unrolled loop
 * (mul_add_128_x_32_loop), then at most one 64-bit step for two leftover
 * ints, then at most one 32-bit step for a final single int.
 * tmp1..tmp5 are saved on entry and restored on exit; len and offs are
 * modified.
 * NOTE(review): the result is moved into the literal register rax, not the
 * raxReg parameter; the two agree only if callers pass rax as raxReg --
 * confirm at the call sites.
6710 */
6711void MacroAssembler::mul_add(Register out, Register in, Register offs,
6712 Register len, Register k, Register tmp1, Register tmp2, Register tmp3,
6713 Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
6714
6715 Label L_carry, L_last_in, L_done;
6716
6717// carry = 0;
6718// for (int j=len-1; j >= 0; j--) {
6719// long product = (in[j] & LONG_MASK) * kLong +
6720// (out[offs] & LONG_MASK) + carry;
6721// out[offs--] = (int)product;
6722// carry = product >>> 32;
6723// }
6724//
6725 push(tmp1);
6726 push(tmp2);
6727 push(tmp3);
6728 push(tmp4);
6729 push(tmp5);
6730
6731 Register op2 = tmp2;
6732 const Register sum = tmp3;
6733 const Register op1 = tmp4;
6734 const Register carry = tmp5;
6735
 // Both branches copy k into op2; they differ only in which register op2 names.
6736 if (UseBMI2Instructions) {
6737 op2 = rdxReg;
6738 movl(op2, k);
6739 }
6740 else {
6741 movl(op2, k);
6742 }
6743
6744 xorq(carry, carry);
6745
6746 //First loop
6747
6748 //Multiply in[] by k in a 4 way unrolled loop using 128 bit by 32 bit multiply
6749 //The carry is in tmp5
6750 mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg);
6751
6752 //Multiply the trailing in[] entry using 64 bit by 32 bit, if any
 // Negative after the first decrement: nothing left. Negative after the
 // second: exactly one int left, handled at L_last_in.
6753 decrementl(len);
6754 jccb(Assembler::negative, L_carry)jccb_0(Assembler::negative, L_carry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6754)
;
6755 decrementl(len);
6756 jccb(Assembler::negative, L_last_in)jccb_0(Assembler::negative, L_last_in, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6756)
;
6757
6758 movq(op1, Address(in, len, Address::times_4, 0));
6759 rorq(op1, 32);
6760
6761 subl(offs, 2);
6762 movq(sum, Address(out, offs, Address::times_4, 0));
6763 rorq(sum, 32);
6764
6765 if (UseBMI2Instructions) {
6766 multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
6767 }
6768 else {
6769 multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
6770 }
6771
6772 // Store back in big endian from little endian
6773 rorq(sum, 0x20);
6774 movq(Address(out, offs, Address::times_4, 0), sum);
6775
6776 testl(len, len);
6777 jccb(Assembler::zero, L_carry)jccb_0(Assembler::zero, L_carry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6777)
;
6778
6779 //Multiply the last in[] entry, if any
6780 bind(L_last_in);
6781 movl(op1, Address(in, 0));
6782 movl(sum, Address(out, offs, Address::times_4, -4));
6783
6784 movl(raxReg, k);
6785 mull(op1); //tmp4 * eax -> edx:eax
6786 addl(sum, carry);
6787 adcl(rdxReg, 0);
6788 addl(sum, raxReg);
6789 adcl(rdxReg, 0);
6790 movl(carry, rdxReg);
6791
6792 movl(Address(out, offs, Address::times_4, -4), sum);
6793
6794 bind(L_carry);
6795 //return tmp5/carry as carry in rax
6796 movl(rax, carry);
6797
 // L_done is bound here but no jump in this function targets it.
6798 bind(L_done);
6799 pop(tmp5);
6800 pop(tmp4);
6801 pop(tmp3);
6802 pop(tmp2);
6803 pop(tmp1);
6804}
6805#endif
6806
6807/**
6808 * Emits code to update CRC-32 with a byte value according to constants in table
6809 *
6810 * @param [in,out]crc Register containing the crc.
6811 * @param [in]val Register containing the byte to fold into the CRC.
6812 * @param [in]table Register containing the table of crc constants.
6813 *
6814 * uint32_t crc;
6815 * val = crc_table[(val ^ crc) & 0xFF];
6816 * crc = val ^ (crc >> 8);
6817 *
 * Note: val is overwritten (it ends up holding the table index), and the
 * table entry is XORed into crc straight from memory rather than being
 * loaded into val first as the pseudo-code above suggests.
6818 */
6819void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
6820 xorl(val, crc);
6821 andl(val, 0xFF);
6822 shrl(crc, 8); // unsigned shift
 // table is indexed in 4-byte entries (times_4 scale).
6823 xorl(crc, Address(table, val, Address::times_4, 0));
 // The four lines that follow are path notes from the static-analyzer report
 // this listing was taken from; they are not part of the function.
3
Passing null pointer value via 2nd parameter 'index'
4
Calling constructor for 'Address'
6824}
6825
6826/**
6827 * Fold 128-bit data chunk
 *
 * Memory-operand variant: folds xcrc with the constants in xK and XORs in the
 * 16 bytes at buf + offset.
 *
 * xcrc   - in/out: running 128-bit value
 * xK     - folding constants (not written here)
 * xtmp   - scratch, overwritten
 * buf    - base address of the data
 * offset - byte displacement from buf
 *
 * The UseAVX > 0 path uses the three-operand forms; otherwise xcrc is first
 * copied to xtmp and the data is loaded through xtmp with movdqu.
6828 */
6829void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
6830 if (UseAVX > 0) {
6831 vpclmulhdq(xtmp, xK, xcrc); // [123:64]
6832 vpclmulldq(xcrc, xK, xcrc); // [63:0]
6833 vpxor(xcrc, xcrc, Address(buf, offset), 0 /* vector_len */);
6834 pxor(xcrc, xtmp);
6835 } else {
6836 movdqa(xtmp, xcrc);
6837 pclmulhdq(xtmp, xK); // [123:64]
6838 pclmulldq(xcrc, xK); // [63:0]
6839 pxor(xcrc, xtmp);
6840 movdqu(xtmp, Address(buf, offset));
6841 pxor(xcrc, xtmp);
6842 }
6843}
6844
/**
 * Fold 128-bit data chunk, register-operand variant: same folding of xcrc
 * with xK as the memory-operand overload, but the data to XOR in is already
 * in xbuf.
 *
 * xcrc - in/out: running 128-bit value
 * xK   - folding constants (not written here)
 * xtmp - scratch, overwritten
 * xbuf - 128 bits of data to fold in (not written here)
 */
6845void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
6846 if (UseAVX > 0) {
6847 vpclmulhdq(xtmp, xK, xcrc);
6848 vpclmulldq(xcrc, xK, xcrc);
6849 pxor(xcrc, xbuf);
6850 pxor(xcrc, xtmp);
6851 } else {
6852 movdqa(xtmp, xcrc);
6853 pclmulhdq(xtmp, xK);
6854 pclmulldq(xcrc, xK);
6855 pxor(xcrc, xbuf);
6856 pxor(xcrc, xtmp);
6857 }
6858}
6859
6860/**
6861 * 8-bit folds to compute 32-bit CRC
6862 *
6863 * uint64_t xcrc;
6864 * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
 *
 * XMM variant.
 * xcrc  - in/out: value being folded
 * table - base of the lookup table (4-byte entries)
 * xtmp  - scratch, receives the table entry
 * tmp   - scratch general register, receives the table index
6865 */
6866void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
6867 movdl(tmp, xcrc);
6868 andl(tmp, 0xFF);
6869 movdl(xtmp, Address(table, tmp, Address::times_4, 0));
6870 psrldq(xcrc, 1); // unsigned shift one byte
6871 pxor(xcrc, xtmp);
6872}
6873
6874/**
6875 * uint32_t crc;
6876 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 *
 * General-register variant of the 8-bit fold.
 * crc   - in/out: value being folded
 * table - base of the lookup table (4-byte entries)
 * tmp   - scratch, receives the table index
6877 */
6878void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
6879 movl(tmp, crc);
6880 andl(tmp, 0xFF);
6881 shrl(crc, 8);
6882 xorl(crc, Address(table, tmp, Address::times_4, 0));
6883}
6884
6885/**
6886 * @param crc register containing existing CRC (32-bit)
6887 * @param buf register pointing to input byte buffer (byte*)
6888 * @param len register containing number of bytes
6889 * @param table register that will contain address of CRC table
6890 * @param tmp scratch register
 *
 * Emits the CRC-32 kernel. crc is bit-inverted on entry and again on exit and
 * holds the result; buf and len are modified. rax is used as a scratch
 * register (it must differ from all parameters, see the assert below) and
 * xmm0-xmm5 are overwritten.
 *
 * Phases of the emitted code:
 *   1. byte-wise loop until buf is 16-byte aligned (skipped when len < 16);
 *   2. 4 x 128-bit parallel folding, 64 bytes per iteration;
 *   3. folding of the remaining 16-byte chunks;
 *   4. reduction of the 128-bit value in xmm1 to 32 bits;
 *   5. byte-wise loop over the last (len & 0xf) bytes.
6891 */
6892void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
6893 assert_different_registers(crc, buf, len, table, tmp, rax);
6894
6895 Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
6896 Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
6897
6898 // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
6899 // context for the registers used, where all instructions below are using 128-bit mode
6900 // On EVEX without VL and BW, these instructions will all be AVX.
6901 lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
6902 notl(crc); // ~crc
6903 cmpl(len, 16);
6904 jcc(Assembler::less, L_tail);
6905
6906 // Align buffer to 16 bytes
6907 movl(tmp, buf);
6908 andl(tmp, 0xF);
6909 jccb(Assembler::zero, L_aligned)jccb_0(Assembler::zero, L_aligned, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6909)
;
 // tmp = (buf & 0xF) - 16, a negative count of bytes up to the next 16-byte
 // boundary; adding it to len takes those bytes out of the remaining length.
6910 subl(tmp, 16);
6911 addl(len, tmp);
6912
6913 align(4);
6914 BIND(L_align_loop);
6915 movsbl(rax, Address(buf, 0)); // load byte with sign extension
6916 update_byte_crc32(crc, rax, table);
 // The four lines that follow are path notes from the static-analyzer report
 // this listing was taken from; they are not part of the function.
1
Passing null pointer value via 2nd parameter 'val'
2
Calling 'MacroAssembler::update_byte_crc32'
6917 increment(buf);
6918 incrementl(tmp);
6919 jccb(Assembler::less, L_align_loop)jccb_0(Assembler::less, L_align_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6919)
;
6920
6921 BIND(L_aligned);
6922 movl(tmp, len); // save
 // From here on len counts 16-byte chunks; the byte count stays in tmp.
6923 shrl(len, 4);
6924 jcc(Assembler::zero, L_tail_restore);
6925
6926 // Fold crc into first bytes of vector
6927 movdqa(xmm1, Address(buf, 0));
6928 movdl(rax, xmm1);
6929 xorl(crc, rax);
6930 if (VM_Version::supports_sse4_1()) {
6931 pinsrd(xmm1, crc, 0);
6932 } else {
 // Without SSE4.1 the 32-bit value is inserted as two 16-bit words.
6933 pinsrw(xmm1, crc, 0);
6934 shrl(crc, 16);
6935 pinsrw(xmm1, crc, 1);
6936 }
6937 addptr(buf, 16);
6938 subl(len, 4); // len > 0
6939 jcc(Assembler::less, L_fold_tail);
6940
6941 movdqa(xmm2, Address(buf, 0));
6942 movdqa(xmm3, Address(buf, 16));
6943 movdqa(xmm4, Address(buf, 32));
6944 addptr(buf, 48);
6945 subl(len, 3);
6946 jcc(Assembler::lessEqual, L_fold_512b);
6947
6948 // Fold total 512 bits of polynomial on each iteration,
6949 // 128 bits per each of 4 parallel streams.
6950 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));
6951
6952 align32();
6953 BIND(L_fold_512b_loop);
6954 fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
6955 fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
6956 fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
6957 fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
6958 addptr(buf, 64);
6959 subl(len, 4);
6960 jcc(Assembler::greater, L_fold_512b_loop);
6961
6962 // Fold 512 bits to 128 bits.
6963 BIND(L_fold_512b);
6964 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
6965 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
6966 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
6967 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);
6968
6969 // Fold the rest of 128 bits data chunks
6970 BIND(L_fold_tail);
6971 addl(len, 3);
6972 jccb(Assembler::lessEqual, L_fold_128b)jccb_0(Assembler::lessEqual, L_fold_128b, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6972)
;
6973 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
6974
6975 BIND(L_fold_tail_loop);
6976 fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
6977 addptr(buf, 16);
6978 decrementl(len);
6979 jccb(Assembler::greater, L_fold_tail_loop)jccb_0(Assembler::greater, L_fold_tail_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 6979)
;
6980
6981 // Fold 128 bits in xmm1 down into 32 bits in crc register.
6982 BIND(L_fold_128b);
6983 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
6984 if (UseAVX > 0) {
6985 vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
6986 vpand(xmm3, xmm0, xmm2, 0 /* vector_len */);
6987 vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
6988 } else {
6989 movdqa(xmm2, xmm0);
6990 pclmulqdq(xmm2, xmm1, 0x1);
6991 movdqa(xmm3, xmm0);
6992 pand(xmm3, xmm2);
6993 pclmulqdq(xmm0, xmm3, 0x1);
6994 }
6995 psrldq(xmm1, 8);
6996 psrldq(xmm2, 4);
6997 pxor(xmm0, xmm1);
6998 pxor(xmm0, xmm2);
6999
7000 // 8 8-bit folds to compute 32-bit CRC.
 // (four folds in the XMM register, then four in the general register)
7001 for (int j = 0; j < 4; j++) {
7002 fold_8bit_crc32(xmm0, table, xmm1, rax);
7003 }
7004 movdl(crc, xmm0); // mov 32 bits to general register
7005 for (int j = 0; j < 4; j++) {
7006 fold_8bit_crc32(crc, table, rax);
7007 }
7008
7009 BIND(L_tail_restore);
7010 movl(len, tmp); // restore
7011 BIND(L_tail);
7012 andl(len, 0xf);
7013 jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7013)
;
7014
7015 // Fold the rest of bytes
7016 align(4);
7017 BIND(L_tail_loop);
7018 movsbl(rax, Address(buf, 0)); // load byte with sign extension
7019 update_byte_crc32(crc, rax, table);
7020 increment(buf);
7021 decrementl(len);
7022 jccb(Assembler::greater, L_tail_loop)jccb_0(Assembler::greater, L_tail_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7022)
;
7023
7024 BIND(L_exit);
7025 notl(crc); // ~c
7026}
7027
7028#ifdef _LP641
7029// Helper function for AVX 512 CRC32
7030// Fold 512-bit data chunks
//
// xcrc   - in/out: running 512-bit value
// xK     - folding constants (not written here)
// xtmp   - scratch, overwritten
// buf, pos, offset - the data is loaded from buf + pos + offset
//
// Besides xtmp, this helper overwrites the hard-coded registers xmm2 and xmm3,
// so callers must not keep live values in them.
7031void MacroAssembler::fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf,
7032 Register pos, int offset) {
7033 evmovdquq(xmm3, Address(buf, pos, Address::times_1, offset), Assembler::AVX_512bit);
7034 evpclmulqdq(xtmp, xcrc, xK, 0x10, Assembler::AVX_512bit); // [123:64]
7035 evpclmulqdq(xmm2, xcrc, xK, 0x01, Assembler::AVX_512bit); // [63:0]
7036 evpxorq(xcrc, xtmp, xmm2, Assembler::AVX_512bit /* vector_len */);
7037 evpxorq(xcrc, xcrc, xmm3, Assembler::AVX_512bit /* vector_len */);
7038}
7039
7040// Helper function for AVX 512 CRC32
7041// Compute CRC32 for < 256B buffers
//
// crc, buf, len - current CRC value, data base address, remaining byte count
// table         - base of the constant table (rk1/rk2 are read at table + 16)
// pos           - current offset into buf
// tmp1          - cursor into the 16-byte stack scratch area at rsp
// tmp2          - backup of the original len, later used to index the shuffle table
//
// The Label parameters are only jumped to here, never bound; the caller is
// expected to bind them. The last path (exactly one byte left) does not end
// in a jump and falls through to whatever the caller emits next.
// rax and xmm0, xmm1, xmm7, xmm10 are overwritten.
// NOTE(review): the code writes 16 bytes at [rsp] without adjusting rsp, so it
// relies on the caller having reserved that space -- confirm against
// kernel_crc32_avx512.
7042void MacroAssembler::kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register table, Register pos,
7043 Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop,
7044 Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup) {
7045
7046 Label L_less_than_32, L_exact_16_left, L_less_than_16_left;
7047 Label L_less_than_8_left, L_less_than_4_left, L_less_than_2_left, L_zero_left;
7048 Label L_only_less_than_4, L_only_less_than_3, L_only_less_than_2;
7049
7050 // check if there is enough buffer to be able to fold 16B at a time
7051 cmpl(len, 32);
7052 jcc(Assembler::less, L_less_than_32);
7053
7054 // if there is, load the constants
7055 movdqu(xmm10, Address(table, 1 * 16)); //rk1 and rk2 in xmm10
7056 movdl(xmm0, crc); // get the initial crc value
7057 movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext
7058 pxor(xmm7, xmm0);
7059
7060 // update the buffer pointer
7061 addl(pos, 16);
7062 //update the counter.subtract 32 instead of 16 to save one instruction from the loop
7063 subl(len, 32);
7064 jmp(L_16B_reduction_loop);
7065
7066 bind(L_less_than_32);
7067 //mov initial crc to the return value. this is necessary for zero - length buffers.
7068 movl(rax, crc);
7069 testl(len, len);
7070 jcc(Assembler::equal, L_cleanup);
7071
7072 movdl(xmm0, crc); //get the initial crc value
7073
7074 cmpl(len, 16);
7075 jcc(Assembler::equal, L_exact_16_left);
7076 jcc(Assembler::less, L_less_than_16_left);
7077
 // 17..31 bytes: consume the first 16 here, the rest is handled at L_get_last_two_xmms.
7078 movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext
7079 pxor(xmm7, xmm0); //xor the initial crc value
7080 addl(pos, 16);
7081 subl(len, 16);
7082 movdqu(xmm10, Address(table, 1 * 16)); // rk1 and rk2 in xmm10
7083 jmp(L_get_last_two_xmms);
7084
7085 bind(L_less_than_16_left);
7086 //use stack space to load data less than 16 bytes, zero - out the 16B in memory first.
7087 pxor(xmm1, xmm1);
7088 movptr(tmp1, rsp);
7089 movdqu(Address(tmp1, 0 * 16), xmm1);
7090
7091 cmpl(len, 4);
7092 jcc(Assembler::less, L_only_less_than_4);
7093
7094 //backup the counter value
7095 movl(tmp2, len);
7096 cmpl(len, 8);
7097 jcc(Assembler::less, L_less_than_8_left);
7098
7099 //load 8 Bytes
7100 movq(rax, Address(buf, pos, Address::times_1, 0 * 16));
7101 movq(Address(tmp1, 0 * 16), rax);
7102 addptr(tmp1, 8);
7103 subl(len, 8);
7104 addl(pos, 8);
7105
7106 bind(L_less_than_8_left);
7107 cmpl(len, 4);
7108 jcc(Assembler::less, L_less_than_4_left);
7109
7110 //load 4 Bytes
7111 movl(rax, Address(buf, pos, Address::times_1, 0));
7112 movl(Address(tmp1, 0 * 16), rax);
7113 addptr(tmp1, 4);
7114 subl(len, 4);
7115 addl(pos, 4);
7116
7117 bind(L_less_than_4_left);
7118 cmpl(len, 2);
7119 jcc(Assembler::less, L_less_than_2_left);
7120
7121 // load 2 Bytes
 // NOTE(review): the load is 16-bit (movw) but the store is 32-bit (movl), so
 // the upper half of eax -- whatever an earlier instruction left there -- is
 // also written to the scratch area. Presumably harmless because bytes beyond
 // the original length are discarded by the pshufb below; confirm.
7122 movw(rax, Address(buf, pos, Address::times_1, 0));
7123 movl(Address(tmp1, 0 * 16), rax);
7124 addptr(tmp1, 2);
7125 subl(len, 2);
7126 addl(pos, 2);
7127
7128 bind(L_less_than_2_left);
7129 cmpl(len, 1);
7130 jcc(Assembler::less, L_zero_left);
7131
7132 // load 1 Byte
7133 movb(rax, Address(buf, pos, Address::times_1, 0));
7134 movb(Address(tmp1, 0 * 16), rax);
7135
7136 bind(L_zero_left);
 // Reload the assembled bytes from the start of the scratch area (rsp, not tmp1,
 // which has been advanced).
7137 movdqu(xmm7, Address(rsp, 0));
7138 pxor(xmm7, xmm0); //xor the initial crc value
7139
 // Shuffle mask is selected by the original byte count saved in tmp2.
7140 lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr()));
7141 movdqu(xmm0, Address(rax, tmp2));
7142 pshufb(xmm7, xmm0);
7143 jmp(L_128_done);
7144
7145 bind(L_exact_16_left);
7146 movdqu(xmm7, Address(buf, pos, Address::times_1, 0));
7147 pxor(xmm7, xmm0); //xor the initial crc value
7148 jmp(L_128_done);
7149
7150 bind(L_only_less_than_4);
7151 cmpl(len, 3);
7152 jcc(Assembler::less, L_only_less_than_3);
7153
7154 // load 3 Bytes
7155 movb(rax, Address(buf, pos, Address::times_1, 0));
7156 movb(Address(tmp1, 0), rax);
7157
7158 movb(rax, Address(buf, pos, Address::times_1, 1));
7159 movb(Address(tmp1, 1), rax);
7160
7161 movb(rax, Address(buf, pos, Address::times_1, 2));
7162 movb(Address(tmp1, 2), rax);
7163
7164 movdqu(xmm7, Address(rsp, 0));
7165 pxor(xmm7, xmm0); //xor the initial crc value
7166
7167 pslldq(xmm7, 0x5);
7168 jmp(L_barrett);
7169 bind(L_only_less_than_3);
7170 cmpl(len, 2);
7171 jcc(Assembler::less, L_only_less_than_2);
7172
7173 // load 2 Bytes
7174 movb(rax, Address(buf, pos, Address::times_1, 0));
7175 movb(Address(tmp1, 0), rax);
7176
7177 movb(rax, Address(buf, pos, Address::times_1, 1));
7178 movb(Address(tmp1, 1), rax);
7179
7180 movdqu(xmm7, Address(rsp, 0));
7181 pxor(xmm7, xmm0); //xor the initial crc value
7182
7183 pslldq(xmm7, 0x6);
7184 jmp(L_barrett);
7185
7186 bind(L_only_less_than_2);
7187 //load 1 Byte
7188 movb(rax, Address(buf, pos, Address::times_1, 0));
7189 movb(Address(tmp1, 0), rax);
7190
7191 movdqu(xmm7, Address(rsp, 0));
7192 pxor(xmm7, xmm0); //xor the initial crc value
7193
7194 pslldq(xmm7, 0x7);
 // No jump here: execution falls through to the code the caller emits next.
7195}
7196
7197/**
7198* Compute CRC32 using AVX512 instructions
7199* param crc register containing existing CRC (32-bit); the final CRC is written back into it
7200* param buf register pointing to input byte buffer (byte*)
7201* param len register containing number of bytes
7202* param table register holding the address of the crc or crc32c constant table
7203* param tmp1 scratch register
7204* param tmp2 scratch register
7205* return the result is extracted into crc (pextrd below); rax is only used as scratch here
7206*
7207* This routine is identical for crc32c with the exception of the precomputed constant
7208* table which will be passed as the table argument. The calculation steps are
7209* the same for both variants.
*
* rax and r12 must differ from every argument register (asserted below): rax is
* overwritten as a temporary and r12 is used as the running byte offset pos.
7210*/
7211void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2) {
7212 assert_different_registers(crc, buf, len, table, tmp1, tmp2, rax, r12);
7213
 // NOTE(review): the labels on the next two lines (L_tail ... L_fold_tail_loop) are
 // never bound or referenced in this function.
7214 Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
7215 Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
7216 Label L_less_than_256, L_fold_128_B_loop, L_fold_256_B_loop;
7217 Label L_fold_128_B_register, L_final_reduction_for_128, L_16B_reduction_loop;
7218 Label L_128_done, L_get_last_two_xmms, L_barrett, L_cleanup;
7219
7220 const Register pos = r12;
 // r12 is preserved: pushed here, used as pos, popped again at L_cleanup.
7221 push(r12);
 // Reserve 16 * 2 + 8 bytes of stack; the matching addptr is at L_cleanup.
7222 subptr(rsp, 16 * 2 + 8);
7223
7224 // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
7225 // context for the registers used, where all instructions below are using 128-bit mode
7226 // On EVEX without VL and BW, these instructions will all be AVX.
7227 movl(pos, 0);
7228
7229 // check if smaller than 256B
7230 cmpl(len, 256);
7231 jcc(Assembler::less, L_less_than_256);
7232
7233 // load the initial crc value
7234 movdl(xmm10, crc);
7235
7236 // receive the initial 64B data, xor the initial crc value
7237 evmovdquq(xmm0, Address(buf, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
7238 evmovdquq(xmm4, Address(buf, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
7239 evpxorq(xmm0, xmm0, xmm10, Assembler::AVX_512bit);
7240 evbroadcasti32x4(xmm10, Address(table, 2 * 16), Assembler::AVX_512bit); //zmm10 has rk3 and rk4
7241
7242 subl(len, 256);
7243 cmpl(len, 256);
7244 jcc(Assembler::less, L_fold_128_B_loop);
7245
7246 evmovdquq(xmm7, Address(buf, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
7247 evmovdquq(xmm8, Address(buf, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
7248 evbroadcasti32x4(xmm16, Address(table, 0 * 16), Assembler::AVX_512bit); //zmm16 has rk-1 and rk-2
7249 subl(len, 256);
7250
 // Fold 256 bytes per iteration into the four accumulators xmm0, xmm4, xmm7, xmm8.
7251 bind(L_fold_256_B_loop);
7252 addl(pos, 256);
7253 fold512bit_crc32_avx512(xmm0, xmm16, xmm1, buf, pos, 0 * 64);
7254 fold512bit_crc32_avx512(xmm4, xmm16, xmm1, buf, pos, 1 * 64);
7255 fold512bit_crc32_avx512(xmm7, xmm16, xmm1, buf, pos, 2 * 64);
7256 fold512bit_crc32_avx512(xmm8, xmm16, xmm1, buf, pos, 3 * 64);
7257
7258 subl(len, 256);
7259 jcc(Assembler::greaterEqual, L_fold_256_B_loop);
7260
7261 // Fold 256 into 128
7262 addl(pos, 256);
7263 evpclmulqdq(xmm1, xmm0, xmm10, 0x01, Assembler::AVX_512bit);
7264 evpclmulqdq(xmm2, xmm0, xmm10, 0x10, Assembler::AVX_512bit);
7265 vpternlogq(xmm7, 0x96, xmm1, xmm2, Assembler::AVX_512bit); // xor ABC
7266
7267 evpclmulqdq(xmm5, xmm4, xmm10, 0x01, Assembler::AVX_512bit);
7268 evpclmulqdq(xmm6, xmm4, xmm10, 0x10, Assembler::AVX_512bit);
7269 vpternlogq(xmm8, 0x96, xmm5, xmm6, Assembler::AVX_512bit); // xor ABC
7270
7271 evmovdquq(xmm0, xmm7, Assembler::AVX_512bit);
7272 evmovdquq(xmm4, xmm8, Assembler::AVX_512bit);
7273
7274 addl(len, 128);
7275 jmp(L_fold_128_B_register);
7276
7277 // at this section of the code, there is 128 * x + y(0 <= y<128) bytes of buffer.The fold_128_B_loop
7278 // loop will fold 128B at a time until we have 128 + y Bytes of buffer
7279
7280 // fold 128B at a time.This section of the code folds 8 xmm registers in parallel
7281 bind(L_fold_128_B_loop);
7282 addl(pos, 128);
7283 fold512bit_crc32_avx512(xmm0, xmm10, xmm1, buf, pos, 0 * 64);
7284 fold512bit_crc32_avx512(xmm4, xmm10, xmm1, buf, pos, 1 * 64);
7285
7286 subl(len, 128);
7287 jcc(Assembler::greaterEqual, L_fold_128_B_loop);
7288
7289 addl(pos, 128);
7290
7291 // at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128
7292 // the 128B of folded data is in 8 of the xmm registers : xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
7293 bind(L_fold_128_B_register);
7294 evmovdquq(xmm16, Address(table, 5 * 16), Assembler::AVX_512bit); // multiply by rk9-rk16
7295 evmovdquq(xmm11, Address(table, 9 * 16), Assembler::AVX_512bit); // multiply by rk17-rk20, rk1,rk2, 0,0
7296 evpclmulqdq(xmm1, xmm0, xmm16, 0x01, Assembler::AVX_512bit);
7297 evpclmulqdq(xmm2, xmm0, xmm16, 0x10, Assembler::AVX_512bit);
7298 // save last that has no multiplicand
7299 vextracti64x2(xmm7, xmm4, 3);
7300
7301 evpclmulqdq(xmm5, xmm4, xmm11, 0x01, Assembler::AVX_512bit);
7302 evpclmulqdq(xmm6, xmm4, xmm11, 0x10, Assembler::AVX_512bit);
7303 // Needed later in reduction loop
7304 movdqu(xmm10, Address(table, 1 * 16));
7305 vpternlogq(xmm1, 0x96, xmm2, xmm5, Assembler::AVX_512bit); // xor ABC
7306 vpternlogq(xmm1, 0x96, xmm6, xmm7, Assembler::AVX_512bit); // xor ABC
7307
7308 // Swap 1,0,3,2 - 01 00 11 10
7309 evshufi64x2(xmm8, xmm1, xmm1, 0x4e, Assembler::AVX_512bit);
7310 evpxorq(xmm8, xmm8, xmm1, Assembler::AVX_256bit);
7311 vextracti128(xmm5, xmm8, 1);
7312 evpxorq(xmm7, xmm5, xmm8, Assembler::AVX_128bit);
7313
7314 // instead of 128, we add 128 - 16 to the loop counter to save 1 instruction from the loop
7315 // instead of a cmp instruction, we use the negative flag with the jl instruction
7316 addl(len, 128 - 16);
7317 jcc(Assembler::less, L_final_reduction_for_128);
7318
 // Consume the remaining input 16 bytes at a time, folding into xmm7.
7319 bind(L_16B_reduction_loop);
7320 vpclmulqdq(xmm8, xmm7, xmm10, 0x01);
7321 vpclmulqdq(xmm7, xmm7, xmm10, 0x10);
7322 vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit);
7323 movdqu(xmm0, Address(buf, pos, Address::times_1, 0 * 16));
7324 vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7325 addl(pos, 16);
7326 subl(len, 16);
7327 jcc(Assembler::greaterEqual, L_16B_reduction_loop);
7328
7329 bind(L_final_reduction_for_128);
 // Undo the -16 bias; a zero result means no partial block is left.
7330 addl(len, 16);
7331 jcc(Assembler::equal, L_128_done);
7332
7333 bind(L_get_last_two_xmms);
7334 movdqu(xmm2, xmm7);
 // Load the 16 bytes that end at buf + pos + len; pos itself is left unchanged.
7335 addl(pos, len);
7336 movdqu(xmm1, Address(buf, pos, Address::times_1, -16));
7337 subl(pos, len);
7338
7339 // get rid of the extra data that was loaded before
7340 // load the shift constant
7341 lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr()));
7342 movdqu(xmm0, Address(rax, len));
 // NOTE(review): rax is not read again in this function after this add — confirm whether it is needed.
7343 addl(rax, len);
7344
7345 vpshufb(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7346 //Change mask to 512
7347 vpxor(xmm0, xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 2 * 16), Assembler::AVX_128bit, tmp2);
7348 vpshufb(xmm2, xmm2, xmm0, Assembler::AVX_128bit);
7349
7350 blendvpb(xmm2, xmm2, xmm1, xmm0, Assembler::AVX_128bit);
7351 vpclmulqdq(xmm8, xmm7, xmm10, 0x01);
7352 vpclmulqdq(xmm7, xmm7, xmm10, 0x10);
7353 vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit);
7354 vpxor(xmm7, xmm7, xmm2, Assembler::AVX_128bit);
7355
7356 bind(L_128_done);
7357 // compute crc of a 128-bit value
7358 movdqu(xmm10, Address(table, 3 * 16));
7359 movdqu(xmm0, xmm7);
7360
7361 // 64b fold
7362 vpclmulqdq(xmm7, xmm7, xmm10, 0x0);
7363 vpsrldq(xmm0, xmm0, 0x8, Assembler::AVX_128bit);
7364 vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7365
7366 // 32b fold
7367 movdqu(xmm0, xmm7);
7368 vpslldq(xmm7, xmm7, 0x4, Assembler::AVX_128bit);
7369 vpclmulqdq(xmm7, xmm7, xmm10, 0x10);
7370 vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit);
7371 jmp(L_barrett);
7372
 // Inputs shorter than 256 bytes are handled by kernel_crc32_avx512_256B, which is
 // handed the labels of this function that it may branch to.
7373 bind(L_less_than_256);
7374 kernel_crc32_avx512_256B(crc, buf, len, table, pos, tmp1, tmp2, L_barrett, L_16B_reduction_loop, L_get_last_two_xmms, L_128_done, L_cleanup);
7375
7376 //barrett reduction
7377 bind(L_barrett);
7378 vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 1 * 16), Assembler::AVX_128bit, tmp2);
7379 movdqu(xmm1, xmm7);
7380 movdqu(xmm2, xmm7);
7381 movdqu(xmm10, Address(table, 4 * 16));
7382
7383 pclmulqdq(xmm7, xmm10, 0x0);
7384 pxor(xmm7, xmm2);
7385 vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr()), Assembler::AVX_128bit, tmp2);
7386 movdqu(xmm2, xmm7);
7387 pclmulqdq(xmm7, xmm10, 0x10);
7388 pxor(xmm7, xmm2);
7389 pxor(xmm7, xmm1);
 // The final CRC is dword 2 of xmm7; it is written to crc.
7390 pextrd(crc, xmm7, 2);
7391
 // Release the stack reserved on entry and restore r12.
7392 bind(L_cleanup);
7393 addptr(rsp, 16 * 2 + 8);
7394 pop(r12);
7395}
7396
7397// S. Gueron / Information Processing Letters 112 (2012) 184
7398// Algorithm 4: Computing carry-less multiplication using a precomputed lookup table.
7399// Input: A 32 bit value B = [byte3, byte2, byte1, byte0].
7400// Output: the 64-bit carry-less product of B * CONST
// 64-bit variant. Each of the four bytes of `in` indexes an 8-byte entry in a
// 256-entry table located at crc32c_table_addr() + n * 256 * 8. The four entries are
// XORed together after shifting them left by 0, 8, 16 and 24 bits respectively.
// The result is left in `in`; tmp1, tmp2 and tmp3 are overwritten.
7401void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n,
7402 Register tmp1, Register tmp2, Register tmp3) {
7403 lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
7404 if (n > 0) {
 // select table n (256 entries of 8 bytes each)
7405 addq(tmp3, n * 256 * 8);
7406 }
7407 // Q1 = TABLEExt[n][B & 0xFF];
7408 movl(tmp1, in);
7409 andl(tmp1, 0x000000FF);
7410 shll(tmp1, 3);
7411 addq(tmp1, tmp3);
7412 movq(tmp1, Address(tmp1, 0));
7413
7414 // Q2 = TABLEExt[n][B >> 8 & 0xFF];
7415 movl(tmp2, in);
7416 shrl(tmp2, 8);
7417 andl(tmp2, 0x000000FF);
7418 shll(tmp2, 3);
7419 addq(tmp2, tmp3);
7420 movq(tmp2, Address(tmp2, 0));
7421
7422 shlq(tmp2, 8);
7423 xorq(tmp1, tmp2);
7424
7425 // Q3 = TABLEExt[n][B >> 16 & 0xFF];
7426 movl(tmp2, in);
7427 shrl(tmp2, 16);
7428 andl(tmp2, 0x000000FF);
7429 shll(tmp2, 3);
7430 addq(tmp2, tmp3);
7431 movq(tmp2, Address(tmp2, 0));
7432
7433 shlq(tmp2, 16);
7434 xorq(tmp1, tmp2);
7435
7436 // Q4 = TABLEExt[n][B >> 24 & 0xFF];
 // `in` itself is consumed for the last lookup, so it is no longer needed as input.
7437 shrl(in, 24);
7438 andl(in, 0x000000FF);
7439 shll(in, 3);
7440 addq(in, tmp3);
7441 movq(in, Address(in, 0));
7442
7443 shlq(in, 24);
7444 xorq(in, tmp1);
7445 // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
7446}
7447
// 64-bit variant. Multiplies in_out (carry-less) by a constant and leaves the product in in_out.
// The meaning of const_or_pre_comp_const_index depends on is_pclmulqdq_supported:
//  - true:  it is the constant itself; it is loaded into w_xtmp2 via tmp1 and multiplied
//           with in_out using pclmulqdq (w_xtmp1 and w_xtmp2 are overwritten).
//  - false: it is passed as the table number n to crc32c_ipl_alg4, which uses
//           tmp1, n_tmp2 and n_tmp3 as temporaries.
7448void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
7449 Register in_out,
7450 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
7451 XMMRegister w_xtmp2,
7452 Register tmp1,
7453 Register n_tmp2, Register n_tmp3) {
7454 if (is_pclmulqdq_supported) {
7455 movdl(w_xtmp1, in_out); // modified blindly
7456
7457 movl(tmp1, const_or_pre_comp_const_index);
7458 movdl(w_xtmp2, tmp1);
7459 pclmulqdq(w_xtmp1, w_xtmp2, 0);
7460
 // move the product back into the general-purpose register
7461 movdq(in_out, w_xtmp1);
7462 } else {
7463 crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3);
7464 }
7465}
7466
7467// Recombination Alternative 2: No bit-reflections
7468// T1 = (CRC_A * U1) << 1
7469// T2 = (CRC_B * U2) << 1
7470// C1 = T1 >> 32
7471// C2 = T2 >> 32
7472// T1 = T1 & 0xFFFFFFFF
7473// T2 = T2 & 0xFFFFFFFF
7474// T1 = CRC32(0, T1)
7475// T2 = CRC32(0, T2)
7476// C1 = C1 ^ T1
7477// C2 = C2 ^ T2
7478// CRC = C1 ^ C2 ^ CRC_C
// 64-bit variant of the recombination described above. in_out holds CRC_A, in1 holds
// CRC_B and in2 holds CRC_C; the combined CRC is left in in_out. in1, tmp1 and tmp2
// are overwritten, and the XMM temporaries / n_tmp3 are passed on to crc32c_pclmulqdq.
7479void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
7480 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7481 Register tmp1, Register tmp2,
7482 Register n_tmp3) {
 // in_out = CRC_A * U1, in1 = CRC_B * U2
7483 crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
7484 crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
 // T1 = in_out << 1; C1 = T1 >> 32; in_out = C1 ^ CRC32(0, low 32 bits of T1)
7485 shlq(in_out, 1);
7486 movl(tmp1, in_out);
7487 shrq(in_out, 32);
7488 xorl(tmp2, tmp2);
7489 crc32(tmp2, tmp1, 4);
7490 xorl(in_out, tmp2); // we don't care about upper 32 bit contents here
 // same steps for T2 / C2, using in1
7491 shlq(in1, 1);
7492 movl(tmp1, in1);
7493 shrq(in1, 32);
7494 xorl(tmp2, tmp2);
7495 crc32(tmp2, tmp1, 4);
7496 xorl(in1, tmp2);
 // CRC = C1 ^ C2 ^ CRC_C
7497 xorl(in_out, in1);
7498 xorl(in_out, in2);
7499}
7500
7501// Set N to predefined value
7502// Subtract from a length of a buffer
7503// execute in a loop:
7504// CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0
7505// for i = 1 to N do
7506// CRC_A = CRC32(CRC_A, A[i])
7507// CRC_B = CRC32(CRC_B, B[i])
7508// CRC_C = CRC32(CRC_C, C[i])
7509// end for
7510// Recombine
// 64-bit variant. As used below: in_out1 is the remaining byte count, in_out2 is the
// current buffer address and in_out3 is the running CRC. While at least 3 * size bytes
// remain, three consecutive partitions of `size` bytes are processed in parallel
// (8 bytes per crc32 instruction) into in_out3, tmp1 and tmp2, and then recombined
// into in_out3 by crc32c_rec_alt2. tmp3 holds the end address of the first partition.
7511void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
7512 Register in_out1, Register in_out2, Register in_out3,
7513 Register tmp1, Register tmp2, Register tmp3,
7514 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7515 Register tmp4, Register tmp5,
7516 Register n_tmp6) {
7517 Label L_processPartitions;
7518 Label L_processPartition;
7519 Label L_exit;
7520
7521 bind(L_processPartitions);
 // stop once fewer than 3 * size bytes remain
7522 cmpl(in_out1, 3 * size);
7523 jcc(Assembler::less, L_exit);
 // CRC_B = 0, CRC_C = 0
7524 xorl(tmp1, tmp1);
7525 xorl(tmp2, tmp2);
7526 movq(tmp3, in_out2);
7527 addq(tmp3, size);
7528
7529 bind(L_processPartition);
7530 crc32(in_out3, Address(in_out2, 0), 8);
7531 crc32(tmp1, Address(in_out2, size), 8);
7532 crc32(tmp2, Address(in_out2, size * 2), 8);
7533 addq(in_out2, 8);
7534 cmpq(in_out2, tmp3);
7535 jcc(Assembler::less, L_processPartition);
7536 crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
7537 w_xtmp1, w_xtmp2, w_xtmp3,
7538 tmp4, tmp5,
7539 n_tmp6);
 // in_out2 already advanced by size in the loop; skip the other two partitions
7540 addq(in_out2, 2 * size);
7541 subl(in_out1, 3 * size);
7542 jmp(L_processPartitions);
7543
7544 bind(L_exit);
7545}
7546#else
// 32-bit variant. Same table lookup as the 64-bit version (four 8-byte entries from
// table n, indexed by the bytes of in_out and shifted left by 0, 8, 16 and 24 bits), but
// the 64-bit values are loaded, shifted and XORed in XMM registers. The result is left
// in xtmp1; in_out, tmp1, tmp2, tmp3 and xtmp2 are overwritten.
7547void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n,
7548 Register tmp1, Register tmp2, Register tmp3,
7549 XMMRegister xtmp1, XMMRegister xtmp2) {
7550 lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
7551 if (n > 0) {
 // select table n (256 entries of 8 bytes each)
7552 addl(tmp3, n * 256 * 8);
7553 }
7554 // Q1 = TABLEExt[n][B & 0xFF];
7555 movl(tmp1, in_out);
7556 andl(tmp1, 0x000000FF);
7557 shll(tmp1, 3);
7558 addl(tmp1, tmp3);
7559 movq(xtmp1, Address(tmp1, 0));
7560
7561 // Q2 = TABLEExt[n][B >> 8 & 0xFF];
7562 movl(tmp2, in_out);
7563 shrl(tmp2, 8);
7564 andl(tmp2, 0x000000FF);
7565 shll(tmp2, 3);
7566 addl(tmp2, tmp3);
7567 movq(xtmp2, Address(tmp2, 0));
7568
7569 psllq(xtmp2, 8);
7570 pxor(xtmp1, xtmp2);
7571
7572 // Q3 = TABLEExt[n][B >> 16 & 0xFF];
7573 movl(tmp2, in_out);
7574 shrl(tmp2, 16);
7575 andl(tmp2, 0x000000FF);
7576 shll(tmp2, 3);
7577 addl(tmp2, tmp3);
7578 movq(xtmp2, Address(tmp2, 0));
7579
7580 psllq(xtmp2, 16);
7581 pxor(xtmp1, xtmp2);
7582
7583 // Q4 = TABLEExt[n][B >> 24 & 0xFF];
 // in_out is turned into the address of the last table entry here
7584 shrl(in_out, 24);
7585 andl(in_out, 0x000000FF);
7586 shll(in_out, 3);
7587 addl(in_out, tmp3);
7588 movq(xtmp2, Address(in_out, 0));
7589
7590 psllq(xtmp2, 24);
7591 pxor(xtmp1, xtmp2); // Result in CXMM
7592 // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
7593}
7594
// 32-bit variant. Multiplies in_out (carry-less) by a constant and leaves the product
// in w_xtmp1 (it is not moved back to in_out, see the comment in the body).
// The meaning of const_or_pre_comp_const_index depends on is_pclmulqdq_supported:
//  - true:  it is the constant itself, loaded into w_xtmp2 via tmp1 for pclmulqdq.
//  - false: it is passed as the table number n to crc32c_ipl_alg4, with w_xtmp1 and
//           w_xtmp2 as its XMM registers.
7595void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
7596 Register in_out,
7597 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
7598 XMMRegister w_xtmp2,
7599 Register tmp1,
7600 Register n_tmp2, Register n_tmp3) {
7601 if (is_pclmulqdq_supported) {
7602 movdl(w_xtmp1, in_out);
7603
7604 movl(tmp1, const_or_pre_comp_const_index);
7605 movdl(w_xtmp2, tmp1);
7606 pclmulqdq(w_xtmp1, w_xtmp2, 0);
7607 // Keep result in XMM since GPR is 32 bit in length
7608 } else {
7609 crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2);
7610 }
7611}
7612
// 32-bit variant of the recombination. The two products are produced in w_xtmp1 and
// w_xtmp2 by crc32c_pclmulqdq, so the shift by one and the split into low / high
// 32-bit halves are done in the XMM registers before moving each half into a
// general-purpose register. in2 holds CRC_C; the combined CRC is left in in_out.
7613void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
7614 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7615 Register tmp1, Register tmp2,
7616 Register n_tmp3) {
7617 crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
7618 crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
7619
 // T1 = product << 1; tmp1 = low 32 bits of T1, in_out = T1 >> 32
7620 psllq(w_xtmp1, 1);
7621 movdl(tmp1, w_xtmp1);
7622 psrlq(w_xtmp1, 32);
7623 movdl(in_out, w_xtmp1);
7624
7625 xorl(tmp2, tmp2);
7626 crc32(tmp2, tmp1, 4);
7627 xorl(in_out, tmp2);
7628
 // same steps for the second product, using in1
7629 psllq(w_xtmp2, 1);
7630 movdl(tmp1, w_xtmp2);
7631 psrlq(w_xtmp2, 32);
7632 movdl(in1, w_xtmp2);
7633
7634 xorl(tmp2, tmp2);
7635 crc32(tmp2, tmp1, 4);
7636 xorl(in1, tmp2);
 // CRC = C1 ^ C2 ^ CRC_C
7637 xorl(in_out, in1);
7638 xorl(in_out, in2);
7639}
7640
// 32-bit variant. As used below: in_out1 is the remaining byte count, in_out2 is the
// current buffer address and in_out3 is the running CRC. While at least 3 * size bytes
// remain, three consecutive partitions of `size` bytes are processed in parallel
// (two 4-byte crc32 instructions per partition and iteration) and then recombined
// into in_out3 by crc32c_rec_alt2.
// The incoming tmp4, tmp5 and n_tmp6 values are not used: they are overwritten below
// with tmp3, in_out1 and in_out2, which are saved on the stack around the recombination.
7641void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
7642 Register in_out1, Register in_out2, Register in_out3,
7643 Register tmp1, Register tmp2, Register tmp3,
7644 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7645 Register tmp4, Register tmp5,
7646 Register n_tmp6) {
7647 Label L_processPartitions;
7648 Label L_processPartition;
7649 Label L_exit;
7650
7651 bind(L_processPartitions);
 // stop once fewer than 3 * size bytes remain
7652 cmpl(in_out1, 3 * size);
7653 jcc(Assembler::less, L_exit);
7654 xorl(tmp1, tmp1);
7655 xorl(tmp2, tmp2);
7656 movl(tmp3, in_out2);
7657 addl(tmp3, size);
7658
7659 bind(L_processPartition);
7660 crc32(in_out3, Address(in_out2, 0), 4);
7661 crc32(tmp1, Address(in_out2, size), 4);
7662 crc32(tmp2, Address(in_out2, size*2), 4);
7663 crc32(in_out3, Address(in_out2, 0+4), 4);
7664 crc32(tmp1, Address(in_out2, size+4), 4);
7665 crc32(tmp2, Address(in_out2, size*2+4), 4);
7666 addl(in_out2, 8);
7667 cmpl(in_out2, tmp3);
7668 jcc(Assembler::less, L_processPartition);
7669
 // Save three live registers so they can be handed to crc32c_rec_alt2 as temporaries.
7670 push(tmp3);
7671 push(in_out1);
7672 push(in_out2);
7673 tmp4 = tmp3;
7674 tmp5 = in_out1;
7675 n_tmp6 = in_out2;
7676
7677 crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
7678 w_xtmp1, w_xtmp2, w_xtmp3,
7679 tmp4, tmp5,
7680 n_tmp6);
7681
 // restore in reverse push order
7682 pop(in_out2);
7683 pop(in_out1);
7684 pop(tmp3);
7685
7686 addl(in_out2, 2 * size);
7687 subl(in_out1, 3 * size);
7688 jmp(L_processPartitions);
7689
7690 bind(L_exit);
7691}
7692#endif //LP64
7693
7694#ifdef _LP641
7695// Algorithm 2: Pipelined usage of the CRC32 instruction.
7696// Input: A buffer I of L bytes.
7697// Output: the CRC32C value of the buffer.
7698// Notations:
7699// Write L = 24N + r, with N = floor (L/24).
7700// r = L mod 24 (0 <= r < 24).
7701// Consider I as the concatenation of A|B|C|R, where A, B, C, each,
7702// N quadwords, and R consists of r bytes.
7703// A[j] = I [8j+7:8j], j= 0, 1, ..., N-1
7704// B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1
7705// C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1
7706// if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1
// 64-bit variant. As used below: in_out is the running CRC, in1 is the buffer address
// and in2 is the byte count. The buffer is first consumed by crc32c_proc_chunk with the
// partition sizes CRC32C_HIGH, CRC32C_MIDDLE and CRC32C_LOW, then 4 bytes at a time,
// and finally one byte at a time.
7707void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
7708 Register tmp1, Register tmp2, Register tmp3,
7709 Register tmp4, Register tmp5, Register tmp6,
7710 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7711 bool is_pclmulqdq_supported) {
7712 uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
7713 Label L_wordByWord;
7714 Label L_byteByByteProlog;
7715 Label L_byteByByte;
7716 Label L_exit;
7717
 // With pclmulqdq the array holds the constants themselves, read from the table at
 // code-generation time (each pair is stored swapped); otherwise it holds table indices.
7718 if (is_pclmulqdq_supported ) {
7719 const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
7720 const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1);
7721
7722 const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
7723 const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
7724
7725 const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
7726 const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
7727 assert((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"")do { if (!((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7727, "assert(" "(CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5"
") failed", "Checking whether you declared all of the constants based on the number of \"chunks\""
); ::breakpoint(); } } while (0)
;
7728 } else {
7729 const_or_pre_comp_const_index[0] = 1;
7730 const_or_pre_comp_const_index[1] = 0;
7731
7732 const_or_pre_comp_const_index[2] = 3;
7733 const_or_pre_comp_const_index[3] = 2;
7734
7735 const_or_pre_comp_const_index[4] = 5;
7736 const_or_pre_comp_const_index[5] = 4;
7737 }
7738 crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
7739 in2, in1, in_out,
7740 tmp1, tmp2, tmp3,
7741 w_xtmp1, w_xtmp2, w_xtmp3,
7742 tmp4, tmp5,
7743 tmp6);
7744 crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
7745 in2, in1, in_out,
7746 tmp1, tmp2, tmp3,
7747 w_xtmp1, w_xtmp2, w_xtmp3,
7748 tmp4, tmp5,
7749 tmp6);
7750 crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
7751 in2, in1, in_out,
7752 tmp1, tmp2, tmp3,
7753 w_xtmp1, w_xtmp2, w_xtmp3,
7754 tmp4, tmp5,
7755 tmp6);
 // tmp1 = in1 + (in2 - (in2 & 7)): end address for the 4-byte loop
7756 movl(tmp1, in2);
7757 andl(tmp1, 0x00000007);
7758 negl(tmp1);
7759 addl(tmp1, in2);
7760 addq(tmp1, in1);
7761
7762 BIND(L_wordByWord);
7763 cmpq(in1, tmp1);
7764 jcc(Assembler::greaterEqual, L_byteByByteProlog);
7765 crc32(in_out, Address(in1, 0), 4);
7766 addq(in1, 4);
7767 jmp(L_wordByWord);
7768
7769 BIND(L_byteByByteProlog);
 // in2 = number of trailing bytes (in2 & 7); tmp2 counts from 1 up to in2
7770 andl(in2, 0x00000007);
7771 movl(tmp2, 1);
7772
7773 BIND(L_byteByByte);
7774 cmpl(tmp2, in2);
7775 jccb(Assembler::greater, L_exit)jccb_0(Assembler::greater, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7775)
;
7776 crc32(in_out, Address(in1, 0), 1);
7777 incq(in1);
7778 incl(tmp2);
7779 jmp(L_byteByByte);
7780
7781 BIND(L_exit);
7782}
7783#else
// 32-bit variant. As used below: in_out is the running CRC, in1 is the buffer address
// and in2 is the byte count. The buffer is first consumed by crc32c_proc_chunk with the
// partition sizes CRC32C_HIGH, CRC32C_MIDDLE and CRC32C_LOW, then 4 bytes at a time,
// and finally one byte at a time. Address arithmetic uses 32-bit instructions here.
7784void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
7785 Register tmp1, Register tmp2, Register tmp3,
7786 Register tmp4, Register tmp5, Register tmp6,
7787 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
7788 bool is_pclmulqdq_supported) {
7789 uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
7790 Label L_wordByWord;
7791 Label L_byteByByteProlog;
7792 Label L_byteByByte;
7793 Label L_exit;
7794
 // With pclmulqdq the array holds the constants themselves, read from the table at
 // code-generation time (each pair is stored swapped); otherwise it holds table indices.
7795 if (is_pclmulqdq_supported) {
7796 const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
7797 const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1);
7798
7799 const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
7800 const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
7801
7802 const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
7803 const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
7804 } else {
7805 const_or_pre_comp_const_index[0] = 1;
7806 const_or_pre_comp_const_index[1] = 0;
7807
7808 const_or_pre_comp_const_index[2] = 3;
7809 const_or_pre_comp_const_index[3] = 2;
7810
7811 const_or_pre_comp_const_index[4] = 5;
7812 const_or_pre_comp_const_index[5] = 4;
7813 }
7814 crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
7815 in2, in1, in_out,
7816 tmp1, tmp2, tmp3,
7817 w_xtmp1, w_xtmp2, w_xtmp3,
7818 tmp4, tmp5,
7819 tmp6);
7820 crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
7821 in2, in1, in_out,
7822 tmp1, tmp2, tmp3,
7823 w_xtmp1, w_xtmp2, w_xtmp3,
7824 tmp4, tmp5,
7825 tmp6);
7826 crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
7827 in2, in1, in_out,
7828 tmp1, tmp2, tmp3,
7829 w_xtmp1, w_xtmp2, w_xtmp3,
7830 tmp4, tmp5,
7831 tmp6);
 // tmp1 = in1 + (in2 - (in2 & 7)): end address for the 4-byte loop
7832 movl(tmp1, in2);
7833 andl(tmp1, 0x00000007);
7834 negl(tmp1);
7835 addl(tmp1, in2);
7836 addl(tmp1, in1);
7837
7838 BIND(L_wordByWord);
7839 cmpl(in1, tmp1);
7840 jcc(Assembler::greaterEqual, L_byteByByteProlog);
7841 crc32(in_out, Address(in1,0), 4);
7842 addl(in1, 4);
7843 jmp(L_wordByWord);
7844
7845 BIND(L_byteByByteProlog);
 // in2 = number of trailing bytes (in2 & 7); tmp2 counts from 1 up to in2
7846 andl(in2, 0x00000007);
7847 movl(tmp2, 1);
7848
7849 BIND(L_byteByByte);
7850 cmpl(tmp2, in2);
7851 jccb(Assembler::greater, L_exit)jccb_0(Assembler::greater, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7851)
;
 // unlike the 64-bit variant, the byte is loaded into tmp1 first (tmp1 is free after the word loop)
7852 movb(tmp1, Address(in1, 0));
7853 crc32(in_out, tmp1, 1);
7854 incl(in1);
7855 incl(tmp2);
7856 jmp(L_byteByByte);
7857
7858 BIND(L_exit);
7859}
7860#endif // LP64
7861#undef BIND
7862#undef BLOCK_COMMENT
7863
7864// Compress char[] array to byte[].
7865// ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java
7866// @IntrinsicCandidate
7867// private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
7868// for (int i = 0; i < len; i++) {
7869// int c = src[srcOff++];
7870// if (c >>> 8 != 0) {
7871// return 0;
7872// }
7873// dst[dstOff++] = (byte)c;
7874// }
7875// return len;
7876// }
// Emits the compress loop in up to three tiers: an AVX-512 path (only generated when
// AVX3Threshold == 0, UseAVX > 2 and avx512vlbw + bmi2 are supported), an SSE4.2 path
// (only generated when UseSSE42Intrinsics is set) and a scalar one-char-per-iteration
// loop. The original length is pushed on entry: on success it is popped into result,
// on failure result is zeroed and the saved slot is discarded. src, dst, len and tmp5
// are modified.
7877void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
7878 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
7879 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
7880 Register tmp5, Register result, KRegister mask1, KRegister mask2) {
7881 Label copy_chars_loop, return_length, return_zero, done;
7882
7883 // rsi: src
7884 // rdi: dst
7885 // rdx: len
7886 // rcx: tmp5
7887 // rax: result
7888
7889 // rsi holds start addr of source char[] to be compressed
7890 // rdi holds start addr of destination byte[]
7891 // rdx holds length
7892
7893 assert(len != result, "")do { if (!(len != result)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 7893, "assert(" "len != result" ") failed", ""); ::breakpoint
(); } } while (0)
;
7894
7895 // save length for return
7896 push(len);
7897
7898 if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
7899 VM_Version::supports_avx512vlbw() &&
7900 VM_Version::supports_bmi2()) {
7901
7902 Label copy_32_loop, copy_loop_tail, below_threshold;
7903
7904 // alignment
7905 Label post_alignment;
7906
7907 // if length of the string is less than 32, handle it in an old fashioned way
 // (testl(len, -32) sets ZF only when len & ~31 == 0)
7908 testl(len, -32);
7909 jcc(Assembler::zero, below_threshold);
7910
7911 // First check whether a character is compressable ( <= 0xFF).
7912 // Create mask to test for Unicode chars inside zmm vector
7913 movl(result, 0x00FF);
7914 evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7915
 // skip the alignment step when len & ~63 == 0
7916 testl(len, -64);
7917 jcc(Assembler::zero, post_alignment);
7918
 // tmp5 = (-dst) & 31: number of leading elements to handle before dst is 32-byte aligned
7919 movl(tmp5, dst);
7920 andl(tmp5, (32 - 1));
7921 negl(tmp5);
7922 andl(tmp5, (32 - 1));
7923
7924 // bail out when there is nothing to be done
7925 testl(tmp5, 0xFFFFFFFF);
7926 jcc(Assembler::zero, post_alignment);
7927
7928 // ~(~0 << tmp5): mask with the low tmp5 bits set, one bit per element to process
7929 movl(result, 0xFFFFFFFF);
7930 shlxl(result, result, tmp5);
7931 notl(result);
7932 kmovdl(mask2, result);
7933
7934 evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7935 evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
7936 ktestd(mask1, mask2);
7937 jcc(Assembler::carryClear, return_zero);
7938
7939 evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
7940
 // src advances by 2 * tmp5 bytes (chars), dst by tmp5 bytes
7941 addptr(src, tmp5);
7942 addptr(src, tmp5);
7943 addptr(dst, tmp5);
7944 subl(len, tmp5);
7945
7946 bind(post_alignment);
7947 // end of alignment
7948
7949 movl(tmp5, len);
7950 andl(tmp5, (32 - 1)); // tail count (in chars)
7951 andl(len, ~(32 - 1)); // vector count (in chars)
7952 jcc(Assembler::zero, copy_loop_tail);
7953
 // point src/dst past the vector part and count len up from -count to 0
7954 lea(src, Address(src, len, Address::times_2));
7955 lea(dst, Address(dst, len, Address::times_1));
7956 negptr(len);
7957
7958 bind(copy_32_loop);
7959 evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
7960 evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7961 kortestdl(mask1, mask1);
7962 jcc(Assembler::carryClear, return_zero);
7963
7964 // All elements in current processed chunk are valid candidates for
7965 // compression. Write a truncated byte elements to the memory.
7966 evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7967 addptr(len, 32);
7968 jcc(Assembler::notZero, copy_32_loop);
7969
7970 bind(copy_loop_tail);
7971 // bail out when there is nothing to be done
7972 testl(tmp5, 0xFFFFFFFF);
7973 jcc(Assembler::zero, return_length);
7974
7975 movl(len, tmp5);
7976
7977 // ~(~0 << len), where len is the # of remaining elements to process
7978 movl(result, 0xFFFFFFFF);
7979 shlxl(result, result, len);
7980 notl(result);
7981
7982 kmovdl(mask2, result);
7983
7984 evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7985 evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit);
7986 ktestd(mask1, mask2);
7987 jcc(Assembler::carryClear, return_zero);
7988
7989 evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
7990 jmp(return_length);
7991
 // short inputs continue with the code emitted below
7992 bind(below_threshold);
7993 }
7994
7995 if (UseSSE42Intrinsics) {
7996 Label copy_32_loop, copy_16, copy_tail;
7997
7998 movl(result, len);
7999
8000 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
8001
8002 // vectored compression
8003 andl(len, 0xfffffff0); // vector count (in chars)
8004 andl(result, 0x0000000f); // tail count (in chars)
8005 testl(len, len);
8006 jcc(Assembler::zero, copy_16);
8007
8008 // compress 16 chars per iter
8009 movdl(tmp1Reg, tmp5);
8010 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
8011 pxor(tmp4Reg, tmp4Reg);
8012
8013 lea(src, Address(src, len, Address::times_2));
8014 lea(dst, Address(dst, len, Address::times_1));
8015 negptr(len);
8016
8017 bind(copy_32_loop);
8018 movdqu(tmp2Reg, Address(src, len, Address::times_2)); // load 1st 8 characters
8019 por(tmp4Reg, tmp2Reg);
8020 movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
8021 por(tmp4Reg, tmp3Reg);
8022 ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
8023 jcc(Assembler::notZero, return_zero);
8024 packuswb(tmp2Reg, tmp3Reg); // no char above 0xFF; compress each to 1 byte
8025 movdqu(Address(dst, len, Address::times_1), tmp2Reg);
8026 addptr(len, 16);
8027 jcc(Assembler::notZero, copy_32_loop);
8028
8029 // compress next vector of 8 chars (if any)
8030 bind(copy_16);
8031 movl(len, result);
8032 andl(len, 0xfffffff8); // vector count (in chars)
8033 andl(result, 0x00000007); // tail count (in chars)
8034 testl(len, len);
8035 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8035)
;
8036
8037 movdl(tmp1Reg, tmp5);
8038 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
8039 pxor(tmp3Reg, tmp3Reg);
8040
8041 movdqu(tmp2Reg, Address(src, 0));
8042 ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
8043 jccb(Assembler::notZero, return_zero)jccb_0(Assembler::notZero, return_zero, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8043)
;
8044 packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
8045 movq(Address(dst, 0), tmp2Reg);
8046 addptr(src, 16);
8047 addptr(dst, 8);
8048
8049 bind(copy_tail);
8050 movl(len, result);
8051 }
8052 // compress 1 char per iter
8053 testl(len, len);
8054 jccb(Assembler::zero, return_length)jccb_0(Assembler::zero, return_length, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8054)
;
8055 lea(src, Address(src, len, Address::times_2));
8056 lea(dst, Address(dst, len, Address::times_1));
8057 negptr(len);
8058
8059 bind(copy_chars_loop);
8060 load_unsigned_short(result, Address(src, len, Address::times_2));
8061 testl(result, 0xff00); // check if Unicode char
8062 jccb(Assembler::notZero, return_zero)jccb_0(Assembler::notZero, return_zero, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8062)
;
8063 movb(Address(dst, len, Address::times_1), result); // char <= 0xFF; compress to 1 byte
8064 increment(len);
8065 jcc(Assembler::notZero, copy_chars_loop);
8066
8067 // if compression succeeded, return length
8068 bind(return_length);
 // result = the length pushed on entry
8069 pop(result);
8070 jmpb(done)jmpb_0(done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8070)
;
8071
8072 // if compression failed, return 0
8073 bind(return_zero);
8074 xorl(result, result);
 // drop the length pushed on entry without reading it
8075 addptr(rsp, wordSize);
8076
8077 bind(done);
8078}
8079
8080// Inflate byte[] array to char[].
8081// ..\jdk\src\java.base\share\classes\java\lang\StringLatin1.java
8082// @IntrinsicCandidate
8083// private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
8084// for (int i = 0; i < len; i++) {
8085// dst[dstOff++] = (char)(src[srcOff++] & 0xff);
8086// }
8087// }
// Emits code that widens `len` bytes read from `src` into 16-bit elements
// written to `dst`. tmp1 is the vector scratch register, tmp2 is a general
// purpose scratch register that carries the tail count, and mask is the opmask
// register used for the AVX-512 tail store. The emitted code modifies src, dst,
// len and tmp2. Which vector paths are emitted depends on UseAVX,
// UseSSE42Intrinsics and the VM_Version checks below; the scalar loop at the end
// is always emitted.
8088void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
8089 XMMRegister tmp1, Register tmp2, KRegister mask) {
8090 Label copy_chars_loop, done, below_threshold, avx3_threshold;
8091 // rsi: src
8092 // rdi: dst
8093 // rdx: len
8094 // rcx: tmp2
8095
8096 // rsi holds start addr of source byte[] to be inflated
8097 // rdi holds start addr of destination char[]
8098 // rdx holds length
8099 assert_different_registers(src, dst, len, tmp2);
// keep a copy of the length in tmp2; the vector paths below reduce it to a tail count
8100 movl(tmp2, len);
8101 if ((UseAVX > 2) && // AVX512
8102 VM_Version::supports_avx512vlbw() &&
8103 VM_Version::supports_bmi2()) {
8104
8105 Label copy_32_loop, copy_tail;
// len is reused as a scratch register once the 32-element loop is finished
8106 Register tmp3_aliased = len;
8107
8108 // if length of the string is less than 16, handle it in an old fashioned way
8109 testl(len, -16);
8110 jcc(Assembler::zero, below_threshold);
8111
// lengths with no bit in common with -AVX3Threshold skip the 512-bit code
// (presumably len < AVX3Threshold when the threshold is a power of two - TODO confirm)
8112 testl(len, -1 * AVX3Threshold);
8113 jcc(Assembler::zero, avx3_threshold);
8114
8115 // In order to use only one arithmetic operation for the main loop we use
8116 // this pre-calculation
8117 andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
8118 andl(len, -32); // vector count
8119 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8119)
;
8120
// advance src/dst past the vectorized part and let len count up from -count to 0
8121 lea(src, Address(src, len, Address::times_1));
8122 lea(dst, Address(dst, len, Address::times_2));
8123 negptr(len);
8124
8125
8126 // inflate 32 chars per iter
8127 bind(copy_32_loop);
8128 vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
8129 evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
8130 addptr(len, 32);
8131 jcc(Assembler::notZero, copy_32_loop);
8132
8133 bind(copy_tail);
8134 // bail out when there is nothing to be done
8135 testl(tmp2, -1); // we don't destroy the contents of tmp2 here
8136 jcc(Assembler::zero, done);
8137
8138 // ~(~0 << length), where length is the # of remaining elements to process
8139 movl(tmp3_aliased, -1);
8140 shlxl(tmp3_aliased, tmp3_aliased, tmp2);
8141 notl(tmp3_aliased);
8142 kmovdl(mask, tmp3_aliased);
// masked load/widen and masked store of the remaining elements
8143 evpmovzxbw(tmp1, mask, Address(src, 0), Assembler::AVX_512bit);
8144 evmovdquw(Address(dst, 0), mask, tmp1, /*merge*/ true, Assembler::AVX_512bit);
8145
8146 jmp(done);
// inputs that skipped the 512-bit code continue with the code emitted below
8147 bind(avx3_threshold);
8148 }
8149 if (UseSSE42Intrinsics) {
8150 Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
8151
8152 if (UseAVX > 1) {
// tail count and vector count for the 16-element loop
8153 andl(tmp2, (16 - 1));
8154 andl(len, -16);
8155 jccb(Assembler::zero, copy_new_tail)jccb_0(Assembler::zero, copy_new_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8155)
;
8156 } else {
8157 andl(tmp2, 0x00000007); // tail count (in chars)
8158 andl(len, 0xfffffff8); // vector count (in chars)
8159 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8159)
;
8160 }
8161
8162 // vectored inflation
8163 lea(src, Address(src, len, Address::times_1));
8164 lea(dst, Address(dst, len, Address::times_2));
8165 negptr(len);
8166
8167 if (UseAVX > 1) {
// inflate 16 chars per iter
8168 bind(copy_16_loop);
8169 vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_256bit);
8170 vmovdqu(Address(dst, len, Address::times_2), tmp1);
8171 addptr(len, 16);
8172 jcc(Assembler::notZero, copy_16_loop);
8173
// below_threshold is the target of the "fewer than 16 elements" check in the AVX-512 block
8174 bind(below_threshold);
8175 bind(copy_new_tail);
// split the remaining count (held in tmp2) into one optional group of 8 plus a tail
8176 movl(len, tmp2);
8177 andl(tmp2, 0x00000007);
8178 andl(len, 0xFFFFFFF8);
8179 jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8179)
;
8180
8181 pmovzxbw(tmp1, Address(src, 0));
8182 movdqu(Address(dst, 0), tmp1);
8183 addptr(src, 8);
8184 addptr(dst, 2 * 8);
8185
// skips the 8-element loop emitted next
8186 jmp(copy_tail, true);
8187 }
8188
8189 // inflate 8 chars per iter
8190 bind(copy_8_loop);
8191 pmovzxbw(tmp1, Address(src, len, Address::times_1)); // unpack to 8 words
8192 movdqu(Address(dst, len, Address::times_2), tmp1);
8193 addptr(len, 8);
8194 jcc(Assembler::notZero, copy_8_loop);
8195
8196 bind(copy_tail);
8197 movl(len, tmp2);
8198
// handle one group of 4 elements when at least 4 remain
8199 cmpl(len, 4);
8200 jccb(Assembler::less, copy_bytes)jccb_0(Assembler::less, copy_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8200)
;
8201
8202 movdl(tmp1, Address(src, 0)); // load 4 byte chars
8203 pmovzxbw(tmp1, tmp1);
8204 movq(Address(dst, 0), tmp1);
8205 subptr(len, 4);
8206 addptr(src, 4);
8207 addptr(dst, 8);
8208
8209 bind(copy_bytes);
8210 } else {
// without the SSE4.2 code the below_threshold label is bound directly before the scalar loop
8211 bind(below_threshold);
8212 }
8213
// scalar tail: nothing to do when len is zero
8214 testl(len, len);
8215 jccb(Assembler::zero, done)jccb_0(Assembler::zero, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8215)
;
8216 lea(src, Address(src, len, Address::times_1));
8217 lea(dst, Address(dst, len, Address::times_2));
8218 negptr(len);
8219
8220 // inflate 1 char per iter
8221 bind(copy_chars_loop);
8222 load_unsigned_byte(tmp2, Address(src, len, Address::times_1)); // load byte char
8223 movw(Address(dst, len, Address::times_2), tmp2); // inflate byte char to word
8224 increment(len);
8225 jcc(Assembler::notZero, copy_chars_loop);
8226
8227 bind(done);
8228}
8229
8230
8231void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) {
8232 switch(type) {
8233 case T_BYTE:
8234 case T_BOOLEAN:
8235 evmovdqub(dst, kmask, src, false, vector_len);
8236 break;
8237 case T_CHAR:
8238 case T_SHORT:
8239 evmovdquw(dst, kmask, src, false, vector_len);
8240 break;
8241 case T_INT:
8242 case T_FLOAT:
8243 evmovdqul(dst, kmask, src, false, vector_len);
8244 break;
8245 case T_LONG:
8246 case T_DOUBLE:
8247 evmovdquq(dst, kmask, src, false, vector_len);
8248 break;
8249 default:
8250 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8250, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8251 break;
8252 }
8253}
8254
8255void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) {
8256 switch(type) {
8257 case T_BYTE:
8258 case T_BOOLEAN:
8259 evmovdqub(dst, kmask, src, true, vector_len);
8260 break;
8261 case T_CHAR:
8262 case T_SHORT:
8263 evmovdquw(dst, kmask, src, true, vector_len);
8264 break;
8265 case T_INT:
8266 case T_FLOAT:
8267 evmovdqul(dst, kmask, src, true, vector_len);
8268 break;
8269 case T_LONG:
8270 case T_DOUBLE:
8271 evmovdquq(dst, kmask, src, true, vector_len);
8272 break;
8273 default:
8274 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8274, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8275 break;
8276 }
8277}
8278
8279void MacroAssembler::knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp, Register rtmp) {
8280 switch(masklen) {
8281 case 2:
8282 knotbl(dst, src);
8283 movl(rtmp, 3);
8284 kmovbl(ktmp, rtmp);
8285 kandbl(dst, ktmp, dst);
8286 break;
8287 case 4:
8288 knotbl(dst, src);
8289 movl(rtmp, 15);
8290 kmovbl(ktmp, rtmp);
8291 kandbl(dst, ktmp, dst);
8292 break;
8293 case 8:
8294 knotbl(dst, src);
8295 break;
8296 case 16:
8297 knotwl(dst, src);
8298 break;
8299 case 32:
8300 knotdl(dst, src);
8301 break;
8302 case 64:
8303 knotql(dst, src);
8304 break;
8305 default:
8306 fatal("Unexpected vector length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8306, "Unexpected vector length %d", masklen); ::breakpoint
(); } while (0)
;
8307 break;
8308 }
8309}
8310
8311void MacroAssembler::kand(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
8312 switch(type) {
8313 case T_BOOLEAN:
8314 case T_BYTE:
8315 kandbl(dst, src1, src2);
8316 break;
8317 case T_CHAR:
8318 case T_SHORT:
8319 kandwl(dst, src1, src2);
8320 break;
8321 case T_INT:
8322 case T_FLOAT:
8323 kanddl(dst, src1, src2);
8324 break;
8325 case T_LONG:
8326 case T_DOUBLE:
8327 kandql(dst, src1, src2);
8328 break;
8329 default:
8330 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8330, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8331 break;
8332 }
8333}
8334
8335void MacroAssembler::kor(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
8336 switch(type) {
8337 case T_BOOLEAN:
8338 case T_BYTE:
8339 korbl(dst, src1, src2);
8340 break;
8341 case T_CHAR:
8342 case T_SHORT:
8343 korwl(dst, src1, src2);
8344 break;
8345 case T_INT:
8346 case T_FLOAT:
8347 kordl(dst, src1, src2);
8348 break;
8349 case T_LONG:
8350 case T_DOUBLE:
8351 korql(dst, src1, src2);
8352 break;
8353 default:
8354 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8354, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8355 break;
8356 }
8357}
8358
8359void MacroAssembler::kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2) {
8360 switch(type) {
8361 case T_BOOLEAN:
8362 case T_BYTE:
8363 kxorbl(dst, src1, src2);
8364 break;
8365 case T_CHAR:
8366 case T_SHORT:
8367 kxorwl(dst, src1, src2);
8368 break;
8369 case T_INT:
8370 case T_FLOAT:
8371 kxordl(dst, src1, src2);
8372 break;
8373 case T_LONG:
8374 case T_DOUBLE:
8375 kxorql(dst, src1, src2);
8376 break;
8377 default:
8378 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8378, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
;
8379 break;
8380 }
8381}
8382
8383void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8384 switch(type) {
8385 case T_BOOLEAN:
8386 case T_BYTE:
8387 evpermb(dst, mask, nds, src, merge, vector_len); break;
8388 case T_CHAR:
8389 case T_SHORT:
8390 evpermw(dst, mask, nds, src, merge, vector_len); break;
8391 case T_INT:
8392 case T_FLOAT:
8393 evpermd(dst, mask, nds, src, merge, vector_len); break;
8394 case T_LONG:
8395 case T_DOUBLE:
8396 evpermq(dst, mask, nds, src, merge, vector_len); break;
8397 default:
8398 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8398, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8399 }
8400}
8401
8402void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8403 switch(type) {
8404 case T_BOOLEAN:
8405 case T_BYTE:
8406 evpermb(dst, mask, nds, src, merge, vector_len); break;
8407 case T_CHAR:
8408 case T_SHORT:
8409 evpermw(dst, mask, nds, src, merge, vector_len); break;
8410 case T_INT:
8411 case T_FLOAT:
8412 evpermd(dst, mask, nds, src, merge, vector_len); break;
8413 case T_LONG:
8414 case T_DOUBLE:
8415 evpermq(dst, mask, nds, src, merge, vector_len); break;
8416 default:
8417 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8417, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8418 }
8419}
8420
8421void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8422 switch(type) {
8423 case T_BYTE:
8424 evpminsb(dst, mask, nds, src, merge, vector_len); break;
8425 case T_SHORT:
8426 evpminsw(dst, mask, nds, src, merge, vector_len); break;
8427 case T_INT:
8428 evpminsd(dst, mask, nds, src, merge, vector_len); break;
8429 case T_LONG:
8430 evpminsq(dst, mask, nds, src, merge, vector_len); break;
8431 default:
8432 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8432, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8433 }
8434}
8435
8436void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8437 switch(type) {
8438 case T_BYTE:
8439 evpmaxsb(dst, mask, nds, src, merge, vector_len); break;
8440 case T_SHORT:
8441 evpmaxsw(dst, mask, nds, src, merge, vector_len); break;
8442 case T_INT:
8443 evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
8444 case T_LONG:
8445 evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
8446 default:
8447 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8447, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8448 }
8449}
8450
8451void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8452 switch(type) {
8453 case T_BYTE:
8454 evpminsb(dst, mask, nds, src, merge, vector_len); break;
8455 case T_SHORT:
8456 evpminsw(dst, mask, nds, src, merge, vector_len); break;
8457 case T_INT:
8458 evpminsd(dst, mask, nds, src, merge, vector_len); break;
8459 case T_LONG:
8460 evpminsq(dst, mask, nds, src, merge, vector_len); break;
8461 default:
8462 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8462, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8463 }
8464}
8465
8466void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8467 switch(type) {
8468 case T_BYTE:
8469 evpmaxsb(dst, mask, nds, src, merge, vector_len); break;
8470 case T_SHORT:
8471 evpmaxsw(dst, mask, nds, src, merge, vector_len); break;
8472 case T_INT:
8473 evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
8474 case T_LONG:
8475 evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
8476 default:
8477 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8477, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8478 }
8479}
8480
8481void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8482 switch(type) {
8483 case T_INT:
8484 evpxord(dst, mask, nds, src, merge, vector_len); break;
8485 case T_LONG:
8486 evpxorq(dst, mask, nds, src, merge, vector_len); break;
8487 default:
8488 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8488, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8489 }
8490}
8491
8492void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8493 switch(type) {
8494 case T_INT:
8495 evpxord(dst, mask, nds, src, merge, vector_len); break;
8496 case T_LONG:
8497 evpxorq(dst, mask, nds, src, merge, vector_len); break;
8498 default:
8499 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8499, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8500 }
8501}
8502
8503void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8504 switch(type) {
8505 case T_INT:
8506 Assembler::evpord(dst, mask, nds, src, merge, vector_len); break;
8507 case T_LONG:
8508 evporq(dst, mask, nds, src, merge, vector_len); break;
8509 default:
8510 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8510, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8511 }
8512}
8513
8514void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8515 switch(type) {
8516 case T_INT:
8517 Assembler::evpord(dst, mask, nds, src, merge, vector_len); break;
8518 case T_LONG:
8519 evporq(dst, mask, nds, src, merge, vector_len); break;
8520 default:
8521 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8521, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8522 }
8523}
8524
8525void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
8526 switch(type) {
8527 case T_INT:
8528 evpandd(dst, mask, nds, src, merge, vector_len); break;
8529 case T_LONG:
8530 evpandq(dst, mask, nds, src, merge, vector_len); break;
8531 default:
8532 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8532, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8533 }
8534}
8535
8536void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
8537 switch(type) {
8538 case T_INT:
8539 evpandd(dst, mask, nds, src, merge, vector_len); break;
8540 case T_LONG:
8541 evpandq(dst, mask, nds, src, merge, vector_len); break;
8542 default:
8543 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8543, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8544 }
8545}
8546
8547void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) {
8548 masklen = masklen < 8 ? 8 : masklen;
8549 ktest(masklen, src1, src2);
8550 setb(Assembler::notZero, dst);
8551 movzbl(dst, dst);
8552}
8553
8554void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) {
8555 if (masklen < 8) {
8556 knotbl(kscratch, src2);
8557 kortestbl(src1, kscratch);
8558 setb(Assembler::carrySet, dst);
8559 movzbl(dst, dst);
8560 } else {
8561 ktest(masklen, src1, src2);
8562 setb(Assembler::carrySet, dst);
8563 movzbl(dst, dst);
8564 }
8565}
8566
8567void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) {
8568 switch(masklen) {
8569 case 8:
8570 kortestbl(src1, src2);
8571 break;
8572 case 16:
8573 kortestwl(src1, src2);
8574 break;
8575 case 32:
8576 kortestdl(src1, src2);
8577 break;
8578 case 64:
8579 kortestql(src1, src2);
8580 break;
8581 default:
8582 fatal("Unexpected mask length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8582, "Unexpected mask length %d", masklen); ::breakpoint()
; } while (0)
;
8583 break;
8584 }
8585}
8586
8587
8588void MacroAssembler::ktest(uint masklen, KRegister src1, KRegister src2) {
8589 switch(masklen) {
8590 case 8:
8591 ktestbl(src1, src2);
8592 break;
8593 case 16:
8594 ktestwl(src1, src2);
8595 break;
8596 case 32:
8597 ktestdl(src1, src2);
8598 break;
8599 case 64:
8600 ktestql(src1, src2);
8601 break;
8602 default:
8603 fatal("Unexpected mask length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8603, "Unexpected mask length %d", masklen); ::breakpoint()
; } while (0)
;
8604 break;
8605 }
8606}
8607
8608void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) {
8609 switch(type) {
8610 case T_INT:
8611 evprold(dst, mask, src, shift, merge, vlen_enc); break;
8612 case T_LONG:
8613 evprolq(dst, mask, src, shift, merge, vlen_enc); break;
8614 default:
8615 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8615, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8616 break;
8617 }
8618}
8619
8620void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) {
8621 switch(type) {
8622 case T_INT:
8623 evprord(dst, mask, src, shift, merge, vlen_enc); break;
8624 case T_LONG:
8625 evprorq(dst, mask, src, shift, merge, vlen_enc); break;
8626 default:
8627 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8627, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8628 }
8629}
8630
8631void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) {
8632 switch(type) {
8633 case T_INT:
8634 evprolvd(dst, mask, src1, src2, merge, vlen_enc); break;
8635 case T_LONG:
8636 evprolvq(dst, mask, src1, src2, merge, vlen_enc); break;
8637 default:
8638 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8638, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8639 }
8640}
8641
8642void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) {
8643 switch(type) {
8644 case T_INT:
8645 evprorvd(dst, mask, src1, src2, merge, vlen_enc); break;
8646 case T_LONG:
8647 evprorvq(dst, mask, src1, src2, merge, vlen_enc); break;
8648 default:
8649 fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8649, "Unexpected type argument %s", type2name(type)); ::breakpoint
(); } while (0)
; break;
8650 }
8651}
8652#if COMPILER2_OR_JVMCI1
8653
8654void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
8655 Register length, Register temp, int vec_enc) {
8656 // Computing mask for predicated vector store.
8657 movptr(temp, -1);
8658 bzhiq(temp, temp, length);
8659 kmov(mask, temp);
8660 evmovdqu(bt, mask, dst, xmm, vec_enc);
8661}
8662
8663// Set memory operation for length "less than" 64 bytes.
8664void MacroAssembler::fill64_masked(uint shift, Register dst, int disp,
8665 XMMRegister xmm, KRegister mask, Register length,
8666 Register temp, bool use64byteVector) {
8667 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8667, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8668 BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
8669 if (!use64byteVector) {
8670 fill32(dst, disp, xmm);
8671 subptr(length, 32 >> shift);
8672 fill32_masked(shift, dst, disp + 32, xmm, mask, length, temp);
8673 } else {
8674 assert(MaxVectorSize == 64, "vector length != 64")do { if (!(MaxVectorSize == 64)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8674, "assert(" "MaxVectorSize == 64" ") failed", "vector length != 64"
); ::breakpoint(); } } while (0)
;
8675 fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_512bit);
8676 }
8677}
8678
8679
8680void MacroAssembler::fill32_masked(uint shift, Register dst, int disp,
8681 XMMRegister xmm, KRegister mask, Register length,
8682 Register temp) {
8683 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8683, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8684 BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
8685 fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_256bit);
8686}
8687
8688
8689void MacroAssembler::fill32(Register dst, int disp, XMMRegister xmm) {
8690 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8690, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8691 vmovdqu(Address(dst, disp), xmm);
8692}
8693
8694void MacroAssembler::fill64(Register dst, int disp, XMMRegister xmm, bool use64byteVector) {
8695 assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8695, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32"
); ::breakpoint(); } } while (0)
;
8696 BasicType type[] = {T_BYTE, T_SHORT, T_INT, T_LONG};
8697 if (!use64byteVector) {
8698 fill32(dst, disp, xmm);
8699 fill32(dst, disp + 32, xmm);
8700 } else {
8701 evmovdquq(Address(dst, disp), xmm, Assembler::AVX_512bit);
8702 }
8703}
8704
8705#ifdef _LP641
8706void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register value,
8707 Register count, Register rtmp, XMMRegister xtmp) {
8708 Label L_exit;
8709 Label L_fill_start;
8710 Label L_fill_64_bytes;
8711 Label L_fill_96_bytes;
8712 Label L_fill_128_bytes;
8713 Label L_fill_128_bytes_loop;
8714 Label L_fill_128_loop_header;
8715 Label L_fill_128_bytes_loop_header;
8716 Label L_fill_128_bytes_loop_pre_header;
8717 Label L_fill_zmm_sequence;
8718
8719 int shift = -1;
8720 int avx3threshold = VM_Version::avx3_threshold();
8721 switch(type) {
8722 case T_BYTE: shift = 0;
8723 break;
8724 case T_SHORT: shift = 1;
8725 break;
8726 case T_INT: shift = 2;
8727 break;
8728 /* Uncomment when LONG fill stubs are supported.
8729 case T_LONG: shift = 3;
8730 break;
8731 */
8732 default:
8733 fatal("Unhandled type: %s\n", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8733, "Unhandled type: %s\n", type2name(type)); ::breakpoint
(); } while (0)
;
8734 }
8735
8736 if ((avx3threshold != 0) || (MaxVectorSize == 32)) {
8737
8738 if (MaxVectorSize == 64) {
8739 cmpq(count, avx3threshold >> shift);
8740 jcc(Assembler::greater, L_fill_zmm_sequence);
8741 }
8742
8743 evpbroadcast(type, xtmp, value, Assembler::AVX_256bit);
8744
8745 bind(L_fill_start);
8746
8747 cmpq(count, 32 >> shift);
8748 jccb(Assembler::greater, L_fill_64_bytes)jccb_0(Assembler::greater, L_fill_64_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8748)
;
8749 fill32_masked(shift, to, 0, xtmp, k2, count, rtmp);
8750 jmp(L_exit);
8751
8752 bind(L_fill_64_bytes);
8753 cmpq(count, 64 >> shift);
8754 jccb(Assembler::greater, L_fill_96_bytes)jccb_0(Assembler::greater, L_fill_96_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8754)
;
8755 fill64_masked(shift, to, 0, xtmp, k2, count, rtmp);
8756 jmp(L_exit);
8757
8758 bind(L_fill_96_bytes);
8759 cmpq(count, 96 >> shift);
8760 jccb(Assembler::greater, L_fill_128_bytes)jccb_0(Assembler::greater, L_fill_128_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8760)
;
8761 fill64(to, 0, xtmp);
8762 subq(count, 64 >> shift);
8763 fill32_masked(shift, to, 64, xtmp, k2, count, rtmp);
8764 jmp(L_exit);
8765
8766 bind(L_fill_128_bytes);
8767 cmpq(count, 128 >> shift);
8768 jccb(Assembler::greater, L_fill_128_bytes_loop_pre_header)jccb_0(Assembler::greater, L_fill_128_bytes_loop_pre_header, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8768)
;
8769 fill64(to, 0, xtmp);
8770 fill32(to, 64, xtmp);
8771 subq(count, 96 >> shift);
8772 fill32_masked(shift, to, 96, xtmp, k2, count, rtmp);
8773 jmp(L_exit);
8774
8775 bind(L_fill_128_bytes_loop_pre_header);
8776 {
8777 mov(rtmp, to);
8778 andq(rtmp, 31);
8779 jccb(Assembler::zero, L_fill_128_bytes_loop_header)jccb_0(Assembler::zero, L_fill_128_bytes_loop_header, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8779)
;
8780 negq(rtmp);
8781 addq(rtmp, 32);
8782 mov64(r8, -1L);
8783 bzhiq(r8, r8, rtmp);
8784 kmovql(k2, r8);
8785 evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_256bit);
8786 addq(to, rtmp);
8787 shrq(rtmp, shift);
8788 subq(count, rtmp);
8789 }
8790
8791 cmpq(count, 128 >> shift);
8792 jcc(Assembler::less, L_fill_start);
8793
8794 bind(L_fill_128_bytes_loop_header);
8795 subq(count, 128 >> shift);
8796
8797 align32();
8798 bind(L_fill_128_bytes_loop);
8799 fill64(to, 0, xtmp);
8800 fill64(to, 64, xtmp);
8801 addq(to, 128);
8802 subq(count, 128 >> shift);
8803 jccb(Assembler::greaterEqual, L_fill_128_bytes_loop)jccb_0(Assembler::greaterEqual, L_fill_128_bytes_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8803)
;
8804
8805 addq(count, 128 >> shift);
8806 jcc(Assembler::zero, L_exit);
8807 jmp(L_fill_start);
8808 }
8809
8810 if (MaxVectorSize == 64) {
8811 // Sequence using 64 byte ZMM register.
8812 Label L_fill_128_bytes_zmm;
8813 Label L_fill_192_bytes_zmm;
8814 Label L_fill_192_bytes_loop_zmm;
8815 Label L_fill_192_bytes_loop_header_zmm;
8816 Label L_fill_192_bytes_loop_pre_header_zmm;
8817 Label L_fill_start_zmm_sequence;
8818
8819 bind(L_fill_zmm_sequence);
8820 evpbroadcast(type, xtmp, value, Assembler::AVX_512bit);
8821
8822 bind(L_fill_start_zmm_sequence);
8823 cmpq(count, 64 >> shift);
8824 jccb(Assembler::greater, L_fill_128_bytes_zmm)jccb_0(Assembler::greater, L_fill_128_bytes_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8824)
;
8825 fill64_masked(shift, to, 0, xtmp, k2, count, rtmp, true);
8826 jmp(L_exit);
8827
8828 bind(L_fill_128_bytes_zmm);
8829 cmpq(count, 128 >> shift);
8830 jccb(Assembler::greater, L_fill_192_bytes_zmm)jccb_0(Assembler::greater, L_fill_192_bytes_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8830)
;
8831 fill64(to, 0, xtmp, true);
8832 subq(count, 64 >> shift);
8833 fill64_masked(shift, to, 64, xtmp, k2, count, rtmp, true);
8834 jmp(L_exit);
8835
8836 bind(L_fill_192_bytes_zmm);
8837 cmpq(count, 192 >> shift);
8838 jccb(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm)jccb_0(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm
, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8838)
;
8839 fill64(to, 0, xtmp, true);
8840 fill64(to, 64, xtmp, true);
8841 subq(count, 128 >> shift);
8842 fill64_masked(shift, to, 128, xtmp, k2, count, rtmp, true);
8843 jmp(L_exit);
8844
8845 bind(L_fill_192_bytes_loop_pre_header_zmm);
8846 {
8847 movq(rtmp, to);
8848 andq(rtmp, 63);
8849 jccb(Assembler::zero, L_fill_192_bytes_loop_header_zmm)jccb_0(Assembler::zero, L_fill_192_bytes_loop_header_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8849)
;
8850 negq(rtmp);
8851 addq(rtmp, 64);
8852 mov64(r8, -1L);
8853 bzhiq(r8, r8, rtmp);
8854 kmovql(k2, r8);
8855 evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_512bit);
8856 addq(to, rtmp);
8857 shrq(rtmp, shift);
8858 subq(count, rtmp);
8859 }
8860
8861 cmpq(count, 192 >> shift);
8862 jcc(Assembler::less, L_fill_start_zmm_sequence);
8863
8864 bind(L_fill_192_bytes_loop_header_zmm);
8865 subq(count, 192 >> shift);
8866
8867 align32();
8868 bind(L_fill_192_bytes_loop_zmm);
8869 fill64(to, 0, xtmp, true);
8870 fill64(to, 64, xtmp, true);
8871 fill64(to, 128, xtmp, true);
8872 addq(to, 192);
8873 subq(count, 192 >> shift);
8874 jccb(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm)jccb_0(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8874)
;
8875
8876 addq(count, 192 >> shift);
8877 jcc(Assembler::zero, L_exit);
8878 jmp(L_fill_start_zmm_sequence);
8879 }
8880 bind(L_exit);
8881}
8882#endif
8883#endif //COMPILER2_OR_JVMCI
8884
8885
8886#ifdef _LP641
// Emits code that converts the float in src to a 32-bit integer in dst.
// cvttss2sil is tried first. If dst then equals 0x80000000 (float_sign_flip),
// src is stored into a freshly reserved 8-byte stack slot, the
// StubRoutines::x86::f2i_fixup() stub is called, and the slot is popped into dst.
// NOTE(review): presumably the stub overwrites that slot with the JLS-conformant
// result -- confirm against the stub implementation.
8887void MacroAssembler::convert_f2i(Register dst, XMMRegister src) {
8888 Label done;
8889 cvttss2sil(dst, src);
8890 // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
8891 cmpl(dst, 0x80000000); // float_sign_flip
8892 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8892)
;
8893 subptr(rsp, 8);
8894 movflt(Address(rsp, 0), src);
8895 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())((address)((address_word)(StubRoutines::x86::f2i_fixup())))));
8896 pop(dst);
8897 bind(done);
8898}
8899
// Emits code that converts the double in src to a 32-bit integer in dst.
// cvttsd2sil is tried first. If dst then equals 0x80000000 (float_sign_flip),
// src is stored into a freshly reserved 8-byte stack slot, the
// StubRoutines::x86::d2i_fixup() stub is called, and the slot is popped into dst.
// NOTE(review): presumably the stub overwrites that slot with the JLS-conformant
// result -- confirm against the stub implementation.
8900void MacroAssembler::convert_d2i(Register dst, XMMRegister src) {
8901 Label done;
8902 cvttsd2sil(dst, src);
8903 // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub
8904 cmpl(dst, 0x80000000); // float_sign_flip
8905 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8905)
;
8906 subptr(rsp, 8);
8907 movdbl(Address(rsp, 0), src);
8908 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())((address)((address_word)(StubRoutines::x86::d2i_fixup())))));
8909 pop(dst);
8910 bind(done);
8911}
8912
// Emits code that converts the float in src to a 64-bit integer in dst.
// cvttss2siq is tried first. dst is then compared (cmp64) with the 64-bit value
// stored at StubRoutines::x86::double_sign_flip(); on equality src is stored
// into a freshly reserved 8-byte stack slot, the StubRoutines::x86::f2l_fixup()
// stub is called, and the slot is popped into dst.
// NOTE(review): presumably the stub overwrites that slot with the JLS-conformant
// result -- confirm against the stub implementation.
8913void MacroAssembler::convert_f2l(Register dst, XMMRegister src) {
8914 Label done;
8915 cvttss2siq(dst, src);
8916 cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
8917 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8917)
;
8918 subptr(rsp, 8);
8919 movflt(Address(rsp, 0), src);
8920 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())((address)((address_word)(StubRoutines::x86::f2l_fixup())))));
8921 pop(dst);
8922 bind(done);
8923}
8924
// Emits code that converts the double in src to a 64-bit integer in dst.
// cvttsd2siq is tried first. dst is then compared (cmp64) with the 64-bit value
// stored at StubRoutines::x86::double_sign_flip(); on equality src is stored
// into a freshly reserved 8-byte stack slot, the StubRoutines::x86::d2l_fixup()
// stub is called, and the slot is popped into dst.
// NOTE(review): presumably the stub overwrites that slot with the JLS-conformant
// result -- confirm against the stub implementation.
8925void MacroAssembler::convert_d2l(Register dst, XMMRegister src) {
8926 Label done;
8927 cvttsd2siq(dst, src);
8928 cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
8929 jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8929)
;
8930 subptr(rsp, 8);
8931 movdbl(Address(rsp, 0), src);
8932 call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())((address)((address_word)(StubRoutines::x86::d2l_fixup())))));
8933 pop(dst);
8934 bind(done);
8935}
8936
// Emits a single cache-line writeback instruction for the given address.
// The instruction is chosen at code-generation time from the CPU features
// reported by VM_Version: clwb, clflushopt or clflush.
// NOTE(review): clwb is only selected when supports_clflushopt() is also true;
// a CPU reporting clwb without clflushopt would get plain clflush. Confirm
// that this combination cannot occur or is intended.
8937void MacroAssembler::cache_wb(Address line)
8938{
8939 // 64 bit cpus always support clflush
8940 assert(VM_Version::supports_clflush(), "clflush should be available")do { if (!(VM_Version::supports_clflush())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8940, "assert(" "VM_Version::supports_clflush()" ") failed"
, "clflush should be available"); ::breakpoint(); } } while (
0)
;
8941 bool optimized = VM_Version::supports_clflushopt();
8942 bool no_evict = VM_Version::supports_clwb();
8943
8944 // prefer clwb (writeback without evict) otherwise
8945 // prefer clflushopt (potentially parallel writeback with evict)
8946 // otherwise fallback on clflush (serial writeback with evict)
8947
8948 if (optimized) {
8949 if (no_evict) {
8950 clwb(line);
8951 } else {
8952 clflushopt(line);
8953 }
8954 } else {
8955 // no need for fence when using CLFLUSH
8956 clflush(line);
8957 }
8958}
8959
// Emits the synchronization that accompanies a sequence of cache_wb writebacks.
// is_pre: true for the call made before the writebacks, false for the one after.
// Nothing is emitted for the pre call; for the post call an sfence is emitted
// only when clflushopt or clwb is reported as supported.
8960void MacroAssembler::cache_wbsync(bool is_pre)
8961{
8962 assert(VM_Version::supports_clflush(), "clflush should be available")do { if (!(VM_Version::supports_clflush())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8962, "assert(" "VM_Version::supports_clflush()" ") failed"
, "clflush should be available"); ::breakpoint(); } } while (
0)
;
8963 bool optimized = VM_Version::supports_clflushopt();
8964 bool no_evict = VM_Version::supports_clwb();
8965
8966 // pick the correct implementation
8967
8968 if (!is_pre && (optimized || no_evict)) {
8969 // need an sfence for post flush when using clflushopt or clwb
8970 // otherwise no need for any synchronization
8971
8972 sfence();
8973 }
8974}
8975
8976#endif // _LP64
8977
// Returns the condition code that tests the opposite outcome of cond.
// Only one name per encoding is listed: in Assembler::Condition, equal/notEqual
// share the values of zero/notZero, and carrySet/carryClear share the values of
// below/aboveEqual, so those names are handled by the corresponding cases.
// Any value not matched by the switch reaches ShouldNotReachHere(); the
// trailing return of Assembler::overflow follows that call.
8978Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8979 switch (cond) {
8980 // Note some conditions are synonyms for others
8981 case Assembler::zero: return Assembler::notZero;
8982 case Assembler::notZero: return Assembler::zero;
8983 case Assembler::less: return Assembler::greaterEqual;
8984 case Assembler::lessEqual: return Assembler::greater;
8985 case Assembler::greater: return Assembler::lessEqual;
8986 case Assembler::greaterEqual: return Assembler::less;
8987 case Assembler::below: return Assembler::aboveEqual;
8988 case Assembler::belowEqual: return Assembler::above;
8989 case Assembler::above: return Assembler::belowEqual;
8990 case Assembler::aboveEqual: return Assembler::below;
8991 case Assembler::overflow: return Assembler::noOverflow;
8992 case Assembler::noOverflow: return Assembler::overflow;
8993 case Assembler::negative: return Assembler::positive;
8994 case Assembler::positive: return Assembler::negative;
8995 case Assembler::parity: return Assembler::noParity;
8996 case Assembler::noParity: return Assembler::parity;
8997 }
8998 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp"
, 8998); ::breakpoint(); } while (0)
; return Assembler::overflow;
8999}
9000
// Remembers masm and emits a byte compare of the flag at flag_addr against
// value, followed by a jump to _label taken when they are equal.
// _label is bound elsewhere (the destructor binds it), so code emitted after
// this constructor and before that bind is jumped over when the flag equals value.
9001SkipIfEqual::SkipIfEqual(
9002 MacroAssembler* masm, const bool* flag_addr, bool value) {
9003 _masm = masm;
9004 _masm->cmp8(ExternalAddress((address)flag_addr), value);
9005 _masm->jcc(Assembler::equal, _label);
9006}
9007
// Binds _label at the current code position of _masm, i.e. marks the point
// where a jump to _label lands.
9008SkipIfEqual::~SkipIfEqual() {
9009 _masm->bind(_label);
9010}
9011
9012// 32-bit Windows has its own fast-path implementation
9013// of get_thread
9014#if !defined(WIN32) || defined(_LP641)
9015
9016// This is simply a call to Thread::current()
// The emitted code pushes rax (unless thread is rax), rdi, rsi, rdx, rcx and,
// in the _LP64 section, r8-r11 around the call and pops them in reverse order.
// The call goes through call_VM_leaf_base with zero arguments. When thread is
// not rax, the value in rax after the call is moved into thread before rax is
// restored.
9017void MacroAssembler::get_thread(Register thread) {
9018 if (thread != rax) {
9019 push(rax);
9020 }
9021 LP64_ONLY(push(rdi);)push(rdi);
9022 LP64_ONLY(push(rsi);)push(rsi);
9023 push(rdx);
9024 push(rcx);
9025#ifdef _LP641
9026 push(r8);
9027 push(r9);
9028 push(r10);
9029 push(r11);
9030#endif
9031
9032 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current)((address)((address_word)(Thread::current))), 0);
9033
9034#ifdef _LP641
9035 pop(r11);
9036 pop(r10);
9037 pop(r9);
9038 pop(r8);
9039#endif
9040 pop(rcx);
9041 pop(rdx);
9042 LP64_ONLY(pop(rsi);)pop(rsi);
9043 LP64_ONLY(pop(rdi);)pop(rdi);
9044 if (thread != rax) {
9045 mov(thread, rax);
9046 pop(rax);
9047 }
9048}
9049
9050
9051#endif // !WIN32 || _LP64

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_ASSEMBLER_X86_HPP
26#define CPU_X86_ASSEMBLER_X86_HPP
27
28#include "asm/register.hpp"
29#include "utilities/powerOfTwo.hpp"
30
31// Contains all the definitions needed for x86 assembly code generation.
32
33// Calling convention
// Holds, as anonymous enum constants, the number of registers used for passing
// arguments and returning values. Under _LP64 there are separate counts for the
// C convention (different for _WIN64 and non-Windows) and the Java convention;
// otherwise the only constant is n_register_parameters = 0.
34class Argument {
35 public:
36 enum {
37#ifdef _LP641
38#ifdef _WIN64
39 n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
40 n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )
41 n_int_register_returns_c = 1, // rax
42 n_float_register_returns_c = 1, // xmm0
43#else
44 n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
45 n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... )
46 n_int_register_returns_c = 2, // rax, rdx
47 n_float_register_returns_c = 2, // xmm0, xmm1
48#endif // _WIN64
49 n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ...
50 n_float_register_parameters_j = 8 // j_farg0, j_farg1, ...
51#else
52 n_register_parameters = 0 // 0 registers used to pass arguments
53#endif // _LP64
54 };
55};
56
57
58#ifdef _LP641
59// Symbolically name the register arguments used by the c calling convention.
60// Windows is different from linux/solaris. So much for standards...
61
62#ifdef _WIN64
63
// _WIN64 branch: four integer (c_rarg0-3) and four floating-point (c_farg0-3)
// C argument register aliases.
64REGISTER_DECLARATION(Register, c_rarg0, rcx)const Register c_rarg0 = ((Register)rcx);
65REGISTER_DECLARATION(Register, c_rarg1, rdx)const Register c_rarg1 = ((Register)rdx);
66REGISTER_DECLARATION(Register, c_rarg2, r8)const Register c_rarg2 = ((Register)r8);
67REGISTER_DECLARATION(Register, c_rarg3, r9)const Register c_rarg3 = ((Register)r9);
68
69REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0)const XMMRegister c_farg0 = ((XMMRegister)xmm0);
70REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1)const XMMRegister c_farg1 = ((XMMRegister)xmm1);
71REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2)const XMMRegister c_farg2 = ((XMMRegister)xmm2);
72REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3)const XMMRegister c_farg3 = ((XMMRegister)xmm3);
73
74#else
75
// Non-_WIN64 branch: six integer (c_rarg0-5) and eight floating-point
// (c_farg0-7) C argument register aliases.
76REGISTER_DECLARATION(Register, c_rarg0, rdi)const Register c_rarg0 = ((Register)rdi);
77REGISTER_DECLARATION(Register, c_rarg1, rsi)const Register c_rarg1 = ((Register)rsi);
78REGISTER_DECLARATION(Register, c_rarg2, rdx)const Register c_rarg2 = ((Register)rdx);
79REGISTER_DECLARATION(Register, c_rarg3, rcx)const Register c_rarg3 = ((Register)rcx);
80REGISTER_DECLARATION(Register, c_rarg4, r8)const Register c_rarg4 = ((Register)r8);
81REGISTER_DECLARATION(Register, c_rarg5, r9)const Register c_rarg5 = ((Register)r9);
82
83REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0)const XMMRegister c_farg0 = ((XMMRegister)xmm0);
84REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1)const XMMRegister c_farg1 = ((XMMRegister)xmm1);
85REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2)const XMMRegister c_farg2 = ((XMMRegister)xmm2);
86REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3)const XMMRegister c_farg3 = ((XMMRegister)xmm3);
87REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4)const XMMRegister c_farg4 = ((XMMRegister)xmm4);
88REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5)const XMMRegister c_farg5 = ((XMMRegister)xmm5);
89REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6)const XMMRegister c_farg6 = ((XMMRegister)xmm6);
90REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7)const XMMRegister c_farg7 = ((XMMRegister)xmm7);
91
92#endif // _WIN64
93
94// Symbolically name the register arguments used by the Java calling convention.
95// We have control over the convention for java so we can do what we please.
96// What pleases us is to offset the java calling convention so that when
97// we call a suitable jni method the arguments are lined up and we don't
98// have to do little shuffling. A suitable jni method is non-static and a
99// small number of arguments (two fewer args on windows)
100//
101// |-------------------------------------------------------|
102// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |
103// |-------------------------------------------------------|
104// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)
105// | rdi rsi rdx rcx r8 r9 | solaris/linux
106// |-------------------------------------------------------|
107// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |
108// |-------------------------------------------------------|
109
// Java argument register aliases. j_rarg0-2 alias c_rarg1-3 and j_rarg5
// aliases c_rarg0, i.e. the Java integer arguments are rotated by one
// position relative to the C ones. j_rarg3/j_rarg4 are rdi/rsi under _WIN64
// and c_rarg4/c_rarg5 otherwise.
110REGISTER_DECLARATION(Register, j_rarg0, c_rarg1)const Register j_rarg0 = ((Register)c_rarg1);
111REGISTER_DECLARATION(Register, j_rarg1, c_rarg2)const Register j_rarg1 = ((Register)c_rarg2);
112REGISTER_DECLARATION(Register, j_rarg2, c_rarg3)const Register j_rarg2 = ((Register)c_rarg3);
113// Windows runs out of register args here
114#ifdef _WIN64
115REGISTER_DECLARATION(Register, j_rarg3, rdi)const Register j_rarg3 = ((Register)rdi);
116REGISTER_DECLARATION(Register, j_rarg4, rsi)const Register j_rarg4 = ((Register)rsi);
117#else
118REGISTER_DECLARATION(Register, j_rarg3, c_rarg4)const Register j_rarg3 = ((Register)c_rarg4);
119REGISTER_DECLARATION(Register, j_rarg4, c_rarg5)const Register j_rarg4 = ((Register)c_rarg5);
120#endif /* _WIN64 */
121REGISTER_DECLARATION(Register, j_rarg5, c_rarg0)const Register j_rarg5 = ((Register)c_rarg0);
122
// Java floating-point argument registers: xmm0-xmm7 in order.
123REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0)const XMMRegister j_farg0 = ((XMMRegister)xmm0);
124REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1)const XMMRegister j_farg1 = ((XMMRegister)xmm1);
125REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2)const XMMRegister j_farg2 = ((XMMRegister)xmm2);
126REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3)const XMMRegister j_farg3 = ((XMMRegister)xmm3);
127REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4)const XMMRegister j_farg4 = ((XMMRegister)xmm4);
128REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5)const XMMRegister j_farg5 = ((XMMRegister)xmm5);
129REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6)const XMMRegister j_farg6 = ((XMMRegister)xmm6);
130REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7)const XMMRegister j_farg7 = ((XMMRegister)xmm7);
131
// Scratch registers and registers with a fixed role (names as declared).
132REGISTER_DECLARATION(Register, rscratch1, r10)const Register rscratch1 = ((Register)r10); // volatile
133REGISTER_DECLARATION(Register, rscratch2, r11)const Register rscratch2 = ((Register)r11); // volatile
134
135REGISTER_DECLARATION(Register, r12_heapbase, r12)const Register r12_heapbase = ((Register)r12); // callee-saved
136REGISTER_DECLARATION(Register, r15_thread, r15)const Register r15_thread = ((Register)r15); // callee-saved
137
138#else
139// rscratch1 will appear in 32-bit code that is dead but of course must compile.
140// Using noreg ensures that if the dead code is incorrectly live and executed, it
141// will cause an assertion failure.
142#define rscratch1 noreg
143#define rscratch2 noreg
144
145#endif // _LP64
146
147// JSR 292
148// On x86, the SP does not have to be saved when invoking method handle intrinsics
149// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
// Despite the "rbp" in its name, the constant below is an alias of noreg.
150REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg)const Register rbp_mh_SP_save = ((Register)noreg);
151
152// Address is an abstraction used to represent a memory location
153// using any of the amd64 addressing modes with one object.
154//
155// Note: A register location is represented via a Register, not
156// via an address for efficiency & simplicity reasons.
157
158class ArrayAddress;
159
// Value type describing one x86 memory operand. It stores a base register, an
// optional index that is either a general-purpose register (_index) or an XMM
// register (_xmmindex, flagged by _isxmmindex), a scale factor, a displacement
// and a RelocationHolder (_rspec).
160class Address {
161 public:
162 enum ScaleFactor {
163 no_scale = -1,
164 times_1 = 0,
165 times_2 = 1,
166 times_4 = 2,
167 times_8 = 3,
168 times_ptr = LP64_ONLY(times_8)times_8 NOT_LP64(times_4)
169 };
 // Maps an element size in bytes (1, 2, 4 or 8, checked by the assert) to the
 // matching ScaleFactor.
170 static ScaleFactor times(int size) {
171 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size")do { if (!(size >= 1 && size <= 8 && is_power_of_2
(size))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 171, "assert(" "size >= 1 && size <= 8 && is_power_of_2(size)"
") failed", "bad scale size"); ::breakpoint(); } } while (0)
;
172 if (size == 8) return times_8;
173 if (size == 4) return times_4;
174 if (size == 2) return times_2;
175 return times_1;
176 }
 // Returns the byte multiplier for scale, computed as 1 << scale. no_scale is
 // rejected by the first assert; the second assert checks that the enum values
 // are the log2 of their multipliers.
177 static int scale_size(ScaleFactor scale) {
178 assert(scale != no_scale, "")do { if (!(scale != no_scale)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 178, "assert(" "scale != no_scale" ") failed", ""); ::breakpoint
(); } } while (0)
;
179 assert(((1 << (int)times_1) == 1 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
180 (1 << (int)times_2) == 2 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
181 (1 << (int)times_4) == 4 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
182 (1 << (int)times_8) == 8), "")do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
;
183 return (1 << (int)scale);
184 }
185
186 private:
187 Register _base;
188 Register _index;
189 XMMRegister _xmmindex;
190 ScaleFactor _scale;
191 int _disp;
192 bool _isxmmindex;
193 RelocationHolder _rspec;
194
195 // Easily misused constructors make them private
196 // %%% can we make these go away?
197 NOT_LP64(Address(address loc, RelocationHolder spec);)
198 Address(int disp, address loc, relocInfo::relocType rtype);
199 Address(int disp, address loc, RelocationHolder spec);
200
201 public:
202
203 int disp() { return _disp; }
204 // creation
 // Default: no base, no index, no scale, zero displacement.
205 Address()
206 : _base(noreg),
207 _index(noreg),
208 _xmmindex(xnoreg),
209 _scale(no_scale),
210 _disp(0),
211 _isxmmindex(false){
212 }
213
214 // No default displacement otherwise Register can be implicitly
215 // converted to 0(Register) which is quite a different animal.
216
217 Address(Register base, int disp)
218 : _base(base),
219 _index(noreg),
220 _xmmindex(xnoreg),
221 _scale(no_scale),
222 _disp(disp),
223 _isxmmindex(false){
224 }
225
 // base + index*scale + disp with a general-purpose index register. The assert
 // requires that index is invalid exactly when scale is no_scale.
226 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
227 : _base (base),
228 _index(index),
229 _xmmindex(xnoreg),
230 _scale(scale),
231 _disp (disp),
232 _isxmmindex(false) {
 // NOTE(review): this is the location of the analyzer finding "Called C++
 // object pointer is null" (report line 233): on the reported path index is a
 // null pointer when is_valid() is invoked on it. Whether a null Register
 // value is a legitimate register here depends on how Register is defined --
 // confirm in asm/register.hpp before changing anything.
233 assert(!index->is_valid() == (scale == Address::no_scale),do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 234, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
5
Called C++ object pointer is null
234 "inconsistent address")do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 234, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
235 }
236
 // Index given as register-or-constant. A constant index is folded into _disp
 // (constant * scale_size(scale)) and _index becomes register_or_noreg().
237 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
238 : _base (base),
239 _index(index.register_or_noreg()),
240 _xmmindex(xnoreg),
241 _scale(scale),
242 _disp (disp + (index.constant_or_zero() * scale_size(scale))),
243 _isxmmindex(false){
 // Only the local parameter is reset here so that the assert below holds for
 // a constant index; the member _scale keeps the value that was passed in.
244 if (!index.is_register()) scale = Address::no_scale;
245 assert(!_index->is_valid() == (scale == Address::no_scale),do { if (!(!_index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 246, "assert(" "!_index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
246 "inconsistent address")do { if (!(!_index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 246, "assert(" "!_index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
247 }
248
 // Same as the Register-index form but with an XMM index register; sets
 // _isxmmindex and leaves _index as noreg.
249 Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
250 : _base (base),
251 _index(noreg),
252 _xmmindex(index),
253 _scale(scale),
254 _disp(disp),
255 _isxmmindex(true) {
256 assert(!index->is_valid() == (scale == Address::no_scale),do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 257, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
257 "inconsistent address")do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 257, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
258 }
259
260 // The following overloads are used in connection with the
261 // ByteSize type (see sizes.hpp). They simplify the use of
262 // ByteSize'd arguments in assembly code.
263
264 Address(Register base, ByteSize disp)
265 : Address(base, in_bytes(disp)) {}
266
267 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
268 : Address(base, index, scale, in_bytes(disp)) {}
269
270 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
271 : Address(base, index, scale, in_bytes(disp)) {}
272
 // Returns a copy of this address with disp added to the displacement.
273 Address plus_disp(int disp) const {
274 Address a = (*this);
275 a._disp += disp;
276 return a;
277 }
 // Returns a copy with a register-or-constant offset applied: a constant is
 // scaled and added to the displacement; a register becomes the index (with
 // the given scale), which requires that the copy has no valid index yet.
278 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
279 Address a = (*this);
280 a._disp += disp.constant_or_zero() * scale_size(scale);
281 if (disp.is_register()) {
282 assert(!a.index()->is_valid(), "competing indexes")do { if (!(!a.index()->is_valid())) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 282, "assert(" "!a.index()->is_valid()" ") failed", "competing indexes"
); ::breakpoint(); } } while (0)
;
283 a._index = disp.as_register();
284 a._scale = scale;
285 }
286 return a;
287 }
 // Compares base, displacement, general-purpose index and scale only;
 // _xmmindex and _isxmmindex are not compared either.
288 bool is_same_address(Address a) const {
289 // disregard _rspec
290 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
291 }
292
293 // accessors
294 bool uses(Register reg) const { return _base == reg || _index == reg; }
295 Register base() const { return _base; }
296 Register index() const { return _index; }
297 XMMRegister xmmindex() const { return _xmmindex; }
298 ScaleFactor scale() const { return _scale; }
299 int disp() const { return _disp; }
300 bool isxmmindex() const { return _isxmmindex; }
301
302 // Convert the raw encoding form into the form expected by the constructor for
303 // Address. An index of 4 (rsp) corresponds to having no index, so convert
304 // that to noreg for the Address constructor.
305 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
306
307 static Address make_array(ArrayAddress);
308
309 private:
 // The three *_needs_rex helpers report whether the respective register is
 // valid and has an encoding of 8 or above.
310 bool base_needs_rex() const {
311 return _base->is_valid() && _base->encoding() >= 8;
312 }
313
314 bool index_needs_rex() const {
315 return _index->is_valid() &&_index->encoding() >= 8;
316 }
317
318 bool xmmindex_needs_rex() const {
319 return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;
320 }
321
322 relocInfo::relocType reloc() const { return _rspec.type(); }
323
324 friend class Assembler;
325 friend class MacroAssembler;
326 friend class LIR_Assembler; // base/index/scale/disp
327};
328
329//
330// AddressLiteral has been split out from Address because operands of this type
331// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
332// the few instructions that need to deal with address literals are unique and the
333// MacroAssembler does not have to implement every instruction in the Assembler
334// in order to search for address literals that may need special handling depending
335// on the instruction and the platform. This is a small step on the way to merging i486/amd64
336// directories.
337//
// A literal target address together with its relocation information (_rspec)
// and a flag (_is_lval) selecting whether the address itself, rather than the
// value stored there, is wanted. The accessors are private and reachable only
// by the friend classes listed at the end.
338class AddressLiteral {
339 friend class ArrayAddress;
340 RelocationHolder _rspec;
341 // Typically when we use AddressLiterals we want to use their rval.
342 // However in some situations we want the lval (effective address) of the item.
343 // We provide a special factory for making those lvals.
344 bool _is_lval;
345
346 // If the target is far we'll need to load the ea of this to
347 // a register to reach it. Otherwise if near we can do rip
348 // relative addressing.
349
350 address _target;
351
352 protected:
353 // creation
354 AddressLiteral()
355 : _is_lval(false),
356 _target(NULL__null)
357 {}
358
359 public:
360
361
362 AddressLiteral(address target, relocInfo::relocType rtype);
363
364 AddressLiteral(address target, RelocationHolder const& rspec)
365 : _rspec(rspec),
366 _is_lval(false),
367 _target(target)
368 {}
369
 // Factory for the lval form: returns a copy of this literal with _is_lval set.
370 AddressLiteral addr() {
371 AddressLiteral ret = *this;
372 ret._is_lval = true;
373 return ret;
374 }
375
376
377 private:
378
379 address target() { return _target; }
380 bool is_lval() { return _is_lval; }
381
382 relocInfo::relocType reloc() const { return _rspec.type(); }
383 const RelocationHolder& rspec() const { return _rspec; }
384
385 friend class Assembler;
386 friend class MacroAssembler;
387 friend class Address;
388 friend class LIR_Assembler;
389};
390
391// Convenience classes
// AddressLiteral whose relocation type is fixed to relocInfo::runtime_call_type.
392class RuntimeAddress: public AddressLiteral {
393
394 public:
395
396 RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
397
398};
399
// AddressLiteral for a target outside the generated code. The relocation type
// is external_word_type when external_word_Relocation::can_be_relocated(target)
// holds, and relocInfo::none otherwise.
400class ExternalAddress: public AddressLiteral {
401 private:
402 static relocInfo::relocType reloc_for_target(address target) {
403 // Sometimes ExternalAddress is used for values which aren't
404 // exactly addresses, like the card table base.
405 // external_word_type can't be used for values in the first page
406 // so just skip the reloc in that case.
407 return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
408 }
409
410 public:
411
412 ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
413
414};
415
// AddressLiteral whose relocation type is fixed to relocInfo::internal_word_type.
416class InternalAddress: public AddressLiteral {
417
418 public:
419
420 InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
421
422};
423
424// x86 can do array addressing as a single operation since disp can be an absolute
425// address; amd64 can't. We create a class that expresses the concept but does extra
426// magic on amd64 to get the final result.
427
// Pairs an AddressLiteral (the array base) with an Address (the index part)
// and exposes both through by-value accessors.
428class ArrayAddress {
429 private:
430
431 AddressLiteral _base;
432 Address _index;
433
434 public:
435
436 ArrayAddress() {};
437 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
438 AddressLiteral base() { return _base; }
439 Address index() { return _index; }
440
441};
442
443class InstructionAttr;
444
445// 64-bit reflects the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
446// See fxsave and xsave(EVEX enabled) documentation for layout
// 512 + 2176 = 2688 bytes, which is the constant divided by wordSize below.
447const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize)2688 / wordSize;
448
449// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
450// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
451// is what you get. The Assembler is generating code into a CodeBuffer.
452
453class Assembler : public AbstractAssembler {
454 friend class AbstractAssembler; // for the non-virtual hack
455 friend class LIR_Assembler; // as_Address()
456 friend class StubGenerator;
457
458 public:
459 enum Condition { // The x86 condition codes used for conditional jumps/moves.
460 zero = 0x4, // same encoding as equal
461 notZero = 0x5, // same encoding as notEqual
462 equal = 0x4,
463 notEqual = 0x5,
464 less = 0xc,
465 lessEqual = 0xe,
466 greater = 0xf,
467 greaterEqual = 0xd,
468 below = 0x2, // same encoding as carrySet
469 belowEqual = 0x6,
470 above = 0x7,
471 aboveEqual = 0x3, // same encoding as carryClear
472 overflow = 0x0,
473 noOverflow = 0x1,
474 carrySet = 0x2,
475 carryClear = 0x3,
476 negative = 0x8,
477 positive = 0x9,
478 parity = 0xa,
479 noParity = 0xb
480 };
481
482 enum Prefix { // prefix byte values: segment overrides, REX variants, VEX/EVEX escape bytes
483 // segment overrides
484 CS_segment = 0x2e,
485 SS_segment = 0x36,
486 DS_segment = 0x3e,
487 ES_segment = 0x26,
488 FS_segment = 0x64,
489 GS_segment = 0x65,
490
491 REX = 0x40, // base value; the names below add B=0x1, X=0x2, R=0x4, W=0x8 in the low nibble
492
493 REX_B = 0x41,
494 REX_X = 0x42,
495 REX_XB = 0x43,
496 REX_R = 0x44,
497 REX_RB = 0x45,
498 REX_RX = 0x46,
499 REX_RXB = 0x47,
500
501 REX_W = 0x48,
502
503 REX_WB = 0x49,
504 REX_WX = 0x4A,
505 REX_WXB = 0x4B,
506 REX_WR = 0x4C,
507 REX_WRB = 0x4D,
508 REX_WRX = 0x4E,
509 REX_WRXB = 0x4F,
510
511 VEX_3bytes = 0xC4,
512 VEX_2bytes = 0xC5,
513 EVEX_4bytes = 0x62,
514 Prefix_EMPTY = 0x0 // zero value, distinct from every prefix byte listed above
515 };
516
517 enum VexPrefix { // single-bit masks used when composing a VEX prefix
518 VEX_B = 0x20,
519 VEX_X = 0x40,
520 VEX_R = 0x80,
521 VEX_W = 0x80 // NOTE: same numeric value as VEX_R
522 };
523
524 enum ExexPrefix { // EVEX_* single-bit masks; NOTE(review): the type name looks like a typo of "EvexPrefix" - confirm before renaming
525 EVEX_F = 0x04,
526 EVEX_V = 0x08,
527 EVEX_Rb = 0x10,
528 EVEX_X = 0x40,
529 EVEX_Z = 0x80
530 };
531
532 enum VexSimdPrefix { // 2-bit selector (0..3); presumably maps to the legacy 0x66/0xF3/0xF2 SIMD prefixes - confirm in the encoder
533 VEX_SIMD_NONE = 0x0,
534 VEX_SIMD_66 = 0x1,
535 VEX_SIMD_F3 = 0x2,
536 VEX_SIMD_F2 = 0x3
537 };
538
539 enum VexOpcode { // opcode-map selector values plus a mask constant
540 VEX_OPCODE_NONE = 0x0,
541 VEX_OPCODE_0F = 0x1,
542 VEX_OPCODE_0F_38 = 0x2,
543 VEX_OPCODE_0F_3A = 0x3,
544 VEX_OPCODE_MASK = 0x1F // covers the low five bits; not itself a selector value
545 };
546
547 enum AvxVectorLen { // vector length selector passed as the int vector_len argument of the v*/ev* methods (assumed - confirm)
548 AVX_128bit = 0x0,
549 AVX_256bit = 0x1,
550 AVX_512bit = 0x2,
551 AVX_NoVec = 0x4 // note the gap: 0x3 is unused
552 };
553
554 enum EvexTupleType { // values are deliberately non-contiguous; presumably base offsets into a lookup table - TODO confirm in the .cpp
555 EVEX_FV = 0,
556 EVEX_HV = 4,
557 EVEX_FVM = 6,
558 EVEX_T1S = 7,
559 EVEX_T1F = 11,
560 EVEX_T2 = 13,
561 EVEX_T4 = 15,
562 EVEX_T8 = 17,
563 EVEX_HVM = 18,
564 EVEX_QVM = 19,
565 EVEX_OVM = 20,
566 EVEX_M128 = 21,
567 EVEX_DUP = 22,
568 EVEX_ETUP = 23
569 };
570
571 enum EvexInputSizeInBits { // contiguous codes 0..4 for the named input sizes
572 EVEX_8bit = 0,
573 EVEX_16bit = 1,
574 EVEX_32bit = 2,
575 EVEX_64bit = 3,
576 EVEX_NObit = 4 // code for "no input size"
577 };
578
579 enum WhichOperand {
580 // input to locate_operand, and format code for relocations
581 imm_operand = 0, // embedded 32-bit|64-bit immediate operand
582 disp32_operand = 1, // embedded 32-bit displacement or address
583 call32_operand = 2, // embedded 32-bit self-relative displacement
584#ifndef _LP64
585 _WhichOperand_limit = 3 // one past the last operand kind when _LP64 is not defined
586#else
587 narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
588 _WhichOperand_limit = 4 // one past the last operand kind when _LP64 is defined
589#endif
590 };
591
592 // Comparison predicates for integral types & FP types when using SSE
593 enum ComparisonPredicate { // codes 0..7; each code n+4 is named as the negation of code n
594 eq = 0,
595 lt = 1,
596 le = 2,
597 _false = 3,
598 neq = 4,
599 nlt = 5,
600 nle = 6,
601 _true = 7
602 };
603
604 // Comparison predicates for FP types when using AVX
605 // O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
606 // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
607 enum ComparisonPredicateFP { // codes 0x00..0x1F; code n+0x10 has the same name as code n with the S/Q suffix flipped
608 EQ_OQ = 0,
609 LT_OS = 1,
610 LE_OS = 2,
611 UNORD_Q = 3,
612 NEQ_UQ = 4,
613 NLT_US = 5,
614 NLE_US = 6,
615 ORD_Q = 7,
616 EQ_UQ = 8,
617 NGE_US = 9,
618 NGT_US = 0xA,
619 FALSE_OQ = 0XB,
620 NEQ_OQ = 0xC,
621 GE_OS = 0xD,
622 GT_OS = 0xE,
623 TRUE_UQ = 0xF,
624 EQ_OS = 0x10, // second half: signaling/non-signaling counterparts of 0x00..0x0F
625 LT_OQ = 0x11,
626 LE_OQ = 0x12,
627 UNORD_S = 0x13,
628 NEQ_US = 0x14,
629 NLT_UQ = 0x15,
630 NLE_UQ = 0x16,
631 ORD_S = 0x17,
632 EQ_US = 0x18,
633 NGE_UQ = 0x19,
634 NGT_UQ = 0x1A,
635 FALSE_OS = 0x1B,
636 NEQ_OS = 0x1C,
637 GE_OQ = 0x1D,
638 GT_OQ = 0x1E,
639 TRUE_US =0x1F
640 };
641
642 enum Width { // contiguous codes 0..3; presumably byte/word/doubleword/quadword - confirm at the use sites
643 B = 0,
644 W = 1,
645 D = 2,
646 Q = 3
647 };
648
649 //---< calculate length of instruction >---
650 // As instruction size can't be found out easily on x86/x64,
651 // we just use '4' for len and maxlen.
652 // instruction must start at passed address
653 static unsigned int instr_len(unsigned char *instr) { return 4; } // fixed guess; instr is never read
654
655 //---< longest instructions >---
656 // Max instruction length is not specified in architecture documentation.
657 // We could use a "safe enough" estimate (15), but just default to
658 // instruction length guess from above.
659 static unsigned int instr_maxlen() { return 4; } // same fixed value that instr_len() returns
660
661 // NOTE: The general philosophy of the declarations here is that 64bit versions
662 // of instructions are freely declared without the need for wrapping them in an ifdef.
663 // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.)
664 // In the .cpp file the implementations are wrapped so that they are dropped out
665 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
666 // to the size it was prior to merging up the 32bit and 64bit assemblers.
667 //
668 // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
669 // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
670
671private:
672
673 bool _legacy_mode_bw;
674 bool _legacy_mode_dq;
675 bool _legacy_mode_vl;
676 bool _legacy_mode_vlbw;
677 NOT_LP64(bool _is_managed;)
678
679 class InstructionAttr *_attributes;
680
681 // 64bit prefixes
682 void prefix(Register reg);
683 void prefix(Register dst, Register src, Prefix p);
684 void prefix(Register dst, Address adr, Prefix p);
685
686 void prefix(Address adr);
687 void prefix(Address adr, Register reg, bool byteinst = false);
688 void prefix(Address adr, XMMRegister reg);
689
690 int prefix_and_encode(int reg_enc, bool byteinst = false);
691 int prefix_and_encode(int dst_enc, int src_enc) { // convenience overload: passes false for both dst_is_byte and src_is_byte
692 return prefix_and_encode(dst_enc, false, src_enc, false);
693 }
694 int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
695
696 // Some prefixq variants always emit exactly one prefix byte, so besides a
697 // prefix-emitting method we provide a method to get the prefix byte to emit,
698 // which can then be folded into a byte stream.
699 int8_t get_prefixq(Address adr);
700 int8_t get_prefixq(Address adr, Register reg);
701
702 void prefixq(Address adr);
703 void prefixq(Address adr, Register reg);
704 void prefixq(Address adr, XMMRegister reg);
705
706 int prefixq_and_encode(int reg_enc);
707 int prefixq_and_encode(int dst_enc, int src_enc);
708
709 void rex_prefix(Address adr, XMMRegister xreg,
710 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
711 int rex_prefix_and_encode(int dst_enc, int src_enc,
712 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
713
714 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
715
716 void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
717 int nds_enc, VexSimdPrefix pre, VexOpcode opc);
718
719 void vex_prefix(Address adr, int nds_enc, int xreg_enc,
720 VexSimdPrefix pre, VexOpcode opc,
721 InstructionAttr *attributes);
722
723 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
724 VexSimdPrefix pre, VexOpcode opc,
725 InstructionAttr *attributes);
726
727 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
728 VexOpcode opc, InstructionAttr *attributes);
729
730 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
731 VexOpcode opc, InstructionAttr *attributes);
732
733 // Helper functions for groups of instructions
734 void emit_arith_b(int op1, int op2, Register dst, int imm8);
735
736 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
737 // Force generation of a 4 byte immediate value even if it fits into 8bit
738 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
739 void emit_arith(int op1, int op2, Register dst, Register src);
740
741 bool emit_compressed_disp_byte(int &disp);
742
743 void emit_modrm(int mod, int dst_enc, int src_enc);
744 void emit_modrm_disp8(int mod, int dst_enc, int src_enc,
745 int disp);
746 void emit_modrm_sib(int mod, int dst_enc, int src_enc,
747 Address::ScaleFactor scale, int index_enc, int base_enc);
748 void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
749 Address::ScaleFactor scale, int index_enc, int base_enc,
750 int disp);
751
752 void emit_operand_helper(int reg_enc,
753 int base_enc, int index_enc, Address::ScaleFactor scale,
754 int disp,
755 RelocationHolder const& rspec,
756 int rip_relative_correction = 0);
757
758 void emit_operand(Register reg,
759 Register base, Register index, Address::ScaleFactor scale,
760 int disp,
761 RelocationHolder const& rspec,
762 int rip_relative_correction = 0);
763
764 void emit_operand(Register reg,
765 Register base, XMMRegister index, Address::ScaleFactor scale,
766 int disp,
767 RelocationHolder const& rspec);
768
769 void emit_operand(XMMRegister xreg,
770 Register base, XMMRegister xindex, Address::ScaleFactor scale,
771 int disp,
772 RelocationHolder const& rspec);
773
774 void emit_operand(Register reg, Address adr,
775 int rip_relative_correction = 0);
776
777 void emit_operand(XMMRegister reg,
778 Register base, Register index, Address::ScaleFactor scale,
779 int disp,
780 RelocationHolder const& rspec);
781
782 void emit_operand(XMMRegister reg, Address adr);
783
784 // Immediate-to-memory forms
785 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
786
787 protected:
788 #ifdef ASSERT // check_relocation is declared only when ASSERT is defined
789 void check_relocation(RelocationHolder const& rspec, int format);
790 #endif
791
792 void emit_data(jint data, relocInfo::relocType rtype, int format);
793 void emit_data(jint data, RelocationHolder const& rspec, int format);
794 void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
795 void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
796
797 bool reachable(AddressLiteral adr) NOT_LP64({ return true;}); // non-LP64 builds get an inline body that always returns true; LP64 builds see only the declaration
798
799 // These are all easily abused and hence protected
800
801 // 32BIT ONLY SECTION
802#ifndef _LP64
803 // Make these disappear in 64bit mode since they would never be correct
804 void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
805 void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
806
807 void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
808 void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
809
810 void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
811#else
812 // 64BIT ONLY SECTION
813 void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY
814
815 void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
816 void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
817
818 void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
819 void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
820#endif // _LP64
821
822 // These are unique in that we are ensured by the caller that the 32bit
823 // relative in these instructions will always be able to reach the potentially
824 // 64bit address described by entry. Since they can take a 64bit address they
825 // don't have the 32 suffix like the other instructions in this class.
826
827 void call_literal(address entry, RelocationHolder const& rspec);
828 void jmp_literal(address entry, RelocationHolder const& rspec);
829
830 // Avoid using directly section
831 // Instructions in this section are actually usable by anyone without danger
832 // of failure but have performance issues that are addressed by enhanced
833 // instructions which will do the proper thing based on the particular cpu.
834 // We protect them because we don't trust you...
835
836 // Don't use next inc() and dec() methods directly. INC & DEC instructions
837 // could cause a partial flag stall since they don't set CF flag.
838 // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
839 // which call inc() & dec() or add() & sub() in accordance with
840 // the product flag UseIncDec value.
841
842 void decl(Register dst);
843 void decl(Address dst);
844 void decq(Address dst);
845
846 void incl(Register dst);
847 void incl(Address dst);
848 void incq(Register dst);
849 void incq(Address dst);
850
851 // New cpus require use of movsd and movss to avoid partial register stall
852 // when loading from memory. But for old Opteron use movlpd instead of movsd.
853 // The selection is done in MacroAssembler::movdbl() and movflt().
854
855 // Move Scalar Single-Precision Floating-Point Values
856 void movss(XMMRegister dst, Address src);
857 void movss(XMMRegister dst, XMMRegister src);
858 void movss(Address dst, XMMRegister src);
859
860 // Move Scalar Double-Precision Floating-Point Values
861 void movsd(XMMRegister dst, Address src);
862 void movsd(XMMRegister dst, XMMRegister src);
863 void movsd(Address dst, XMMRegister src);
864 void movlpd(XMMRegister dst, Address src);
865
866 // New cpus require use of movaps and movapd to avoid partial register stall
867 // when moving between registers.
868 void movaps(XMMRegister dst, XMMRegister src);
869 void movapd(XMMRegister dst, XMMRegister src);
870
871 // End avoid using directly
872
873
874 // Instruction prefixes
875 void prefix(Prefix p);
876
877 public:
878
879 // Creation
880 Assembler(CodeBuffer* code) : AbstractAssembler(code) { // forwards the code buffer to the base class
881 init_attributes(); // declared further down in this class; body is not in this header section
882 }
883
884 // Decoding
885 static address locate_operand(address inst, WhichOperand which);
886 static address locate_next_instruction(address inst);
887
888 // Utilities
889 static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
890 int cur_tuple_type, int in_size_in_bits, int cur_encoding);
891
892 // Generic instructions
893 // Does 32bit or 64bit as needed for the platform. In some sense these
894 // belong in macro assembler but there is no need for both varieties to exist
895
896 void init_attributes(void);
897
898 void set_attributes(InstructionAttr *attributes) { _attributes = attributes; } // stores the raw pointer as-is
899 void clear_attributes(void) { _attributes = NULL; } // resets the pointer stored by set_attributes()
900
901 void set_managed(void) { NOT_LP64(_is_managed = true;) } // empty body on LP64, where _is_managed is not declared
902 void clear_managed(void) { NOT_LP64(_is_managed = false;) } // empty body on LP64, where _is_managed is not declared
903 bool is_managed(void) {
904 NOT_LP64(return _is_managed;) // non-LP64: report the stored flag
905 LP64_ONLY(return false;) } // LP64: there is no flag, so always false
906
907 void lea(Register dst, Address src);
908
909 void mov(Register dst, Register src);
910
911#ifdef _LP64
912 // support caching the result of some routines
913
914 // must be called before pusha(), popa(), vzeroupper() - checked with asserts
915 static void precompute_instructions();
916
917 void pusha_uncached();
918 void popa_uncached();
919#endif
920 void vzeroupper_uncached();
921 void decq(Register dst);
922
923 void pusha();
924 void popa();
925
926 void pushf();
927 void popf();
928
929 void push(int32_t imm32);
930
931 void push(Register src);
932
933 void pop(Register dst);
934
935 // These are dummies to prevent surprise implicit conversions to Register
936 void push(void* v);
937 void pop(void* v);
938
939 // These do register sized moves/scans
940 void rep_mov();
941 void rep_stos();
942 void rep_stosb();
943 void repne_scan();
944#ifdef _LP64
945 void repne_scanl(); // declared only when _LP64 is defined
946#endif
947
948 // Vanilla instructions in lexical order
949
950 void adcl(Address dst, int32_t imm32);
951 void adcl(Address dst, Register src);
952 void adcl(Register dst, int32_t imm32);
953 void adcl(Register dst, Address src);
954 void adcl(Register dst, Register src);
955
956 void adcq(Register dst, int32_t imm32);
957 void adcq(Register dst, Address src);
958 void adcq(Register dst, Register src);
959
960 void addb(Address dst, int imm8);
961 void addw(Register dst, Register src);
962 void addw(Address dst, int imm16);
963
964 void addl(Address dst, int32_t imm32);
965 void addl(Address dst, Register src);
966 void addl(Register dst, int32_t imm32);
967 void addl(Register dst, Address src);
968 void addl(Register dst, Register src);
969
970 void addq(Address dst, int32_t imm32);
971 void addq(Address dst, Register src);
972 void addq(Register dst, int32_t imm32);
973 void addq(Register dst, Address src);
974 void addq(Register dst, Register src);
975
976#ifdef _LP64
977 //Add Unsigned Integers with Carry Flag
978 void adcxq(Register dst, Register src);
979
980 //Add Unsigned Integers with Overflow Flag
981 void adoxq(Register dst, Register src);
982#endif
983
984 void addr_nop_4();
985 void addr_nop_5();
986 void addr_nop_7();
987 void addr_nop_8();
988
989 // Add Scalar Double-Precision Floating-Point Values
990 void addsd(XMMRegister dst, Address src);
991 void addsd(XMMRegister dst, XMMRegister src);
992
993 // Add Scalar Single-Precision Floating-Point Values
994 void addss(XMMRegister dst, Address src);
995 void addss(XMMRegister dst, XMMRegister src);
996
997 // AES instructions
998 void aesdec(XMMRegister dst, Address src);
999 void aesdec(XMMRegister dst, XMMRegister src);
1000 void aesdeclast(XMMRegister dst, Address src);
1001 void aesdeclast(XMMRegister dst, XMMRegister src);
1002 void aesenc(XMMRegister dst, Address src);
1003 void aesenc(XMMRegister dst, XMMRegister src);
1004 void aesenclast(XMMRegister dst, Address src);
1005 void aesenclast(XMMRegister dst, XMMRegister src);
1006 // Vector AES instructions
1007 void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1008 void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1009 void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1010 void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1011
1012 void andw(Register dst, Register src);
1013 void andb(Address dst, Register src);
1014
1015 void andl(Address dst, int32_t imm32);
1016 void andl(Register dst, int32_t imm32);
1017 void andl(Register dst, Address src);
1018 void andl(Register dst, Register src);
1019 void andl(Address dst, Register src);
1020
1021 void andq(Address dst, int32_t imm32);
1022 void andq(Register dst, int32_t imm32);
1023 void andq(Register dst, Address src);
1024 void andq(Register dst, Register src);
1025 void andq(Address dst, Register src);
1026
1027 // BMI instructions
1028 void andnl(Register dst, Register src1, Register src2);
1029 void andnl(Register dst, Register src1, Address src2);
1030 void andnq(Register dst, Register src1, Register src2);
1031 void andnq(Register dst, Register src1, Address src2);
1032
1033 void blsil(Register dst, Register src);
1034 void blsil(Register dst, Address src);
1035 void blsiq(Register dst, Register src);
1036 void blsiq(Register dst, Address src);
1037
1038 void blsmskl(Register dst, Register src);
1039 void blsmskl(Register dst, Address src);
1040 void blsmskq(Register dst, Register src);
1041 void blsmskq(Register dst, Address src);
1042
1043 void blsrl(Register dst, Register src);
1044 void blsrl(Register dst, Address src);
1045 void blsrq(Register dst, Register src);
1046 void blsrq(Register dst, Address src);
1047
1048 void bsfl(Register dst, Register src);
1049 void bsrl(Register dst, Register src);
1050
1051#ifdef _LP64
1052 void bsfq(Register dst, Register src); // 64-bit forms of bsfl/bsrl, declared only when _LP64 is defined
1053 void bsrq(Register dst, Register src);
1054#endif
1055
1056 void bswapl(Register reg);
1057
1058 void bswapq(Register reg);
1059
1060 void call(Label& L, relocInfo::relocType rtype);
1061 void call(Register reg); // push pc; pc <- reg
1062 void call(Address adr); // push pc; pc <- adr
1063
1064 void cdql();
1065
1066 void cdqq();
1067
1068 void cld();
1069
1070 void clflush(Address adr);
1071 void clflushopt(Address adr);
1072 void clwb(Address adr);
1073
1074 void cmovl(Condition cc, Register dst, Register src);
1075 void cmovl(Condition cc, Register dst, Address src);
1076
1077 void cmovq(Condition cc, Register dst, Register src);
1078 void cmovq(Condition cc, Register dst, Address src);
1079
1080
1081 void cmpb(Address dst, int imm8);
1082
1083 void cmpl(Address dst, int32_t imm32);
1084
1085 void cmp(Register dst, int32_t imm32);
1086 void cmpl(Register dst, int32_t imm32);
1087 void cmpl(Register dst, Register src);
1088 void cmpl(Register dst, Address src);
1089
1090 void cmpq(Address dst, int32_t imm32);
1091 void cmpq(Address dst, Register src);
1092
1093 void cmpq(Register dst, int32_t imm32);
1094 void cmpq(Register dst, Register src);
1095 void cmpq(Register dst, Address src);
1096
1097 // these are dummies used to catch attempting to convert NULL to Register
1098 void cmpl(Register dst, void* junk); // dummy
1099 void cmpq(Register dst, void* junk); // dummy
1100
1101 void cmpw(Address dst, int imm16);
1102
1103 void cmpxchg8 (Address adr);
1104
1105 void cmpxchgb(Register reg, Address adr);
1106 void cmpxchgl(Register reg, Address adr);
1107
1108 void cmpxchgq(Register reg, Address adr);
1109 void cmpxchgw(Register reg, Address adr);
1110
1111 // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1112 void comisd(XMMRegister dst, Address src);
1113 void comisd(XMMRegister dst, XMMRegister src);
1114
1115 // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1116 void comiss(XMMRegister dst, Address src);
1117 void comiss(XMMRegister dst, XMMRegister src);
1118
1119 // Identify processor type and features
1120 void cpuid();
1121
1122 // CRC32C
1123 void crc32(Register crc, Register v, int8_t sizeInBytes);
1124 void crc32(Register crc, Address adr, int8_t sizeInBytes);
1125
1126 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1127 void cvtsd2ss(XMMRegister dst, XMMRegister src);
1128 void cvtsd2ss(XMMRegister dst, Address src);
1129
1130 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
1131 void cvtsi2sdl(XMMRegister dst, Register src);
1132 void cvtsi2sdl(XMMRegister dst, Address src);
1133 void cvtsi2sdq(XMMRegister dst, Register src);
1134 void cvtsi2sdq(XMMRegister dst, Address src);
1135
1136 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
1137 void cvtsi2ssl(XMMRegister dst, Register src);
1138 void cvtsi2ssl(XMMRegister dst, Address src);
1139 void cvtsi2ssq(XMMRegister dst, Register src);
1140 void cvtsi2ssq(XMMRegister dst, Address src);
1141
1142 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1143 void cvtdq2pd(XMMRegister dst, XMMRegister src);
1144 void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1145
1146 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1147 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1148 void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1149
1150 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1151 void cvtss2sd(XMMRegister dst, XMMRegister src);
1152 void cvtss2sd(XMMRegister dst, Address src);
1153
1154 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1155 void cvttsd2sil(Register dst, Address src);
1156 void cvttsd2sil(Register dst, XMMRegister src);
1157 void cvttsd2siq(Register dst, Address src);
1158 void cvttsd2siq(Register dst, XMMRegister src);
1159
1160 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1161 void cvttss2sil(Register dst, XMMRegister src);
1162 void cvttss2siq(Register dst, XMMRegister src);
1163
1164 // Convert vector double to int
1165 void cvttpd2dq(XMMRegister dst, XMMRegister src);
1166
1167 // Convert vector float and double
1168 void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
1169 void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
1170
1171 // Convert vector float and int
1172 void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
1173
1174 // Convert vector long to vector FP
1175 void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1176 void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1177
1178 // Convert vector double to long
1179 void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
1180
1181 // Evex casts with truncation
1182 void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
1183 void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
1184 void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
1185 void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
1186 void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
1187 void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
1188
1189 //Abs of packed Integer values
1190 void pabsb(XMMRegister dst, XMMRegister src);
1191 void pabsw(XMMRegister dst, XMMRegister src);
1192 void pabsd(XMMRegister dst, XMMRegister src);
1193 void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
1194 void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
1195 void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
1196 void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
1197
1198 // Divide Scalar Double-Precision Floating-Point Values
1199 void divsd(XMMRegister dst, Address src);
1200 void divsd(XMMRegister dst, XMMRegister src);
1201
1202 // Divide Scalar Single-Precision Floating-Point Values
1203 void divss(XMMRegister dst, Address src);
1204 void divss(XMMRegister dst, XMMRegister src);
1205
1206
1207#ifndef _LP64 // section compiled only when _LP64 is not defined; closed by the matching "#endif // !_LP64"
1208 private:
1209
1210 void emit_farith(int b1, int b2, int i);
1211
1212 public:
1213 void emms();
1214
1215 void fabs();
1216
1217 void fadd(int i);
1218
1219 void fadd_d(Address src);
1220 void fadd_s(Address src);
1221
1222 // "Alternate" versions of x87 instructions place result down in FPU
1223 // stack instead of on TOS
1224
1225 void fadda(int i); // "alternate" fadd
1226 void faddp(int i = 1);
1227
1228 void fchs();
1229
1230 void fcom(int i);
1231
1232 void fcomp(int i = 1);
1233 void fcomp_d(Address src);
1234 void fcomp_s(Address src);
1235
1236 void fcompp();
1237
1238 void fcos();
1239
1240 void fdecstp();
1241
1242 void fdiv(int i);
1243 void fdiv_d(Address src);
1244 void fdivr_s(Address src);
1245 void fdiva(int i); // "alternate" fdiv
1246 void fdivp(int i = 1);
1247
1248 void fdivr(int i);
1249 void fdivr_d(Address src);
1250 void fdiv_s(Address src);
1251
1252 void fdivra(int i); // "alternate" reversed fdiv
1253
1254 void fdivrp(int i = 1);
1255
1256 void ffree(int i = 0);
1257
1258 void fild_d(Address adr);
1259 void fild_s(Address adr);
1260
1261 void fincstp();
1262
1263 void finit();
1264
1265 void fist_s (Address adr);
1266 void fistp_d(Address adr);
1267 void fistp_s(Address adr);
1268
1269 void fld1();
1270
1271 void fld_d(Address adr);
1272 void fld_s(Address adr);
1273 void fld_s(int index);
1274
1275 void fldcw(Address src);
1276
1277 void fldenv(Address src);
1278
1279 void fldlg2();
1280
1281 void fldln2();
1282
1283 void fldz();
1284
1285 void flog();
1286 void flog10();
1287
1288 void fmul(int i);
1289
1290 void fmul_d(Address src);
1291 void fmul_s(Address src);
1292
1293 void fmula(int i); // "alternate" fmul
1294
1295 void fmulp(int i = 1);
1296
1297 void fnsave(Address dst);
1298
1299 void fnstcw(Address src);
1300
1301 void fnstsw_ax();
1302
1303 void fprem();
1304 void fprem1();
1305
1306 void frstor(Address src);
1307
1308 void fsin();
1309
1310 void fsqrt();
1311
1312 void fst_d(Address adr);
1313 void fst_s(Address adr);
1314
1315 void fstp_d(Address adr);
1316 void fstp_d(int index);
1317 void fstp_s(Address adr);
1318
1319 void fsub(int i);
1320 void fsub_d(Address src);
1321 void fsub_s(Address src);
1322
1323 void fsuba(int i); // "alternate" fsub
1324
1325 void fsubp(int i = 1);
1326
1327 void fsubr(int i);
1328 void fsubr_d(Address src);
1329 void fsubr_s(Address src);
1330
1331 void fsubra(int i); // "alternate" reversed fsub
1332
1333 void fsubrp(int i = 1);
1334
1335 void ftan();
1336
1337 void ftst();
1338
1339 void fucomi(int i = 1);
1340 void fucomip(int i = 1);
1341
1342 void fwait();
1343
1344 void fxch(int i = 1);
1345
1346 void fyl2x();
1347 void frndint();
1348 void f2xm1();
1349 void fldl2e();
1350#endif // !_LP64
1351
1352 // operands that only take the original 32bit registers
1353 void emit_operand32(Register reg, Address adr);
1354
1355 void fld_x(Address adr); // extended-precision (80-bit) format
1356 void fstp_x(Address adr); // extended-precision (80-bit) format
1357 void fxrstor(Address src);
1358 void xrstor(Address src);
1359
1360 void fxsave(Address dst);
1361 void xsave(Address dst);
1362
1363 void hlt();
1364
1365 void idivl(Register src);
1366 void divl(Register src); // Unsigned division
1367
1368#ifdef _LP64
1369 void idivq(Register src); // declared only when _LP64 is defined
1370#endif
1371
1372 void imull(Register src);
1373 void imull(Register dst, Register src);
1374 void imull(Register dst, Register src, int value);
1375 void imull(Register dst, Address src, int value);
1376 void imull(Register dst, Address src);
1377
1378#ifdef _LP64
1379 void imulq(Register dst, Register src); // imulq overloads are declared only when _LP64 is defined
1380 void imulq(Register dst, Register src, int value);
1381 void imulq(Register dst, Address src, int value);
1382 void imulq(Register dst, Address src);
1383 void imulq(Register dst);
1384#endif
1385
1386 // jcc is the generic conditional branch generator to run-
1387 // time routines, jcc is used for branches to labels. jcc
1388 // takes a branch opcode (cc) and a label (L) and generates
1389 // either a backward branch or a forward branch and links it
1390 // to the label fixup chain. Usage:
1391 //
1392 // Label L; // unbound label
1393 // jcc(cc, L); // forward branch to unbound label
1394 // bind(L); // bind label to the current pc
1395 // jcc(cc, L); // backward branch to bound label
1396 // bind(L); // illegal: a label may be bound only once
1397 //
1398 // Note: The same Label can be used for forward and backward branches
1399 // but it may be bound only once.
1400
1401 void jcc(Condition cc, Label& L, bool maybe_short = true);
1402
1403 // Conditional jump to a 8-bit offset to L.
1404 // WARNING: be very careful using this for forward jumps. If the label is
1405 // not bound within an 8-bit offset of this instruction, a run-time error
1406 // will occur.
1407
1408 // Use macro to record file and line number.
1409 #define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__)
1410
1411 void jccb_0(Condition cc, Label& L, const char* file, int line);
1412
1413 void jmp(Address entry); // pc <- entry
1414
1415 // Label operations & relative jumps (PPUM Appendix D)
1416 void jmp(Label& L, bool maybe_short = true); // unconditional jump to L
1417
1418 void jmp(Register entry); // pc <- entry
1419
1420 // Unconditional 8-bit offset jump to L.
1421 // WARNING: be very careful using this for forward jumps. If the label is
1422 // not bound within an 8-bit offset of this instruction, a run-time error
1423 // will occur.
1424
1425 // Use macro to record file and line number.
1426 #define jmpb(L) jmpb_0(L, __FILE__, __LINE__)
1427
1428 void jmpb_0(Label& L, const char* file, int line);
1429
1430 void ldmxcsr( Address src );
1431
1432 void leal(Register dst, Address src);
1433
1434 void leaq(Register dst, Address src);
1435
1436 void lfence();
1437
1438 void lock();
1439 void size_prefix();
1440
1441 void lzcntl(Register dst, Register src);
1442
1443#ifdef _LP64
1444 void lzcntq(Register dst, Register src); // 64-bit form of lzcntl, declared only when _LP64 is defined
1445#endif
1446
1447 enum Membar_mask_bits { // four distinct single-bit flags (bits 0..3); this is the parameter type of membar()
1448 StoreStore = 1 << 3,
1449 LoadStore = 1 << 2,
1450 StoreLoad = 1 << 1,
1451 LoadLoad = 1 << 0
1452 };
1453
1454 // Serializes memory and blows flags
1455 void membar(Membar_mask_bits order_constraint);
1456
1457 void mfence();
1458 void sfence();
1459
1460 // Moves
1461
1462 void mov64(Register dst, int64_t imm64);
1463 void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);
1464
1465 void movb(Address dst, Register src);
1466 void movb(Address dst, int imm8);
1467 void movb(Register dst, Address src);
1468
1469 void movddup(XMMRegister dst, XMMRegister src);
1470
1471 void kandbl(KRegister dst, KRegister src1, KRegister src2);
1472 void kandwl(KRegister dst, KRegister src1, KRegister src2);
1473 void kanddl(KRegister dst, KRegister src1, KRegister src2);
1474 void kandql(KRegister dst, KRegister src1, KRegister src2);
1475
1476 void korbl(KRegister dst, KRegister src1, KRegister src2);
1477 void korwl(KRegister dst, KRegister src1, KRegister src2);
1478 void kordl(KRegister dst, KRegister src1, KRegister src2);
1479 void korql(KRegister dst, KRegister src1, KRegister src2);
1480
1481 void kxorbl(KRegister dst, KRegister src1, KRegister src2);
1482 void kxorwl(KRegister dst, KRegister src1, KRegister src2);
1483 void kxordl(KRegister dst, KRegister src1, KRegister src2);
1484 void kxorql(KRegister dst, KRegister src1, KRegister src2);
1485 void kmovbl(KRegister dst, Register src);
1486 void kmovbl(Register dst, KRegister src);
1487 void kmovbl(KRegister dst, KRegister src);
1488 void kmovwl(KRegister dst, Register src);
1489 void kmovwl(KRegister dst, Address src);
1490 void kmovwl(Register dst, KRegister src);
1491 void kmovwl(Address dst, KRegister src);
1492 void kmovwl(KRegister dst, KRegister src);
1493 void kmovdl(KRegister dst, Register src);
1494 void kmovdl(Register dst, KRegister src);
1495 void kmovql(KRegister dst, KRegister src);
1496 void kmovql(Address dst, KRegister src);
1497 void kmovql(KRegister dst, Address src);
1498 void kmovql(KRegister dst, Register src);
1499 void kmovql(Register dst, KRegister src);
1500
1501 void knotbl(KRegister dst, KRegister src);
1502 void knotwl(KRegister dst, KRegister src);
1503 void knotdl(KRegister dst, KRegister src);
1504 void knotql(KRegister dst, KRegister src);
1505
1506 void kortestbl(KRegister dst, KRegister src);
1507 void kortestwl(KRegister dst, KRegister src);
1508 void kortestdl(KRegister dst, KRegister src);
1509 void kortestql(KRegister dst, KRegister src);
1510
1511 void kxnorbl(KRegister dst, KRegister src1, KRegister src2);
1512 void kshiftlbl(KRegister dst, KRegister src, int imm8);
1513 void kshiftrbl(KRegister dst, KRegister src, int imm8);
1514 void kshiftrwl(KRegister dst, KRegister src, int imm8);
1515 void kshiftrdl(KRegister dst, KRegister src, int imm8);
1516 void kshiftrql(KRegister dst, KRegister src, int imm8);
1517 void ktestq(KRegister src1, KRegister src2);
1518 void ktestd(KRegister src1, KRegister src2);
1519
1520 void ktestql(KRegister dst, KRegister src);
1521 void ktestdl(KRegister dst, KRegister src);
1522 void ktestwl(KRegister dst, KRegister src);
1523 void ktestbl(KRegister dst, KRegister src);
1524
1525 void movdl(XMMRegister dst, Register src);
1526 void movdl(Register dst, XMMRegister src);
1527 void movdl(XMMRegister dst, Address src);
1528 void movdl(Address dst, XMMRegister src);
1529
1530 // Move Double Quadword
1531 void movdq(XMMRegister dst, Register src);
1532 void movdq(Register dst, XMMRegister src);
1533
1534 // Move Aligned Double Quadword
1535 void movdqa(XMMRegister dst, XMMRegister src);
1536 void movdqa(XMMRegister dst, Address src);
1537
1538 // Move Unaligned Double Quadword
1539 void movdqu(Address dst, XMMRegister src);
1540 void movdqu(XMMRegister dst, Address src);
1541 void movdqu(XMMRegister dst, XMMRegister src);
1542
1543 // Move Unaligned 256bit Vector
1544 void vmovdqu(Address dst, XMMRegister src);
1545 void vmovdqu(XMMRegister dst, Address src);
1546 void vmovdqu(XMMRegister dst, XMMRegister src);
1547
1548 // Move Unaligned 512bit Vector
1549 void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
1550 void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
1551 void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
1552 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1553 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1554 void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
1555 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1556 void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
1557 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1558 void evmovdqul(Address dst, XMMRegister src, int vector_len);
1559 void evmovdqul(XMMRegister dst, Address src, int vector_len);
1560 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
1561 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1562 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1563 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1564 void evmovdquq(Address dst, XMMRegister src, int vector_len);
1565 void evmovdquq(XMMRegister dst, Address src, int vector_len);
1566 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
1567 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1568 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1569 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1570
1571 // Move lower 64bit to high 64bit in 128bit register
1572 void movlhps(XMMRegister dst, XMMRegister src);
1573
1574 void movl(Register dst, int32_t imm32);
1575 void movl(Address dst, int32_t imm32);
1576 void movl(Register dst, Register src);
1577 void movl(Register dst, Address src);
1578 void movl(Address dst, Register src);
1579
1580 // These dummies prevent using movl from converting a zero (like NULL) into Register
1581 // by giving the compiler two choices it can't resolve
1582
1583 void movl(Address dst, void* junk);
1584 void movl(Register dst, void* junk);
1585
1586#ifdef _LP64 // 64-bit (LP64) builds only
1587 void movq(Register dst, Register src);
1588 void movq(Register dst, Address src);
1589 void movq(Address dst, Register src);
1590 void movq(Address dst, int32_t imm32);
1591 void movq(Register dst, int32_t imm32);
1592
1593 // These dummies prevent using movq from converting a zero (like NULL) into Register
1594 // by giving the compiler two choices it can't resolve
1595
1596 void movq(Address dst, void* dummy);
1597 void movq(Register dst, void* dummy);
1598#endif // _LP64
1599
1600 // Move Quadword
1601 void movq(Address dst, XMMRegister src);
1602 void movq(XMMRegister dst, Address src);
1603 void movq(XMMRegister dst, XMMRegister src);
1604 void movq(Register dst, XMMRegister src);
1605 void movq(XMMRegister dst, Register src);
1606
1607 void movsbl(Register dst, Address src);
1608 void movsbl(Register dst, Register src);
1609
1610#ifdef _LP64 // 64-bit (LP64) builds only
1611 void movsbq(Register dst, Address src);
1612 void movsbq(Register dst, Register src);
1613
1614 // Move signed 32bit immediate to 64bit extending sign
1615 void movslq(Address dst, int32_t imm64);
1616 void movslq(Register dst, int32_t imm64);
1617
1618 void movslq(Register dst, Address src);
1619 void movslq(Register dst, Register src);
1620 void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1621#endif // _LP64
1622
1623 void movswl(Register dst, Address src);
1624 void movswl(Register dst, Register src);
1625
1626#ifdef _LP64 // 64-bit (LP64) builds only
1627 void movswq(Register dst, Address src);
1628 void movswq(Register dst, Register src);
1629#endif // _LP64
1630
1631 void movw(Address dst, int imm16);
1632 void movw(Register dst, Address src);
1633 void movw(Address dst, Register src);
1634
1635 void movzbl(Register dst, Address src);
1636 void movzbl(Register dst, Register src);
1637
1638#ifdef _LP64 // 64-bit (LP64) builds only
1639 void movzbq(Register dst, Address src);
1640 void movzbq(Register dst, Register src);
1641#endif // _LP64
1642
1643 void movzwl(Register dst, Address src);
1644 void movzwl(Register dst, Register src);
1645
1646#ifdef _LP64 // 64-bit (LP64) builds only
1647 void movzwq(Register dst, Address src);
1648 void movzwq(Register dst, Register src);
1649#endif // _LP64
1650
1651 // Unsigned multiply with RAX destination register
1652 void mull(Address src);
1653 void mull(Register src);
1654
1655#ifdef _LP64 // 64-bit (LP64) builds only
1656 void mulq(Address src);
1657 void mulq(Register src);
1658 void mulxq(Register dst1, Register dst2, Register src);
1659#endif // _LP64
1660
1661 // Multiply Scalar Double-Precision Floating-Point Values
1662 void mulsd(XMMRegister dst, Address src);
1663 void mulsd(XMMRegister dst, XMMRegister src);
1664
1665 // Multiply Scalar Single-Precision Floating-Point Values
1666 void mulss(XMMRegister dst, Address src);
1667 void mulss(XMMRegister dst, XMMRegister src);
1668
1669 void negl(Register dst);
1670 void negl(Address dst);
1671
1672#ifdef _LP64 // 64-bit (LP64) builds only
1673 void negq(Register dst);
1674 void negq(Address dst);
1675#endif // _LP64
1676
1677 void nop(int i = 1);
1678
1679 void notl(Register dst);
1680
1681#ifdef _LP64 // 64-bit (LP64) builds only
1682 void notq(Register dst);
1683
1684 void btsq(Address dst, int imm8);
1685 void btrq(Address dst, int imm8);
1686#endif // _LP64
1687
1688 void orw(Register dst, Register src);
1689
1690 void orl(Address dst, int32_t imm32);
1691 void orl(Register dst, int32_t imm32);
1692 void orl(Register dst, Address src);
1693 void orl(Register dst, Register src);
1694 void orl(Address dst, Register src);
1695
1696 void orb(Address dst, int imm8);
1697 void orb(Address dst, Register src);
1698
1699 void orq(Address dst, int32_t imm32);
1700 void orq(Address dst, Register src);
1701 void orq(Register dst, int32_t imm32);
1702 void orq(Register dst, Address src);
1703 void orq(Register dst, Register src);
1704
1705 // Pack with signed saturation
1706 void packsswb(XMMRegister dst, XMMRegister src);
1707 void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1708 void packssdw(XMMRegister dst, XMMRegister src);
1709 void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1710
1711 // Pack with unsigned saturation
1712 void packuswb(XMMRegister dst, XMMRegister src);
1713 void packuswb(XMMRegister dst, Address src);
1714 void packusdw(XMMRegister dst, XMMRegister src);
1715 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1716 void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1717
1718 // Permutations
1719 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1720 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1721 void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1722 void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1723 void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1724 void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1725 void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1726 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1727 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1728 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1729 void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1730 void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1731 void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1732 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1733 void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1734 void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len);
1735
1736 void pause();
1737
1738 // Undefined Instruction
1739 void ud2();
1740
1741 // SSE4.2 string instructions
1742 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1743 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1744
1745 void pcmpeqb(XMMRegister dst, XMMRegister src);
1746 void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1747
1748 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1749 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1750 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1751 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1752
1753 void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1754 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1755 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1756
1757 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1758 void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);
1759
1760 void pcmpeqw(XMMRegister dst, XMMRegister src);
1761 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1762 void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1763 void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1764
1765 void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1766
1767 void pcmpeqd(XMMRegister dst, XMMRegister src);
1768 void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1769 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
1770 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1771
1772 void pcmpeqq(XMMRegister dst, XMMRegister src);
1773 void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1774 void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1775 void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1776 void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1777
1778 void pcmpgtq(XMMRegister dst, XMMRegister src);
1779 void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1780
1781 void pmovmskb(Register dst, XMMRegister src);
1782 void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
1783 void vmovmskps(Register dst, XMMRegister src, int vec_enc);
1784 void vmovmskpd(Register dst, XMMRegister src, int vec_enc);
1785 void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1786
1787 // SSE 4.1 extract
1788 void pextrd(Register dst, XMMRegister src, int imm8);
1789 void pextrq(Register dst, XMMRegister src, int imm8);
1790 void pextrd(Address dst, XMMRegister src, int imm8);
1791 void pextrq(Address dst, XMMRegister src, int imm8);
1792 void pextrb(Register dst, XMMRegister src, int imm8);
1793 void pextrb(Address dst, XMMRegister src, int imm8);
1794 // SSE 2 extract
1795 void pextrw(Register dst, XMMRegister src, int imm8);
1796 void pextrw(Address dst, XMMRegister src, int imm8);
1797
1798 // SSE 4.1 insert
1799 void pinsrd(XMMRegister dst, Register src, int imm8);
1800 void pinsrq(XMMRegister dst, Register src, int imm8);
1801 void pinsrb(XMMRegister dst, Register src, int imm8);
1802 void pinsrd(XMMRegister dst, Address src, int imm8);
1803 void pinsrq(XMMRegister dst, Address src, int imm8);
1804 void pinsrb(XMMRegister dst, Address src, int imm8);
1805 void insertps(XMMRegister dst, XMMRegister src, int imm8);
1806 // SSE 2 insert
1807 void pinsrw(XMMRegister dst, Register src, int imm8);
1808 void pinsrw(XMMRegister dst, Address src, int imm8);
1809
1810 // AVX insert
1811 void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1812 void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1813 void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1814 void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1815 void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1816
1817 // Zero extend moves
1818 void pmovzxbw(XMMRegister dst, XMMRegister src);
1819 void pmovzxbw(XMMRegister dst, Address src);
1820 void pmovzxbd(XMMRegister dst, XMMRegister src);
1821 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1822 void pmovzxdq(XMMRegister dst, XMMRegister src);
1823 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1824 void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
1825 void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
1826 void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
1827 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1828
1829 // Sign extend moves
1830 void pmovsxbd(XMMRegister dst, XMMRegister src);
1831 void pmovsxbq(XMMRegister dst, XMMRegister src);
1832 void pmovsxbw(XMMRegister dst, XMMRegister src);
1833 void pmovsxwd(XMMRegister dst, XMMRegister src);
1834 void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
1835 void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
1836 void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
1837 void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
1838 void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
1839 void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
1840
1841 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1842 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1843
1844 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1845
1846 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1847
1848 // Multiply add
1849 void pmaddwd(XMMRegister dst, XMMRegister src);
1850 void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1851 void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1852
1853 // Multiply add accumulate
1854 void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1855
1856#ifndef _LP64 // no 32bit push/pop on amd64
1857 void popl(Address dst);
1858#endif // !_LP64
1859
1860#ifdef _LP64 // 64-bit (LP64) builds only
1861 void popq(Address dst);
1862 void popq(Register dst);
1863#endif // _LP64
1864
1865 void popcntl(Register dst, Address src);
1866 void popcntl(Register dst, Register src);
1867
1868 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1869
1870#ifdef _LP64 // 64-bit (LP64) builds only
1871 void popcntq(Register dst, Address src);
1872 void popcntq(Register dst, Register src);
1873#endif // _LP64
1874
1875 // Prefetches (SSE, SSE2, 3DNOW only)
1876
1877 void prefetchnta(Address src);
1878 void prefetchr(Address src);
1879 void prefetcht0(Address src);
1880 void prefetcht1(Address src);
1881 void prefetcht2(Address src);
1882 void prefetchw(Address src);
1883
1884 // Shuffle Bytes
1885 void pshufb(XMMRegister dst, XMMRegister src);
1886 void pshufb(XMMRegister dst, Address src);
1887 void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1888
1889 // Shuffle Packed Doublewords
1890 void pshufd(XMMRegister dst, XMMRegister src, int mode);
1891 void pshufd(XMMRegister dst, Address src, int mode);
1892 void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
1893
1894 // Shuffle Packed High/Low Words
1895 void pshufhw(XMMRegister dst, XMMRegister src, int mode);
1896 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1897 void pshuflw(XMMRegister dst, Address src, int mode);
1898
1899 //shuffle floats and doubles
1900 void pshufps(XMMRegister, XMMRegister, int);
1901 void pshufpd(XMMRegister, XMMRegister, int);
1902 void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
1903 void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
1904
1905 // Shuffle packed values at 128 bit granularity
1906 void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
1907
1908 // Shift Right by bytes Logical DoubleQuadword Immediate
1909 void psrldq(XMMRegister dst, int shift);
1910 // Shift Left by bytes Logical DoubleQuadword Immediate
1911 void pslldq(XMMRegister dst, int shift);
1912
1913 // Logical Compare 128bit
1914 void ptest(XMMRegister dst, XMMRegister src);
1915 void ptest(XMMRegister dst, Address src);
1916 // Logical Compare 256bit
1917 void vptest(XMMRegister dst, XMMRegister src);
1918 void vptest(XMMRegister dst, Address src);
1919
1920 void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1921
1922 // Vector compare
1923 void vptest(XMMRegister dst, XMMRegister src, int vector_len);
1924
1925 // Interleave Low Bytes
1926 void punpcklbw(XMMRegister dst, XMMRegister src);
1927 void punpcklbw(XMMRegister dst, Address src);
1928
1929 // Interleave Low Doublewords
1930 void punpckldq(XMMRegister dst, XMMRegister src);
1931 void punpckldq(XMMRegister dst, Address src);
1932
1933 // Interleave Low Quadwords
1934 void punpcklqdq(XMMRegister dst, XMMRegister src);
1935
1936#ifndef _LP64 // no 32bit push/pop on amd64
1937 void pushl(Address src);
1938#endif // !_LP64
1939
1940 void pushq(Address src);
1941
1942 void rcll(Register dst, int imm8);
1943
1944 void rclq(Register dst, int imm8);
1945
1946 void rcrq(Register dst, int imm8);
1947
1948 void rcpps(XMMRegister dst, XMMRegister src);
1949
1950 void rcpss(XMMRegister dst, XMMRegister src);
1951
1952 void rdtsc();
1953
1954 void ret(int imm16);
1955
1956 void roll(Register dst);
1957
1958 void roll(Register dst, int imm8);
1959
1960 void rorl(Register dst);
1961
1962 void rorl(Register dst, int imm8);
1963
1964#ifdef _LP64 // 64-bit (LP64) builds only
1965 void rolq(Register dst);
1966 void rolq(Register dst, int imm8);
1967 void rorq(Register dst);
1968 void rorq(Register dst, int imm8);
1969 void rorxq(Register dst, Register src, int imm8);
1970 void rorxd(Register dst, Register src, int imm8);
1971#endif // _LP64
1972
1973 void sahf();
1974
1975 void sall(Register dst, int imm8);
1976 void sall(Register dst);
1977 void sall(Address dst, int imm8);
1978 void sall(Address dst);
1979
1980 void sarl(Address dst, int imm8);
1981 void sarl(Address dst);
1982 void sarl(Register dst, int imm8);
1983 void sarl(Register dst);
1984
1985#ifdef _LP64 // 64-bit (LP64) builds only
1986 void salq(Register dst, int imm8);
1987 void salq(Register dst);
1988 void salq(Address dst, int imm8);
1989 void salq(Address dst);
1990
1991 void sarq(Address dst, int imm8);
1992 void sarq(Address dst);
1993 void sarq(Register dst, int imm8);
1994 void sarq(Register dst);
1995#endif // _LP64
1996
1997 void sbbl(Address dst, int32_t imm32);
1998 void sbbl(Register dst, int32_t imm32);
1999 void sbbl(Register dst, Address src);
2000 void sbbl(Register dst, Register src);
2001
2002 void sbbq(Address dst, int32_t imm32);
2003 void sbbq(Register dst, int32_t imm32);
2004 void sbbq(Register dst, Address src);
2005 void sbbq(Register dst, Register src);
2006
2007 void setb(Condition cc, Register dst);
2008
2009 void sete(Register dst);
2010 void setl(Register dst);
2011 void setne(Register dst);
2012
2013 void palignr(XMMRegister dst, XMMRegister src, int imm8);
2014 void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2015 void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2016
2017 void pblendw(XMMRegister dst, XMMRegister src, int imm8);
2018 void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2019
2020 void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
2021 void sha1nexte(XMMRegister dst, XMMRegister src);
2022 void sha1msg1(XMMRegister dst, XMMRegister src);
2023 void sha1msg2(XMMRegister dst, XMMRegister src);
2024 // xmm0 is implicit additional source to the following instruction.
2025 void sha256rnds2(XMMRegister dst, XMMRegister src);
2026 void sha256msg1(XMMRegister dst, XMMRegister src);
2027 void sha256msg2(XMMRegister dst, XMMRegister src);
2028
2029 void shldl(Register dst, Register src);
2030 void shldl(Register dst, Register src, int8_t imm8);
2031 void shrdl(Register dst, Register src);
2032 void shrdl(Register dst, Register src, int8_t imm8);
2033
2034 void shll(Register dst, int imm8);
2035 void shll(Register dst);
2036
2037 void shlq(Register dst, int imm8);
2038 void shlq(Register dst);
2039
2040 void shrl(Register dst, int imm8);
2041 void shrl(Register dst);
2042 void shrl(Address dst);
2043 void shrl(Address dst, int imm8);
2044
2045 void shrq(Register dst, int imm8);
2046 void shrq(Register dst);
2047 void shrq(Address dst);
2048 void shrq(Address dst, int imm8);
2049
2050 void smovl(); // QQQ generic?
2051
2052 // Compute Square Root of Scalar Double-Precision Floating-Point Value
2053 void sqrtsd(XMMRegister dst, Address src);
2054 void sqrtsd(XMMRegister dst, XMMRegister src);
2055
2056 void roundsd(XMMRegister dst, Address src, int32_t rmode);
2057 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);
2058
2059 // Compute Square Root of Scalar Single-Precision Floating-Point Value
2060 void sqrtss(XMMRegister dst, Address src);
2061 void sqrtss(XMMRegister dst, XMMRegister src);
2062
2063 void std();
2064
2065 void stmxcsr( Address dst );
2066
2067 void subl(Address dst, int32_t imm32);
2068 void subl(Address dst, Register src);
2069 void subl(Register dst, int32_t imm32);
2070 void subl(Register dst, Address src);
2071 void subl(Register dst, Register src);
2072
2073 void subq(Address dst, int32_t imm32);
2074 void subq(Address dst, Register src);
2075 void subq(Register dst, int32_t imm32);
2076 void subq(Register dst, Address src);
2077 void subq(Register dst, Register src);
2078
2079 // Force generation of a 4 byte immediate value even if it fits into 8bit
2080 void subl_imm32(Register dst, int32_t imm32);
2081 void subq_imm32(Register dst, int32_t imm32);
2082
2083 // Subtract Scalar Double-Precision Floating-Point Values
2084 void subsd(XMMRegister dst, Address src);
2085 void subsd(XMMRegister dst, XMMRegister src);
2086
2087 // Subtract Scalar Single-Precision Floating-Point Values
2088 void subss(XMMRegister dst, Address src);
2089 void subss(XMMRegister dst, XMMRegister src);
2090
2091 void testb(Register dst, int imm8);
2092 void testb(Address dst, int imm8);
2093
2094 void testl(Register dst, int32_t imm32);
2095 void testl(Register dst, Register src);
2096 void testl(Register dst, Address src);
2097
2098 void testq(Address dst, int32_t imm32);
2099 void testq(Register dst, int32_t imm32);
2100 void testq(Register dst, Register src);
2101 void testq(Register dst, Address src);
2102
2103 // BMI - count trailing zeros
2104 void tzcntl(Register dst, Register src);
2105 void tzcntq(Register dst, Register src);
2106
2107 // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
2108 void ucomisd(XMMRegister dst, Address src);
2109 void ucomisd(XMMRegister dst, XMMRegister src);
2110
2111 // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
2112 void ucomiss(XMMRegister dst, Address src);
2113 void ucomiss(XMMRegister dst, XMMRegister src);
2114
2115 void xabort(int8_t imm8);
2116
2117 void xaddb(Address dst, Register src);
2118 void xaddw(Address dst, Register src);
2119 void xaddl(Address dst, Register src);
2120 void xaddq(Address dst, Register src);
2121
2122 void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
2123
2124 void xchgb(Register reg, Address adr);
2125 void xchgw(Register reg, Address adr);
2126 void xchgl(Register reg, Address adr);
2127 void xchgl(Register dst, Register src);
2128
2129 void xchgq(Register reg, Address adr);
2130 void xchgq(Register dst, Register src);
2131
2132 void xend();
2133
2134 // Get Value of Extended Control Register
2135 void xgetbv();
2136
2137 void xorl(Register dst, int32_t imm32);
2138 void xorl(Address dst, int32_t imm32);
2139 void xorl(Register dst, Address src);
2140 void xorl(Register dst, Register src);
2141 void xorl(Address dst, Register src);
2142
2143 void xorb(Address dst, Register src);
2144 void xorb(Register dst, Address src);
2145 void xorw(Register dst, Register src);
2146
2147 void xorq(Register dst, Address src);
2148 void xorq(Address dst, int32_t imm32);
2149 void xorq(Register dst, Register src);
2150 void xorq(Register dst, int32_t imm32);
2151 void xorq(Address dst, Register src);
2152
2153 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
2154
2155 // AVX 3-operands scalar instructions (encoded with VEX prefix)
2156
2157 void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
2158 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2159 void vaddss(XMMRegister dst, XMMRegister nds, Address src);
2160 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2161 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
2162 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2163 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
2164 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2165 void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2166 void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2167 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
2168 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2169 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
2170 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2171 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
2172 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2173 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
2174 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2175
2176 void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2177 void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2178 void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2179 void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2180
2181 void shlxl(Register dst, Register src1, Register src2);
2182 void shlxq(Register dst, Register src1, Register src2);
2183 void shrxl(Register dst, Register src1, Register src2);
2184 void shrxq(Register dst, Register src1, Register src2);
2185
2186 void bzhiq(Register dst, Register src1, Register src2);
2187 void pdep(Register dst, Register src1, Register src2);
2188 void pext(Register dst, Register src1, Register src2);
2189
2190
2191 //====================VECTOR ARITHMETIC=====================================
2192 // Add Packed Floating-Point Values
2193 void addpd(XMMRegister dst, XMMRegister src);
2194 void addpd(XMMRegister dst, Address src);
2195 void addps(XMMRegister dst, XMMRegister src);
2196 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2197 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2198 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2199 void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2200
2201 // Subtract Packed Floating-Point Values
2202 void subpd(XMMRegister dst, XMMRegister src);
2203 void subps(XMMRegister dst, XMMRegister src);
2204 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2205 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2206 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2207 void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2208
2209 // Multiply Packed Floating-Point Values
2210 void mulpd(XMMRegister dst, XMMRegister src);
2211 void mulpd(XMMRegister dst, Address src);
2212 void mulps(XMMRegister dst, XMMRegister src);
2213 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2214 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2215 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2216 void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2217
// Packed floating-point vfmadd231 forms (pd / ps variants), each declared with a
// register-source and an Address-source overload.
// NOTE(review): presumably the x86 FMA "231" fused multiply-add (VFMADD231PD/PS);
// the exact operand roles of dst/nds/src are not visible here - confirm against
// the encoder implementation.
2218 void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2219 void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2220 void vfmadd231pd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2221 void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2222
2223 // Divide Packed Floating-Point Values
2224 void divpd(XMMRegister dst, XMMRegister src);
2225 void divps(XMMRegister dst, XMMRegister src);
2226 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2227 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2228 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2229 void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2230
2231 // Sqrt Packed Floating-Point Values
2232 void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
2233 void vsqrtpd(XMMRegister dst, Address src, int vector_len);
2234 void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);
2235 void vsqrtps(XMMRegister dst, Address src, int vector_len);
2236
2237 // Round Packed Double precision value.
2238 void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2239 void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2240 void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2241 void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2242
2243 // Bitwise Logical AND of Packed Floating-Point Values
2244 void andpd(XMMRegister dst, XMMRegister src);
2245 void andps(XMMRegister dst, XMMRegister src);
2246 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2247 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2248 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2249 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2250
// Two-operand, register-only unpack declarations for packed doubles.
// NOTE(review): presumably UNPCKHPD / UNPCKLPD (interleave the high resp. low
// double of dst and src) - confirm against the encoder implementation.
2251 void unpckhpd(XMMRegister dst, XMMRegister src);
2252 void unpcklpd(XMMRegister dst, XMMRegister src);
2253
2254 // Bitwise Logical XOR of Packed Floating-Point Values
2255 void xorpd(XMMRegister dst, XMMRegister src);
2256 void xorps(XMMRegister dst, XMMRegister src);
2257 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2258 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2259 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2260 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2261
2262 // Add horizontal packed integers
2263 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2264 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2265 void phaddw(XMMRegister dst, XMMRegister src);
2266 void phaddd(XMMRegister dst, XMMRegister src);
2267
2268 // Add packed integers
2269 void paddb(XMMRegister dst, XMMRegister src);
2270 void paddw(XMMRegister dst, XMMRegister src);
2271 void paddd(XMMRegister dst, XMMRegister src);
2272 void paddd(XMMRegister dst, Address src);
2273 void paddq(XMMRegister dst, XMMRegister src);
2274 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2275 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2276 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2277 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2278 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2279 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2280 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2281 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2282
2283 // Leaf level assembler routines for masked operations.
2284 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2285 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2286 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2287 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2288 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2289 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2290 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2291 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2292 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2293 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2294 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2295 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2296 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2297 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2298 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2299 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2300 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2301 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2302 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2303 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2304 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2305 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2306 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2307 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2308 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2309 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2310 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2311 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2312 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2313 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2314 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2315 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2316 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2317 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2318 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2319 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2320 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2321 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2322 void evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2323 void evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2324 void evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2325 void evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2326 void evpabsd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2327 void evpabsd(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2328 void evpabsq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2329 void evpabsq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2330 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2331 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2332 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2333 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2334 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2335 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2336 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2337 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2338 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2339 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2340 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2341 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2342 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2343 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2344 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2345 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2346 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2347 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2348 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2349 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2350 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2351 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2352 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2353 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2354 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2355
// Masked shifts whose count is passed as an integer (`int shift`) instead of in a
// register. Unlike the register-count overloads of the same names declared above,
// these take a single source register and no separate nds operand.
// NOTE(review): the count is presumably encoded as an immediate - confirm.
2356 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2357 void evpslld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2358 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2359 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2360 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2361 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2362 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2363 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2364 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2365
// Masked counterparts of the "variable shift" declarations (the ...v... mnemonics):
// left (sllv), logical right (srlv) and arithmetic right (srav), each in w/d/q
// element-size variants. All are register-only (no Address-source overloads here).
// NOTE(review): presumably each element is shifted by its own per-element count -
// confirm which of nds/src carries the counts.
2366 void evpsllvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2367 void evpsllvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2368 void evpsllvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2369 void evpsrlvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2370 void evpsrlvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2371 void evpsrlvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2372 void evpsravw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2373 void evpsravd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2374 void evpsravq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
// Masked maximum / minimum of packed integers in b/w/d/q element-size variants.
// The register-source overloads are listed first, followed by the matching
// Address-source overloads.
// NOTE(review): the "s" in pmaxs/pmins presumably denotes signed comparison - confirm.
2375 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2376 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2377 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2378 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2379 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2380 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2381 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2382 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2383 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2384 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2385 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2386 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2387 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2388 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2389 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2390 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Masked bitwise OR / AND / XOR of packed integers, in d and q variants, each with
// a register-source and an Address-source overload.
// NOTE(review): for a bitwise operation the d/q suffix presumably only affects the
// granularity at which the mask is applied - confirm.
2391 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2392 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2393 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2394 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2395 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2396 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2397 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2398 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2399 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2400 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2401 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2402 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2403
// Masked rotates. evprold/evprolq/evprord/evprorq take the rotate count as an
// integer (`int shift`) and a single source register; the evprolv*/evprorv* forms
// take two register operands (nds, src) instead of an integer count.
// NOTE(review): presumably "l"/"r" select rotate-left/right and d/q the element
// width, with the count register being one of nds/src - confirm.
2404 void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2405 void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2406 void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2407 void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2408 void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2409 void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2410 void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2411 void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2412
2413 // Sub packed integers
2414 void psubb(XMMRegister dst, XMMRegister src);
2415 void psubw(XMMRegister dst, XMMRegister src);
2416 void psubd(XMMRegister dst, XMMRegister src);
2417 void psubq(XMMRegister dst, XMMRegister src);
2418 void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2419 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2420 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2421 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2422 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2423 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2424 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2425 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2426 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2427
2428 // Multiply packed integers (shorts and ints, plus the quadword vpmullq forms)
2429 void pmullw(XMMRegister dst, XMMRegister src);
2430 void pmulld(XMMRegister dst, XMMRegister src);
2431 void pmuludq(XMMRegister dst, XMMRegister src);
2432 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2433 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2434 void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2435 void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2436 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2437 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2438 void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2439 void vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2440
2441 // Minimum of packed integers and of packed floating-point values (minps/minpd)
2442 void pminsb(XMMRegister dst, XMMRegister src);
2443 void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2444 void pminsw(XMMRegister dst, XMMRegister src);
2445 void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2446 void pminsd(XMMRegister dst, XMMRegister src);
2447 void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2448 void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2449 void minps(XMMRegister dst, XMMRegister src);
2450 void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2451 void minpd(XMMRegister dst, XMMRegister src);
2452 void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2453
2454 // Maximum of packed integers and of packed floating-point values (maxps/maxpd)
2455 void pmaxsb(XMMRegister dst, XMMRegister src);
2456 void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2457 void pmaxsw(XMMRegister dst, XMMRegister src);
2458 void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2459 void pmaxsd(XMMRegister dst, XMMRegister src);
2460 void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2461 void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2462 void maxps(XMMRegister dst, XMMRegister src);
2463 void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2464 void maxpd(XMMRegister dst, XMMRegister src);
2465 void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2466
2467 // Shift left packed integers
2468 void psllw(XMMRegister dst, int shift);
2469 void pslld(XMMRegister dst, int shift);
2470 void psllq(XMMRegister dst, int shift);
2471 void psllw(XMMRegister dst, XMMRegister shift);
2472 void pslld(XMMRegister dst, XMMRegister shift);
2473 void psllq(XMMRegister dst, XMMRegister shift);
2474 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2475 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2476 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2477 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2478 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2479 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2480 void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2481
2482 // Logical shift right packed integers
2483 void psrlw(XMMRegister dst, int shift);
2484 void psrld(XMMRegister dst, int shift);
2485 void psrlq(XMMRegister dst, int shift);
2486 void psrlw(XMMRegister dst, XMMRegister shift);
2487 void psrld(XMMRegister dst, XMMRegister shift);
2488 void psrlq(XMMRegister dst, XMMRegister shift);
2489 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2490 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2491 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2492 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2493 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2494 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2495 void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2496 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2497 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2498
2499 // Arithmetic shift right packed integers (shorts and ints; longs only via the evpsraq forms below)
2500 void psraw(XMMRegister dst, int shift);
2501 void psrad(XMMRegister dst, int shift);
2502 void psraw(XMMRegister dst, XMMRegister shift);
2503 void psrad(XMMRegister dst, XMMRegister shift);
2504 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2505 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2506 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2507 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2508 void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2509 void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2510 void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2511
2512 // Variable shift left packed integers
2513 void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2514 void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2515
2516 // Variable shift right packed integers
2517 void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2518 void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2519
2520 // Variable shift right arithmetic packed integers
2521 void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2522 void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2523
// Register-only declarations taking (dst, src, shift, vector_len).
// NOTE(review): presumably the concatenate-and-variable-shift dword instructions
// VPSHLDVD / VPSHRDVD (AVX-512 VBMI2) - confirm against the encoder implementation.
2524 void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2525 void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2526
2527 // And packed integers
2528 void pand(XMMRegister dst, XMMRegister src);
2529 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2530 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2531 void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2532
2533 // Andn packed integers
2534 void pandn(XMMRegister dst, XMMRegister src);
2535 void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2536
2537 // Or packed integers
2538 void por(XMMRegister dst, XMMRegister src);
2539 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2540 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2541 void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2542
2543 // Xor packed integers
2544 void pxor(XMMRegister dst, XMMRegister src);
2545 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2546 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2547 void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2548 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2549 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2550
2551 // Ternary logic instruction.
2552 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2553 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
2554 void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2555
2556 // Vector Rotate Left/Right instruction.
2557 void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2558 void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2559 void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2560 void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2561 void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2562 void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2563 void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2564 void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2565
2566 // vinserti forms
2567 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2568 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2569 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2570 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2571 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2572
2573 // vinsertf forms
2574 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2575 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2576 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2577 void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2578 void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2579 void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2580
2581 // vextracti forms
2582 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2583 void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
2584 void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2585 void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
2586 void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2587 void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2588 void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);
2589
2590 // vextractf forms
2591 void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2592 void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
2593 void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2594 void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
2595 void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2596 void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2597 void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
2598
2599 // xmm/mem sourced byte/word/dword/qword replicate
2600 void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2601 void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
2602 void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2603 void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
2604 void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2605 void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
2606 void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2607 void vpbroadcastq(XMMRegister dst, Address src, int vector_len);
2608
// evbroadcasti32x4 is declared for an Address source only; evbroadcasti64x2 has
// both a register-source and an Address-source overload.
// NOTE(review): presumably these replicate a 128-bit integer lane (4 x 32-bit or
// 2 x 64-bit elements) across the destination - confirm.
2609 void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
2610 void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
2611 void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
2612
2613 // scalar single/double/128bit precision replicate
2614 void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2615 void vbroadcastss(XMMRegister dst, Address src, int vector_len);
2616 void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2617 void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
2618 void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
2619
2620 // gpr sourced byte/word/dword/qword replicate
2621 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2622 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2623 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2624 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2625
2626 // Gather AVX2 and AVX3
2627 void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2628 void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2629 void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2630 void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2631 void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
2632 void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
2633 void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
2634 void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
2635
2636 // Scatter AVX3 only
2637 void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
2638 void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
2639 void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
2640 void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
2641
2642 // Carry-Less Multiplication Quadword
2643 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2644 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2645 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2646 // AVX instruction which is used to clear upper 128 bits of YMM registers and
2647 // to avoid the transition penalty between AVX and SSE states. There is no
2648 // penalty if legacy SSE instructions are encoded using VEX prefix because
2649 // they always clear upper 128 bits. It should be used before calling
2650 // runtime code and native libraries.
2651 void vzeroupper();
2652
2653 // Vector double compares
2654 void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2655 void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2656 ComparisonPredicateFP comparison, int vector_len);
2657
2658 // Vector float compares
2659 void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
2660 void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2661 ComparisonPredicateFP comparison, int vector_len);
2662
2663 // Vector integer compares
2664 void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2665 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2666 int comparison, bool is_signed, int vector_len);
2667 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2668 int comparison, bool is_signed, int vector_len);
2669
2670 // Vector long compares
2671 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2672 int comparison, bool is_signed, int vector_len);
2673 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2674 int comparison, bool is_signed, int vector_len);
2675
2676 // Vector byte compares
2677 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2678 int comparison, bool is_signed, int vector_len);
2679 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2680 int comparison, bool is_signed, int vector_len);
2681
2682 // Vector short compares
2683 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2684 int comparison, bool is_signed, int vector_len);
2685 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2686 int comparison, bool is_signed, int vector_len);
2687
2688 void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);
2689 void evpmovw2m(KRegister dst, XMMRegister src, int vector_len);
2690 void evpmovd2m(KRegister dst, XMMRegister src, int vector_len);
2691 void evpmovq2m(KRegister dst, XMMRegister src, int vector_len);
2692 void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
2693 void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
2694 void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
2695 void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
2696
2697 // Vector blends
2698 void blendvps(XMMRegister dst, XMMRegister src);
2699 void blendvpd(XMMRegister dst, XMMRegister src);
2700 void pblendvb(XMMRegister dst, XMMRegister src);
2701 void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2702 void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2703 void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2704 void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2705 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2706 void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2707 void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2708 void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2709 void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2710 void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2711 void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2712 protected:
2713 // The following instructions require 16-byte address alignment in SSE mode.
2714 // They should be called only from corresponding MacroAssembler instructions.
2715 void andpd(XMMRegister dst, Address src);
2716 void andps(XMMRegister dst, Address src);
2717 void xorpd(XMMRegister dst, Address src);
2718 void xorps(XMMRegister dst, Address src);
2719
2720};
2721
2722// The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
2723// Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
2724// are applied.
2725class InstructionAttr {
2726public:
2727 InstructionAttr(
2728 int vector_len, // The length of vector to be applied in encoding - for both AVX and EVEX
2729 bool rex_vex_w, // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
2730 bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
2731 bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
2732 bool uses_vl) // This instruction may have legacy constraints based on vector length for EVEX
2733 :
2734 _rex_vex_w(rex_vex_w),
2735 _legacy_mode(legacy_mode || UseAVX < 3),
2736 _no_reg_mask(no_reg_mask),
2737 _uses_vl(uses_vl),
2738 _rex_vex_w_reverted(false),
2739 _is_evex_instruction(false),
2740 _is_clear_context(true),
2741 _is_extended_context(false),
2742 _avx_vector_len(vector_len),
2743 _tuple_type(Assembler::EVEX_ETUP),
2744 _input_size_in_bits(Assembler::EVEX_NObit),
2745 _evex_encoding(0),
2746 _embedded_opmask_register_specifier(0), // hard code k0
2747 _current_assembler(NULL__null) { }
2748
2749 ~InstructionAttr() {
2750 if (_current_assembler != NULL__null) {
2751 _current_assembler->clear_attributes();
2752 }
2753 _current_assembler = NULL__null;
2754 }
2755
2756private:
2757 bool _rex_vex_w;
2758 bool _legacy_mode;
2759 bool _no_reg_mask;
2760 bool _uses_vl;
2761 bool _rex_vex_w_reverted;
2762 bool _is_evex_instruction;
2763 bool _is_clear_context;
2764 bool _is_extended_context;
2765 int _avx_vector_len;
2766 int _tuple_type;
2767 int _input_size_in_bits;
2768 int _evex_encoding;
2769 int _embedded_opmask_register_specifier;
2770
2771 Assembler *_current_assembler;
2772
2773public:
2774 // query functions for field accessors
2775 bool is_rex_vex_w(void) const { return _rex_vex_w; }
2776 bool is_legacy_mode(void) const { return _legacy_mode; }
2777 bool is_no_reg_mask(void) const { return _no_reg_mask; }
2778 bool uses_vl(void) const { return _uses_vl; }
2779 bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
2780 bool is_evex_instruction(void) const { return _is_evex_instruction; }
2781 bool is_clear_context(void) const { return _is_clear_context; }
2782 bool is_extended_context(void) const { return _is_extended_context; }
2783 int get_vector_len(void) const { return _avx_vector_len; }
2784 int get_tuple_type(void) const { return _tuple_type; }
2785 int get_input_size(void) const { return _input_size_in_bits; }
2786 int get_evex_encoding(void) const { return _evex_encoding; }
2787 int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
2788
2789 // Set the vector len manually
2790 void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
2791
2792 // Set revert rex_vex_w for avx encoding
2793 void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
2794
2795 // Set rex_vex_w based on state
2796 void set_rex_vex_w(bool state) { _rex_vex_w = state; }
2797
2798 // Set the instruction to be encoded in AVX mode
2799 void set_is_legacy_mode(void) { _legacy_mode = true; }
2800
2801 // Set the current instuction to be encoded as an EVEX instuction
2802 void set_is_evex_instruction(void) { _is_evex_instruction = true; }
2803
2804 // Internal encoding data used in compressed immediate offset programming
2805 void set_evex_encoding(int value) { _evex_encoding = value; }
2806
2807 // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
2808 // This method unsets it so that merge semantics are used instead.
2809 void reset_is_clear_context(void) { _is_clear_context = false; }
2810
2811 // Map back to current asembler so that we can manage object level assocation
2812 void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
2813
2814 // Address modifiers used for compressed displacement calculation
2815 void set_address_attributes(int tuple_type, int input_size_in_bits);
2816
2817 // Set embedded opmask register specifier.
2818 void set_embedded_opmask_register_specifier(KRegister mask) {
2819 _embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
2820 }
2821
2822};
2823
2824#endif // CPU_X86_ASSEMBLER_X86_HPP