Bug Summary

File:jdk/src/hotspot/cpu/x86/assembler_x86.hpp
Warning:line 233, column 5
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name c2_MacroAssembler_x86.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I 
/home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers 
-Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

1/*
2 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "asm/assembler.inline.hpp"
28#include "oops/methodData.hpp"
29#include "opto/c2_MacroAssembler.hpp"
30#include "opto/intrinsicnode.hpp"
31#include "opto/opcodes.hpp"
32#include "opto/subnode.hpp"
33#include "runtime/objectMonitor.hpp"
34#include "runtime/stubRoutines.hpp"
35
// Maps a vector size in bytes to the Assembler::AvxVectorLen encoding.
//   4, 8 and 16 bytes -> AVX_128bit
//   32 bytes          -> AVX_256bit
//   64 bytes          -> AVX_512bit
// Any other size reaches ShouldNotReachHere(); the return of AVX_NoVec that
// follows it is the value of the default case.
36inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
37 switch (vlen_in_bytes) {
38 case 4: // fall-through
39 case 8: // fall-through
40 case 16: return Assembler::AVX_128bit;
41 case 32: return Assembler::AVX_256bit;
42 case 64: return Assembler::AVX_512bit;
43
44 default: {
// Unsupported vector size: report it, then return the no-vector encoding.
45 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 45); ::breakpoint(); } while (0)
;
46 return Assembler::AVX_NoVec;
47 }
48 }
49}
50
// Emits code that builds a bit mask from src and loads it into the opmask
// register mask. Requires PostLoopMultiversioning (enforced by guarantee).
//   dst  - scratch for the computation; on exit it holds a copy of src
//   src  - shift count used to build the mask
//   mask - opmask register that receives the result
// Emitted sequence: dst = 1; dst = dst << src; dst = dst - 1; mask = dst;
// dst = src.
51void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
52 guarantee(PostLoopMultiversioning, "must be")do { if (!(PostLoopMultiversioning)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 52, "guarantee(" "PostLoopMultiversioning" ") failed", "must be"
); ::breakpoint(); } } while (0)
;
// (1 << src) - 1: a run of src low-order one bits.
53 Assembler::movl(dst, 1);
54 Assembler::shlxl(dst, dst, src);
55 Assembler::decl(dst);
56 Assembler::kmovdl(mask, dst);
// Leave the original count in dst.
57 Assembler::movl(dst, src);
58}
59
// Emits a single knotwl(mask, k0), which writes the complement of k0 into
// mask. Requires PostLoopMultiversioning (enforced by guarantee).
// NOTE(review): presumably this resets mask to all ones after setvectmask;
// that depends on the contents of k0, which this file does not show - confirm.
60void C2_MacroAssembler::restorevectmask(KRegister mask) {
61 guarantee(PostLoopMultiversioning, "must be")do { if (!(PostLoopMultiversioning)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 61, "guarantee(" "PostLoopMultiversioning" ") failed", "must be"
); ::breakpoint(); } } while (0)
;
62 Assembler::knotwl(mask, k0);
63}
64
65#if INCLUDE_RTM_OPT1
66
67// Update the RTM counters based on the abort status.
68// input: abort_status - register holding the abort status bits
69// rtm_counters - register holding the RTMLockingCounters* address
70// flags are killed
// The emitted code always increments abort_count. When
// PrintPreciseRTMLockingStatistics is set it also increments one abortX
// counter for every status bit i < ABORT_STATUS_LIMIT that is set.
71void C2_MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
72
73 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
74 if (PrintPreciseRTMLockingStatistics) {
// This loop runs at code-generation time: one test/branch/increment group is
// emitted per status bit.
75 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
76 Label check_abort;
77 testl(abort_status, (1<<i));
// Bit i is clear: skip the increment of counter i.
78 jccb(Assembler::equal, check_abort)jccb_0(Assembler::equal, check_abort, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 78)
;
// The abortX counters are addressed as consecutive uintx-sized slots.
79 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
80 bind(check_abort);
81 }
82 }
83}
84
85// Branch to brLabel if (random & (count-1)) != 0, where count is 2^n and the
// random value is the low half of the time-stamp counter read by rdtsc.
86// tmp, scr and flags are killed
// tmp must be rax and scr must be rdx (asserted), because rdtsc writes its
// result to EDX:EAX. The branch is emitted in the short form (jccb).
87void C2_MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
88 assert(tmp == rax, "")do { if (!(tmp == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 88, "assert(" "tmp == rax" ") failed", ""); ::breakpoint();
} } while (0)
;
89 assert(scr == rdx, "")do { if (!(scr == rdx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 89, "assert(" "scr == rdx" ") failed", ""); ::breakpoint();
} } while (0)
;
90 rdtsc(); // modifies EDX:EAX
// Keep only the low bits selected by count-1; the and sets ZF for the branch.
91 andptr(tmp, count-1);
92 jccb(Assembler::notZero, brLabel)jccb_0(Assembler::notZero, brLabel, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 92)
;
93}
94
95// Perform abort ratio calculation, set no_rtm bit if high ratio
96// input: rtm_counters_Reg (RTMLockingCounters* address)
97// tmpReg, rtm_counters_Reg and flags are killed
// rtm_counters is the same counters object as a C++ pointer; it is used to
// reload rtm_counters_Reg after that register has been reused as scratch.
// method_data may be NULL, in which case no rtm_state bits are written.
// Two outcomes are possible besides doing nothing: NoRTM is or-ed into the
// rtm_state of the MDO when the abort ratio is high, or UseRTM is or-ed in
// when total_count has reached RTMLockingThreshold / RTMTotalCountIncrRate.
98void C2_MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
99 Register rtm_counters_Reg,
100 RTMLockingCounters* rtm_counters,
101 Metadata* method_data) {
102 Label L_done, L_check_always_rtm1, L_check_always_rtm2;
103
104 if (RTMLockingCalculationDelay > 0) {
105 // Delay calculation
// Skip everything while the calculation flag still reads as zero.
106 movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
107 testptr(tmpReg, tmpReg);
108 jccb(Assembler::equal, L_done)jccb_0(Assembler::equal, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 108)
;
109 }
110 // Abort ratio calculation only if abort_count > RTMAbortThreshold
111 // Aborted transactions = abort_count * 100
112 // All transactions = total_count * RTMTotalCountIncrRate
113 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
114
115 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
116 cmpptr(tmpReg, RTMAbortThreshold);
// Too few aborts: go straight to the always-rtm check. rtm_counters_Reg is
// still intact on this path, so the target label is the one after the reload.
117 jccb(Assembler::below, L_check_always_rtm2)jccb_0(Assembler::below, L_check_always_rtm2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 117)
;
118 imulptr(tmpReg, tmpReg, 100);
119
// scrReg aliases rtm_counters_Reg: from here on the counters address is lost
// until it is reloaded at L_check_always_rtm1.
120 Register scrReg = rtm_counters_Reg;
121 movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
122 imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
123 imulptr(scrReg, scrReg, RTMAbortRatio);
124 cmpptr(tmpReg, scrReg);
125 jccb(Assembler::below, L_check_always_rtm1)jccb_0(Assembler::below, L_check_always_rtm1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 125)
;
126 if (method_data != NULL__null) {
127 // set rtm_state to "no rtm" in MDO
128 mov_metadata(tmpReg, method_data);
129 lock();
130 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
131 }
132 jmpb(L_done)jmpb_0(L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 132)
;
133 bind(L_check_always_rtm1);
134 // Reload RTMLockingCounters* address
135 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
136 bind(L_check_always_rtm2);
137 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
138 cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
139 jccb(Assembler::below, L_done)jccb_0(Assembler::below, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 139)
;
140 if (method_data != NULL__null) {
141 // set rtm_state to "always rtm" in MDO
142 mov_metadata(tmpReg, method_data);
143 lock();
144 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
145 }
146 bind(L_done);
147}
148
149// Update counters and perform abort ratio calculation
150// input: abort_status_Reg
151// rtm_counters_Reg, flags are killed
// rtm_counters - counters object to update; must not be NULL (asserted)
// method_data  - forwarded to rtm_abort_ratio_calculation
// profile_rtm  - when false only the counters are updated; when true the
//                abort ratio calculation is emitted as well
152void C2_MacroAssembler::rtm_profiling(Register abort_status_Reg,
153 Register rtm_counters_Reg,
154 RTMLockingCounters* rtm_counters,
155 Metadata* method_data,
156 bool profile_rtm) {
157
158 assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 158, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
159 // update rtm counters based on rax value at abort
160 // reads abort_status_Reg, updates flags
161 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
162 rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
163 if (profile_rtm) {
164 // Save abort status because abort_status_Reg is used by following code.
// The status only needs to survive when a retry decision will read it later,
// hence the RTMRetryCount > 0 guard on both the push and the matching pop.
165 if (RTMRetryCount > 0) {
166 push(abort_status_Reg);
167 }
168 assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 168, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
// abort_status_Reg is passed as the tmpReg argument and is clobbered there.
169 rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
170 // restore abort status
171 if (RTMRetryCount > 0) {
172 pop(abort_status_Reg);
173 }
174 }
175}
176
177// Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4)
178// inputs: retry_count_Reg
179// : abort_status_Reg
180// output: retry_count_Reg decremented by 1
181// flags are killed
// abort_status_Reg must be rax (asserted) and is overwritten by the masking
// and. Control jumps to retryLabel when a retry is taken; otherwise it falls
// through to the end of the emitted sequence.
182void C2_MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
183 Label doneRetry;
184 assert(abort_status_Reg == rax, "")do { if (!(abort_status_Reg == rax)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 184, "assert(" "abort_status_Reg == rax" ") failed", ""); ::
breakpoint(); } } while (0)
;
185 // The abort reason bits are in eax (see all states in rtmLocking.hpp)
186 // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
187 // if reason is in 0x6 and retry count != 0 then retry
188 andptr(abort_status_Reg, 0x6);
// Neither retryable bit is set: give up.
189 jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 189)
;
190 testl(retry_count_Reg, retry_count_Reg);
// Retry budget exhausted: give up.
191 jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 191)
;
192 pause();
193 decrementl(retry_count_Reg);
194 jmp(retryLabel);
195 bind(doneRetry);
196}
197
198// Spin and retry if lock is busy,
199// inputs: box_Reg (monitor address)
200// : retry_count_Reg
201// output: retry_count_Reg decremented by 1
202// : clear z flag if retry count exceeded
203// tmp_Reg, scr_Reg, flags are killed
// When retries remain, the emitted code spins for at most RTMSpinLoopCount
// iterations while the owner field read through box_Reg is non-zero, then
// jumps to retryLabel. When no retries remain it falls through with ZF clear.
204void C2_MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
205 Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
206 Label SpinLoop, SpinExit, doneRetry;
// Owner field offset with the monitor_value tag subtracted, so it can be
// applied to a tagged monitor address.
207 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
;
208
209 testl(retry_count_Reg, retry_count_Reg);
210 jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 210)
;
211 decrementl(retry_count_Reg);
212 movptr(scr_Reg, RTMSpinLoopCount);
213
214 bind(SpinLoop);
215 pause();
216 decrementl(scr_Reg);
// Spin budget used up: stop spinning and retry anyway.
217 jccb(Assembler::lessEqual, SpinExit)jccb_0(Assembler::lessEqual, SpinExit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 217)
;
218 movptr(tmp_Reg, Address(box_Reg, owner_offset));
219 testptr(tmp_Reg, tmp_Reg);
// Owner still set: keep spinning.
220 jccb(Assembler::notZero, SpinLoop)jccb_0(Assembler::notZero, SpinLoop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 220)
;
221
222 bind(SpinExit);
223 jmp(retryLabel);
224 bind(doneRetry);
// retry_count_Reg is zero on this path, so the increment yields a non-zero
// result.
225 incrementl(retry_count_Reg); // clear z flag
226}
227
228// Use RTM for normal stack locks
229// Input: objReg (object to lock)
// tmpReg must be rax and scrReg must be rdx (asserted); both are clobbered.
// retry_on_abort_count_Reg - loaded with RTMRetryCount when retries are enabled
// stack_rtm_counters - counters used for statistics and profiling; must not
//                      be NULL when PrintPreciseRTMLockingStatistics or
//                      profile_rtm is set (asserted)
// method_data, profile_rtm - forwarded to rtm_profiling
// DONE_LABEL - target when the mark word is seen unlocked inside the transaction
// IsInflated - target when the mark word has the monitor bit set
// If the transaction is not entered successfully and no retry is taken,
// control falls through to the code emitted after this sequence.
230void C2_MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
231 Register retry_on_abort_count_Reg,
232 RTMLockingCounters* stack_rtm_counters,
233 Metadata* method_data, bool profile_rtm,
234 Label& DONE_LABEL, Label& IsInflated) {
235 assert(UseRTMForStackLocks, "why call this otherwise?")do { if (!(UseRTMForStackLocks)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 235, "assert(" "UseRTMForStackLocks" ") failed", "why call this otherwise?"
); ::breakpoint(); } } while (0)
;
236 assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 236, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
237 assert(scrReg == rdx, "")do { if (!(scrReg == rdx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 237, "assert(" "scrReg == rdx" ") failed", ""); ::breakpoint
(); } } while (0)
;
238 Label L_rtm_retry, L_decrement_retry, L_on_abort;
239
// L_rtm_retry is only bound when retries are enabled; it is only referenced
// under the same RTMRetryCount > 0 condition at the end of this function.
240 if (RTMRetryCount > 0) {
241 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
242 bind(L_rtm_retry);
243 }
244 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
245 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
246 jcc(Assembler::notZero, IsInflated);
247
248 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
249 Label L_noincrement;
// With an increment rate above 1, total_count is only bumped on a random
// subset of passes.
250 if (RTMTotalCountIncrRate > 1) {
251 // tmpReg, scrReg and flags are killed
252 branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
253 }
254 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(stack_rtm_counters != __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 254, "assert(" "stack_rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
255 atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
256 bind(L_noincrement);
257 }
// Start the transaction; L_on_abort is where execution resumes on abort.
258 xbegin(L_on_abort);
259 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
260 andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits
261 cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked
262 jcc(Assembler::equal, DONE_LABEL); // all done if unlocked
263
// The object is not unlocked: leave the transaction, either by ending it
// with a synthetic retryable status or by aborting it explicitly.
264 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
265 if (UseRTMXendForLockBusy) {
266 xend();
267 movptr(abort_status_Reg, 0x2); // Set the abort status to 2 (so we can retry)
// This path jumps past L_on_abort and therefore skips rtm_profiling.
268 jmp(L_decrement_retry);
269 }
270 else {
271 xabort(0);
272 }
273 bind(L_on_abort);
274 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
275 rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
276 }
277 bind(L_decrement_retry);
278 if (RTMRetryCount > 0) {
279 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
280 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
281 }
282}
283
284// Use RTM for inflating locks
285// inputs: objReg (object to lock)
286// boxReg (on-stack box address (displaced header location) - KILLED)
287// tmpReg (ObjectMonitor address + markWord::monitor_value)
// tmpReg must be rax and scrReg must be rdx (asserted); both are clobbered.
// retry_on_busy_count_Reg, retry_on_abort_count_Reg - loaded with
//   RTMRetryCount when retries are enabled
// rtm_counters - counters used for statistics and profiling; must not be NULL
//   when PrintPreciseRTMLockingStatistics or profile_rtm is set (asserted)
// method_data, profile_rtm - forwarded to rtm_profiling
// DONE_LABEL - target when the owner field is seen as zero inside the
//   transaction, or (with retries enabled) when the fallback CAS succeeds
288void C2_MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
289 Register scrReg, Register retry_on_busy_count_Reg,
290 Register retry_on_abort_count_Reg,
291 RTMLockingCounters* rtm_counters,
292 Metadata* method_data, bool profile_rtm,
293 Label& DONE_LABEL) {
294 assert(UseRTMLocking, "why call this otherwise?")do { if (!(UseRTMLocking)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 294, "assert(" "UseRTMLocking" ") failed", "why call this otherwise?"
); ::breakpoint(); } } while (0)
;
295 assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 295, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
296 assert(scrReg == rdx, "")do { if (!(scrReg == rdx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 296, "assert(" "scrReg == rdx" ") failed", ""); ::breakpoint
(); } } while (0)
;
297 Label L_rtm_retry, L_decrement_retry, L_on_abort;
// Owner field offset with the monitor_value tag subtracted, so it can be
// applied directly to the tagged monitor address.
298 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
;
299
300 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
// Store the unused-mark value into the on-stack box, then reuse boxReg to
// hold the tagged monitor address for the rest of the sequence.
301 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
302 movptr(boxReg, tmpReg); // Save ObjectMonitor address
303
304 if (RTMRetryCount > 0) {
305 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
306 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
307 bind(L_rtm_retry);
308 }
309 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
310 Label L_noincrement;
// With an increment rate above 1, total_count is only bumped on a random
// subset of passes.
311 if (RTMTotalCountIncrRate > 1) {
312 // tmpReg, scrReg and flags are killed
313 branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
314 }
315 assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 315, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
316 atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
317 bind(L_noincrement);
318 }
// Start the transaction; L_on_abort is where execution resumes on abort.
319 xbegin(L_on_abort);
// Reload the mark word from the object and read the owner field through it.
320 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
321 movptr(tmpReg, Address(tmpReg, owner_offset));
322 testptr(tmpReg, tmpReg);
// Owner is zero: done, still inside the transaction.
323 jcc(Assembler::zero, DONE_LABEL);
324 if (UseRTMXendForLockBusy) {
325 xend();
326 jmp(L_decrement_retry);
327 }
328 else {
329 xabort(0);
330 }
331 bind(L_on_abort);
332 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
333 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
334 rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
335 }
336 if (RTMRetryCount > 0) {
337 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
338 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
339 }
340
// Non-transactional fallback: check the owner field through the saved
// monitor address and, if it is zero, try to install this thread with a CAS.
341 movptr(tmpReg, Address(boxReg, owner_offset)) ;
342 testptr(tmpReg, tmpReg) ;
343 jccb(Assembler::notZero, L_decrement_retry)jccb_0(Assembler::notZero, L_decrement_retry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 343)
;
344
345 // Appears unlocked - try to swing _owner from null to non-null.
346 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
347#ifdef _LP641
348 Register threadReg = r15_thread;
349#else
350 get_thread(scrReg);
351 Register threadReg = scrReg;
352#endif
353 lock();
354 cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
355
// L_decrement_retry is bound in both branches below; only the retry-enabled
// branch emits the success test and the spin-and-retry sequence.
356 if (RTMRetryCount > 0) {
357 // success done else retry
358 jccb(Assembler::equal, DONE_LABEL)jccb_0(Assembler::equal, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 358)
;
359 bind(L_decrement_retry);
360 // Spin and retry if lock is busy.
361 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
362 }
363 else {
364 bind(L_decrement_retry);
365 }
366}
367
368#endif // INCLUDE_RTM_OPT
369
370// fast_lock and fast_unlock used by C2
371
372// Because the transitions from emitted code to the runtime
373// monitorenter/exit helper stubs are so slow it's critical that
374// we inline both the stack-locking fast path and the inflated fast path.
375//
376// See also: cmpFastLock and cmpFastUnlock.
377//
378// What follows is a specialized inline transliteration of the code
379// in enter() and exit(). If we're concerned about I$ bloat another
380// option would be to emit TrySlowEnter and TrySlowExit methods
381// at startup-time. These methods would accept arguments as
382// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
383// indications in the icc.ZFlag. fast_lock and fast_unlock would simply
384// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
385// In practice, however, the # of lock sites is bounded and is usually small.
386// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
387// if the processor uses simple bimodal branch predictors keyed by EIP
388// Since the helper routines would be called from multiple synchronization
389// sites.
390//
391// An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
392// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
393// to those specialized methods. That'd give us a mostly platform-independent
394// implementation that the JITs could optimize and inline at their pleasure.
395// Done correctly, the only time we'd need to cross to native code would be
396// to park() or unpark() threads. We'd also need a few more unsafe operators
397// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
398// (b) explicit barriers or fence operations.
399//
400// TODO:
401//
402// * Arrange for C2 to pass "Self" into fast_lock and fast_unlock in one of the registers (scr).
403// This avoids manifesting the Self pointer in the fast_lock and fast_unlock terminals.
404// Given TLAB allocation, Self is usually manifested in a register, so passing it into
405// the lock operators would typically be faster than reifying Self.
406//
407// * Ideally I'd define the primitives as:
408// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
409// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
410// Unfortunately ADLC bugs prevent us from expressing the ideal form.
411// Instead, we're stuck with a rather awkward and brittle register assignments below.
412// Furthermore the register assignments are overconstrained, possibly resulting in
413// sub-optimal code near the synchronization site.
414//
415// * Eliminate the sp-proximity tests and just use "== Self" tests instead.
416// Alternately, use a better sp-proximity test.
417//
418// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
419// Either one is sufficient to uniquely identify a thread.
420// TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
421//
422// * Intrinsify notify() and notifyAll() for the common cases where the
423// object is locked by the calling thread but the waitlist is empty.
424// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
425//
426// * use jccb and jmpb instead of jcc and jmp to improve code density.
427// But beware of excessive branch density on AMD Opterons.
428//
429// * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
430// or failure of the fast path. If the fast path fails then we pass
431// control to the slow path, typically in C. In fast_lock and
432// fast_unlock we often branch to DONE_LABEL, just to find that C2
433// will emit a conditional branch immediately after the node.
434// So we have branches to branches and lots of ICC.ZF games.
435// Instead, it might be better to have C2 pass a "FailureLabel"
436// into fast_lock and fast_unlock. In the case of success, control
437// will drop through the node. ICC.ZF is undefined at exit.
438// In the case of failure, the node will branch directly to the
439// FailureLabel
440
441
442// obj: object to lock
443// box: on-stack box address (displaced header location) - KILLED
444// rax,: tmp -- KILLED
445// scr: tmp -- KILLED
446void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
447 Register scrReg, Register cx1Reg, Register cx2Reg,
448 RTMLockingCounters* rtm_counters,
449 RTMLockingCounters* stack_rtm_counters,
450 Metadata* method_data,
451 bool use_rtm, bool profile_rtm) {
452 // Ensure the register assignments are disjoint
453 assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 453, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
454
455 if (use_rtm) {
456 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
457 } else {
458 assert(cx2Reg == noreg, "")do { if (!(cx2Reg == noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 458, "assert(" "cx2Reg == noreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
459 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
460 }
461
462 // Possible cases that we'll encounter in fast_lock
463 // ------------------------------------------------
464 // * Inflated
465 // -- unlocked
466 // -- Locked
467 // = by self
468 // = by other
469 // * neutral
470 // * stack-locked
471 // -- by self
472 // = sp-proximity test hits
473 // = sp-proximity test generates false-negative
474 // -- by other
475 //
476
477 Label IsInflated, DONE_LABEL;
478
479 if (DiagnoseSyncOnValueBasedClasses != 0) {
480 load_klass(tmpReg, objReg, cx1Reg);
481 movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
482 testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
483 jcc(Assembler::notZero, DONE_LABEL);
484 }
485
486#if INCLUDE_RTM_OPT1
487 if (UseRTMForStackLocks && use_rtm) {
488 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive")do { if (!(!UseHeavyMonitors)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 488, "assert(" "!UseHeavyMonitors" ") failed", "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive"
); ::breakpoint(); } } while (0)
;
489 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
490 stack_rtm_counters, method_data, profile_rtm,
491 DONE_LABEL, IsInflated);
492 }
493#endif // INCLUDE_RTM_OPT
494
495 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
496 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
497 jccb(Assembler::notZero, IsInflated)jccb_0(Assembler::notZero, IsInflated, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 497)
;
498
499 if (!UseHeavyMonitors) {
500 // Attempt stack-locking ...
501 orptr (tmpReg, markWord::unlocked_value);
502 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
503 lock();
504 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
505 jcc(Assembler::equal, DONE_LABEL); // Success
506
507 // Recursive locking.
508 // The object is stack-locked: markword contains stack pointer to BasicLock.
509 // Locked by current thread if difference with current SP is less than one page.
510 subptr(tmpReg, rsp);
511 // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
512 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())7 - os::vm_page_size()) );
513 movptr(Address(boxReg, 0), tmpReg);
514 } else {
515 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
516 testptr(objReg, objReg);
517 }
518 jmp(DONE_LABEL);
519
520 bind(IsInflated);
521 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
522
523#if INCLUDE_RTM_OPT1
524 // Use the same RTM locking code in 32- and 64-bit VM.
525 if (use_rtm) {
526 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
527 rtm_counters, method_data, profile_rtm, DONE_LABEL);
528 } else {
529#endif // INCLUDE_RTM_OPT
530
531#ifndef _LP641
532 // The object is inflated.
533
534 // boxReg refers to the on-stack BasicLock in the current frame.
535 // We'd like to write:
536 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
537 // This is convenient but results in a ST-before-CAS penalty. The following CAS suffers
538 // additional latency as we have another ST in the store buffer that must drain.
539
540 // avoid ST-before-CAS
541 // register juggle because we need tmpReg for cmpxchgptr below
542 movptr(scrReg, boxReg);
543 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
544
545 // Optimistic form: consider XORL tmpReg,tmpReg
546 movptr(tmpReg, NULL_WORD0L);
547
548 // Appears unlocked - try to swing _owner from null to non-null.
549 // Ideally, I'd manifest "Self" with get_thread and then attempt
550 // to CAS the register containing Self into m->Owner.
551 // But we don't have enough registers, so instead we can either try to CAS
552 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
553 // we later store "Self" into m->Owner. Transiently storing a stack address
554 // (rsp or the address of the box) into m->owner is harmless.
555 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
556 lock();
557 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
));
558 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
559 // If we weren't able to swing _owner from NULL to the BasicLock
560 // then take the slow path.
561 jccb (Assembler::notZero, DONE_LABEL)jccb_0(Assembler::notZero, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 561)
;
562 // update _owner from BasicLock to thread
563 get_thread (scrReg); // beware: clobbers ICCs
564 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), scrReg);
565 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
566
567 // If the CAS fails we can either retry or pass control to the slow path.
568 // We use the latter tactic.
569 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
570 // If the CAS was successful ...
571 // Self has acquired the lock
572 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
573 // Intentional fall-through into DONE_LABEL ...
574#else // _LP64
575 // It's inflated and we use scrReg for ObjectMonitor* in this section.
576 movq(scrReg, tmpReg);
577 xorq(tmpReg, tmpReg);
578 lock();
579 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
));
580 // Unconditionally set box->_displaced_header = markWord::unused_mark().
581 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
582 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
583 // Propagate ICC.ZF from CAS above into DONE_LABEL.
584 jcc(Assembler::equal, DONE_LABEL); // CAS above succeeded; propagate ZF = 1 (success)
585
586 cmpptr(r15_thread, rax); // Check if we are already the owner (recursive lock)
587 jcc(Assembler::notEqual, DONE_LABEL); // If not recursive, ZF = 0 at this point (fail)
588 incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
));
589 xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success
590#endif // _LP64
591#if INCLUDE_RTM_OPT1
592 } // use_rtm()
593#endif
594 // DONE_LABEL is a hot target - we'd really like to place it at the
595 // start of cache line by padding with NOPs.
596 // See the AMD and Intel software optimization manuals for the
597 // most efficient "long" NOP encodings.
598 // Unfortunately none of our alignment mechanisms suffice.
599 bind(DONE_LABEL);
600
601 // At DONE_LABEL the icc ZFlag is set as follows ...
602 // fast_unlock uses the same protocol.
603 // ZFlag == 1 -> Success
604 // ZFlag == 0 -> Failure - force control through the slow path
605}
606
607// obj: object to unlock
608// box: box address (displaced header location), killed. Must be EAX.
609// tmp: killed, cannot be obj nor box.
610//
611// Some commentary on balanced locking:
612//
613// fast_lock and fast_unlock are emitted only for provably balanced lock sites.
614// Methods that don't have provably balanced locking are forced to run in the
615// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
616// The interpreter provides two properties:
617// I1: At return-time the interpreter automatically and quietly unlocks any
618// objects acquired by the current activation (frame). Recall that the
619// interpreter maintains an on-stack list of locks currently held by
620// a frame.
621// I2: If a method attempts to unlock an object that is not held by the
622// frame, the interpreter throws IMSX.
623//
624// Let's say A(), which has provably balanced locking, acquires O and then calls B().
625// B() doesn't have provably balanced locking so it runs in the interpreter.
626// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
627// is still locked by A().
628//
629// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
630// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
631// should not be unlocked by "normal" java-level locking and vice-versa. The specification
632// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
633// Arguably given that the spec legislates the JNI case as undefined our implementation
634// could reasonably *avoid* checking owner in fast_unlock().
635// In the interest of performance we elide m->Owner==Self check in unlock.
636// A perfectly viable alternative is to elide the owner check except when
637// Xcheck:jni is enabled.
638
// Emits the fast-path monitor-exit sequence for a provably balanced lock site.
//   objReg  - register holding the object being unlocked
//   boxReg  - register holding the on-stack box address; asserted to be rax
//             because the cmpxchg sequences below use rax implicitly
//   tmpReg  - scratch register; asserted distinct from objReg and boxReg
//   use_rtm - when true (and INCLUDE_RTM_OPT is set) emits the xend-based
//             transactional exits ahead of the regular paths
// Result protocol at DONE_LABEL (shared with fast_lock): ZF == 1 means the
// unlock succeeded, ZF == 0 forces control through the slow path.
// NOTE(review): in this listing "#ifndef _LP641" is "#ifndef _LP64" followed by
// the macro's expansion "1", so the analyzed configuration takes the
// "#else // _LP64" branch.
639void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
640 assert(boxReg == rax, "")do { if (!(boxReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 640, "assert(" "boxReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
641 assert_different_registers(objReg, boxReg, tmpReg);
642
643 Label DONE_LABEL, Stacked, CheckSucc;
644
645#if INCLUDE_RTM_OPT1
646 if (UseRTMForStackLocks && use_rtm) {
647 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive")do { if (!(!UseHeavyMonitors)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 647, "assert(" "!UseHeavyMonitors" ") failed", "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive"
); ::breakpoint(); } } while (0)
;
648 Label L_regular_unlock;
649 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
650 andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits
651 cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked
652 jccb(Assembler::notEqual, L_regular_unlock)jccb_0(Assembler::notEqual, L_regular_unlock, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 652)
; // if !HLE RegularLock
653 xend(); // otherwise end...
654 jmp(DONE_LABEL); // ... and we're done
655 bind(L_regular_unlock);
656 }
657#endif
658
// The stack-lock checks are emitted only when heavy monitors are not forced.
659 if (!UseHeavyMonitors) {
660 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD0L); // Examine the displaced header
661 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
662 }
663 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
664 if (!UseHeavyMonitors) {
665 testptr(tmpReg, markWord::monitor_value); // Inflated?
666 jccb (Assembler::zero, Stacked)jccb_0(Assembler::zero, Stacked, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 666)
;
667 }
668
669 // It's inflated.
670#if INCLUDE_RTM_OPT1
671 if (use_rtm) {
672 Label L_regular_inflated_unlock;
673 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
;
674 movptr(boxReg, Address(tmpReg, owner_offset));
675 testptr(boxReg, boxReg);
676 jccb(Assembler::notZero, L_regular_inflated_unlock)jccb_0(Assembler::notZero, L_regular_inflated_unlock, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 676)
;
677 xend();
678 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 678)
;
679 bind(L_regular_inflated_unlock);
680 }
681#endif
682
683 // Despite our balanced locking property we still check that m->_owner == Self
684 // as java routines or native JNI code called by this thread might
685 // have released the lock.
686 // Refer to the comments in synchronizer.cpp for how we might encode extra
687 // state in _succ so we can avoid fetching EntryList|cxq.
688 //
689 // If there's no contention try a 1-0 exit. That is, exit without
690 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
691 // we detect and recover from the race that the 1-0 exit admits.
692 //
693 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
694 // before it STs null into _owner, releasing the lock. Updates
695 // to data protected by the critical section must be visible before
696 // we drop the lock (and thus before any other thread could acquire
697 // the lock and observe the fields protected by the lock).
698 // IA32's memory-model is SPO, so STs are ordered with respect to
699 // each other and there's no need for an explicit barrier (fence).
700 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
701#ifndef _LP641
// NOTE(review): boxReg loaded by get_thread here is overwritten by the xorptr
// a few lines below before any visible use - confirm get_thread is still needed.
702 get_thread (boxReg);
703
704 // Note that we could employ various encoding schemes to reduce
705 // the number of loads below (currently 4) to just 2 or 3.
706 // Refer to the comments in synchronizer.cpp.
707 // In practice the chain of fetches doesn't seem to impact performance, however.
708 xorptr(boxReg, boxReg);
709 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
));
710 jccb (Assembler::notZero, DONE_LABEL)jccb_0(Assembler::notZero, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 710)
;
711 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)((ObjectMonitor::EntryList_offset_in_bytes()) - markWord::monitor_value
)
));
712 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)((ObjectMonitor::cxq_offset_in_bytes()) - markWord::monitor_value
)
));
713 jccb (Assembler::notZero, CheckSucc)jccb_0(Assembler::notZero, CheckSucc, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 713)
;
714 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), NULL_WORD0L);
715 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 715)
;
716
717 bind (Stacked);
718 // It's not inflated and it's not recursively stack-locked.
719 // It must be stack-locked.
720 // Try to reset the header to displaced header.
721 // The "box" value on the stack is stable, so we can reload
722 // and be assured we observe the same value as above.
723 movptr(tmpReg, Address(boxReg, 0));
724 lock();
725 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
726 // Intentional fall-through into DONE_LABEL
727
728 // DONE_LABEL is a hot target - we'd really like to place it at the
729 // start of cache line by padding with NOPs.
730 // See the AMD and Intel software optimization manuals for the
731 // most efficient "long" NOP encodings.
732 // Unfortunately none of our alignment mechanisms suffice.
// NOTE(review): the comment above talks about DONE_LABEL, but the label bound
// here is CheckSucc; DONE_LABEL itself is bound after the #endif.
733 bind (CheckSucc);
734#else // _LP64
735 // It's inflated
736 Label LNotRecursive, LSuccess, LGoSlowPath;
737
738 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
), 0);
739 jccb(Assembler::equal, LNotRecursive)jccb_0(Assembler::equal, LNotRecursive, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 739)
;
740
741 // Recursive inflated unlock
742 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
));
743 jmpb(LSuccess)jmpb_0(LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 743)
;
744
745 bind(LNotRecursive);
// If either cxq or EntryList is non-zero, go to CheckSucc; otherwise store 0
// into owner and finish.
746 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)((ObjectMonitor::cxq_offset_in_bytes()) - markWord::monitor_value
)
));
747 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)((ObjectMonitor::EntryList_offset_in_bytes()) - markWord::monitor_value
)
));
748 jccb (Assembler::notZero, CheckSucc)jccb_0(Assembler::notZero, CheckSucc, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 748)
;
749 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
750 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
751 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 751)
;
752
753 // Try to avoid passing control into the slow_path ...
754 bind (CheckSucc);
755
756 // The following optional optimization can be elided if necessary
757 // Effectively: if (succ == null) goto slow path
758 // The code reduces the window for a race, however,
759 // and thus benefits performance.
760 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)((ObjectMonitor::succ_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
761 jccb (Assembler::zero, LGoSlowPath)jccb_0(Assembler::zero, LGoSlowPath, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 761)
;
762
763 xorptr(boxReg, boxReg);
764 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
765 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
766
767 // Memory barrier/fence
768 // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
769 // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
770 // This is faster on Nehalem and AMD Shanghai/Barcelona.
771 // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
772 // We might also restructure (ST Owner=0;barrier;LD _Succ) to
773 // (mov box,0; xchgq box, &m->Owner; LD _succ) .
774 lock(); addl(Address(rsp, 0), 0);
775
776 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)((ObjectMonitor::succ_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
777 jccb (Assembler::notZero, LSuccess)jccb_0(Assembler::notZero, LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 777)
;
778
779 // Rare inopportune interleaving - race.
780 // The successor vanished in the small window above.
781 // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
782 // We need to ensure progress and succession.
783 // Try to reacquire the lock.
784 // If that fails then the new owner is responsible for succession and this
785 // thread needs to take no further action and can exit via the fast path (success).
786 // If the re-acquire succeeds then pass control into the slow path.
787 // As implemented, this latter mode is horrible because we generated more
788 // coherence traffic on the lock *and* artificially extended the critical section
789 // length by virtue of passing control into the slow path.
790
791 // box is really RAX -- the following CMPXCHG depends on that binding
792 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
793 lock();
794 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
));
795 // There's no successor so we tried to regrab the lock.
796 // If that didn't work, then another thread grabbed the
797 // lock so we're done (and exit was a success).
798 jccb (Assembler::notEqual, LSuccess)jccb_0(Assembler::notEqual, LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 798)
;
799 // Intentional fall-through into slow path
800
801 bind (LGoSlowPath);
802 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
803 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 803)
;
804
805 bind (LSuccess);
806 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
807 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 807)
;
808
// Stacked is only bound when the earlier !UseHeavyMonitors branch could have
// jumped to it; the cmpxchg falls through into DONE_LABEL.
809 if (!UseHeavyMonitors) {
810 bind (Stacked);
811 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
812 lock();
813 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
814 }
815#endif
816 bind(DONE_LABEL);
817}
818
819//-------------------------------------------------------------------------------------------
820// Generic instructions support for use in .ad files C2 code generation
821
// Emits a packed-double abs or negate into dst (two-operand form).
//   opcode - Op_AbsVD selects andpd with the vector_double_sign_mask stub
//            constant; anything else is asserted to be Op_NegVD and selects
//            xorpd with the vector_double_sign_flip stub constant
//   dst    - destination; src is first copied into it with movdqu when the
//            two registers differ
//   src    - source vector register
//   scr    - passed as the trailing register argument of andpd/xorpd
//            (presumably scratch for materializing the external address - confirm)
// NOTE(review): the assert message says "Op_NegD" while the checked opcode is Op_NegVD.
822void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
823 if (dst != src) {
824 movdqu(dst, src);
825 }
826 if (opcode == Op_AbsVD) {
827 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
828 } else {
829 assert((opcode == Op_NegVD),"opcode should be Op_NegD")do { if (!((opcode == Op_NegVD))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 829, "assert(" "(opcode == Op_NegVD)" ") failed", "opcode should be Op_NegD"
); ::breakpoint(); } } while (0)
;
830 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
831 }
832}
833
// Three-operand variant of the packed-double abs/negate: no preliminary copy
// is emitted because dst and src are separate operands of vandpd/vxorpd.
//   opcode     - Op_AbsVD selects vandpd with the vector_double_sign_mask
//                constant; otherwise asserted to be Op_NegVD and selects vxorpd
//                with the vector_double_sign_flip constant
//   vector_len - forwarded unchanged to vandpd/vxorpd
//   scr        - forwarded as the trailing register argument
// NOTE(review): the assert message says "Op_NegD" while the checked opcode is Op_NegVD.
834void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
835 if (opcode == Op_AbsVD) {
836 vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
837 } else {
838 assert((opcode == Op_NegVD),"opcode should be Op_NegD")do { if (!((opcode == Op_NegVD))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 838, "assert(" "(opcode == Op_NegVD)" ") failed", "opcode should be Op_NegD"
); ::breakpoint(); } } while (0)
;
839 vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr);
840 }
841}
842
// Emits a packed-float abs or negate into dst (two-operand form).
//   opcode - Op_AbsVF selects andps with the vector_float_sign_mask stub
//            constant; anything else is asserted to be Op_NegVF and selects
//            xorps with the vector_float_sign_flip stub constant
//   dst    - destination; src is first copied into it with movdqu when the
//            two registers differ
//   scr    - forwarded as the trailing register argument of andps/xorps
// NOTE(review): the assert message says "Op_NegF" while the checked opcode is Op_NegVF.
843void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
844 if (dst != src) {
845 movdqu(dst, src);
846 }
847 if (opcode == Op_AbsVF) {
848 andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr);
849 } else {
850 assert((opcode == Op_NegVF),"opcode should be Op_NegF")do { if (!((opcode == Op_NegVF))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 850, "assert(" "(opcode == Op_NegVF)" ") failed", "opcode should be Op_NegF"
); ::breakpoint(); } } while (0)
;
851 xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr);
852 }
853}
854
// Three-operand variant of the packed-float abs/negate: no preliminary copy
// is emitted because dst and src are separate operands of vandps/vxorps.
//   opcode     - Op_AbsVF selects vandps with the vector_float_sign_mask
//                constant; otherwise asserted to be Op_NegVF and selects vxorps
//                with the vector_float_sign_flip constant
//   vector_len - forwarded unchanged to vandps/vxorps
//   scr        - forwarded as the trailing register argument
// NOTE(review): the assert message says "Op_NegF" while the checked opcode is Op_NegVF.
855void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
856 if (opcode == Op_AbsVF) {
857 vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr);
858 } else {
859 assert((opcode == Op_NegVF),"opcode should be Op_NegF")do { if (!((opcode == Op_NegVF))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 859, "assert(" "(opcode == Op_NegVF)" ") failed", "opcode should be Op_NegF"
); ::breakpoint(); } } while (0)
;
860 vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr);
861 }
862}
863
// Emits a two-operand packed integer min or max of dst and src into dst.
//   opcode  - Op_MinV or Op_MaxV (asserted)
//   elem_bt - T_BYTE / T_SHORT / T_INT map directly onto pmins*/pmaxs*;
//             any other value is asserted to be T_LONG
//   dst     - first input and destination
//   src     - second input
//   tmp     - asserted to be xnoreg unless elem_bt is T_LONG; on the T_LONG
//             path it is asserted to be xmm0 and distinct from dst and src
// The T_LONG path has no direct instruction here: it builds a compare mask in
// xmm0 with pcmpgtq and then uses blendvpd, which takes xmm0 as its mask.
864void C2_MacroAssembler::pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister tmp) {
865 assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MaxV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 865, "assert(" "opcode == Op_MinV || opcode == Op_MaxV" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
866 assert(tmp == xnoreg || elem_bt == T_LONG, "unused")do { if (!(tmp == xnoreg || elem_bt == T_LONG)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 866, "assert(" "tmp == xnoreg || elem_bt == T_LONG" ") failed"
, "unused"); ::breakpoint(); } } while (0)
;
867
868 if (opcode == Op_MinV) {
869 if (elem_bt == T_BYTE) {
870 pminsb(dst, src);
871 } else if (elem_bt == T_SHORT) {
872 pminsw(dst, src);
873 } else if (elem_bt == T_INT) {
874 pminsd(dst, src);
875 } else {
876 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 876, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
877 assert(tmp == xmm0, "required")do { if (!(tmp == xmm0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 877, "assert(" "tmp == xmm0" ") failed", "required"); ::breakpoint
(); } } while (0)
;
878 assert_different_registers(dst, src, tmp);
// Min: mask is computed as (dst > src), then src is blended into dst under it.
879 movdqu(xmm0, dst);
880 pcmpgtq(xmm0, src);
881 blendvpd(dst, src); // xmm0 as mask
882 }
883 } else { // opcode == Op_MaxV
884 if (elem_bt == T_BYTE) {
885 pmaxsb(dst, src);
886 } else if (elem_bt == T_SHORT) {
887 pmaxsw(dst, src);
888 } else if (elem_bt == T_INT) {
889 pmaxsd(dst, src);
890 } else {
891 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 891, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
892 assert(tmp == xmm0, "required")do { if (!(tmp == xmm0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 892, "assert(" "tmp == xmm0" ") failed", "required"); ::breakpoint
(); } } while (0)
;
893 assert_different_registers(dst, src, tmp);
// Max: operands of the compare are swapped, so the mask is (src > dst).
894 movdqu(xmm0, src);
895 pcmpgtq(xmm0, dst);
896 blendvpd(dst, src); // xmm0 as mask
897 }
898 }
899}
900
// Emits a three-operand packed integer min or max of src1 and src2 into dst.
//   opcode   - Op_MinV or Op_MaxV (asserted)
//   elem_bt  - T_BYTE / T_SHORT / T_INT map directly onto vpmins*/vpmaxs*;
//              any other value is asserted to be T_LONG
//   vlen_enc - vector length encoding, forwarded to every emitted instruction
// T_LONG handling: vpminsq/vpmaxsq is used when UseAVX > 2 and either the
// encoding is AVX_512bit or VM_Version::supports_avx512vl() holds. Otherwise a
// vpcmpgtq result is written into dst and then used as the vblendvpd mask, so
// dst is asserted to differ from src1 and src2 on that path.
901void C2_MacroAssembler::vpminmax(int opcode, BasicType elem_bt,
902 XMMRegister dst, XMMRegister src1, XMMRegister src2,
903 int vlen_enc) {
904 assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MaxV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 904, "assert(" "opcode == Op_MinV || opcode == Op_MaxV" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
905
906 if (opcode == Op_MinV) {
907 if (elem_bt == T_BYTE) {
908 vpminsb(dst, src1, src2, vlen_enc);
909 } else if (elem_bt == T_SHORT) {
910 vpminsw(dst, src1, src2, vlen_enc);
911 } else if (elem_bt == T_INT) {
912 vpminsd(dst, src1, src2, vlen_enc);
913 } else {
914 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 914, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
915 if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
916 vpminsq(dst, src1, src2, vlen_enc);
917 } else {
918 assert_different_registers(dst, src1, src2);
919 vpcmpgtq(dst, src1, src2, vlen_enc);
920 vblendvpd(dst, src1, src2, dst, vlen_enc);
921 }
922 }
923 } else { // opcode == Op_MaxV
924 if (elem_bt == T_BYTE) {
925 vpmaxsb(dst, src1, src2, vlen_enc);
926 } else if (elem_bt == T_SHORT) {
927 vpmaxsw(dst, src1, src2, vlen_enc);
928 } else if (elem_bt == T_INT) {
929 vpmaxsd(dst, src1, src2, vlen_enc);
930 } else {
931 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 931, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
932 if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
933 vpmaxsq(dst, src1, src2, vlen_enc);
934 } else {
935 assert_different_registers(dst, src1, src2);
// Same compare as the min path; only the blend operand order is swapped.
936 vpcmpgtq(dst, src1, src2, vlen_enc);
937 vblendvpd(dst, src2, src1, dst, vlen_enc);
938 }
939 }
940 }
941}
942
943// Float/Double min max
944
// Emits an AVX float/double vector min or max of a and b into dst.
//   opcode   - Op_MinV / Op_MinReductionV select min, Op_MaxV /
//              Op_MaxReductionV select max (asserted to be one of the four)
//   elem_bt  - T_FLOAT or T_DOUBLE (asserted); chooses the ps or pd forms
//   tmp, atmp, btmp - temporaries; asserted distinct from a, b and each other
//              (dst is not part of that assertion)
//   vlen_enc - vector length encoding, forwarded to every emitted instruction
// Requires UseAVX > 0 (asserted).
// Every branch emits the same five-step shape: two blends that build atmp and
// btmp from a and b (the blend mask is a for min, b for max), the vmin/vmax of
// atmp and btmp into tmp, an UNORD_Q compare of atmp with itself into btmp,
// and a final blend of tmp and atmp under that compare result.
// NOTE(review): presumably the sign-based blends order -0.0/+0.0 and the
// UNORD_Q step propagates NaN as Java's Math.min/max require - confirm.
945void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt,
946 XMMRegister dst, XMMRegister a, XMMRegister b,
947 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
948 int vlen_enc) {
949 assert(UseAVX > 0, "required")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 949, "assert(" "UseAVX > 0" ") failed", "required"); ::breakpoint
(); } } while (0)
;
950 assert(opcode == Op_MinV || opcode == Op_MinReductionV ||do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 951, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
951 opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 951, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
952 assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity")do { if (!(elem_bt == T_FLOAT || elem_bt == T_DOUBLE)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 952, "assert(" "elem_bt == T_FLOAT || elem_bt == T_DOUBLE" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
953 assert_different_registers(a, b, tmp, atmp, btmp);
954
955 bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV);
956 bool is_double_word = is_double_word_type(elem_bt);
957
958 if (!is_double_word && is_min) {
959 vblendvps(atmp, a, b, a, vlen_enc);
960 vblendvps(btmp, b, a, a, vlen_enc);
961 vminps(tmp, atmp, btmp, vlen_enc);
962 vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
963 vblendvps(dst, tmp, atmp, btmp, vlen_enc);
964 } else if (!is_double_word && !is_min) {
965 vblendvps(btmp, b, a, b, vlen_enc);
966 vblendvps(atmp, a, b, b, vlen_enc);
967 vmaxps(tmp, atmp, btmp, vlen_enc);
968 vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
969 vblendvps(dst, tmp, atmp, btmp, vlen_enc);
970 } else if (is_double_word && is_min) {
971 vblendvpd(atmp, a, b, a, vlen_enc);
972 vblendvpd(btmp, b, a, a, vlen_enc);
973 vminpd(tmp, atmp, btmp, vlen_enc);
974 vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
975 vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
976 } else {
977 assert(is_double_word && !is_min, "sanity")do { if (!(is_double_word && !is_min)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 977, "assert(" "is_double_word && !is_min" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
978 vblendvpd(btmp, b, a, b, vlen_enc);
979 vblendvpd(atmp, a, b, b, vlen_enc);
980 vmaxpd(tmp, atmp, btmp, vlen_enc);
981 vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
982 vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
983 }
984}
985
// Emits an AVX-512 float/double vector min or max of a and b into dst, using
// an opmask register instead of a vector temporary for the blend masks.
//   opcode   - Op_MinV / Op_MinReductionV select min, Op_MaxV /
//              Op_MaxReductionV select max (asserted to be one of the four)
//   elem_bt  - T_FLOAT or T_DOUBLE (asserted); chooses the ps/d or pd/q forms
//   ktmp     - opmask temporary, written by evpmov*2m and by evcmp*
//   atmp, btmp - vector temporaries; asserted distinct from dst, a and b
//   vlen_enc - vector length encoding, forwarded to every emitted instruction
// Requires UseAVX > 2 (asserted).
// Every branch emits the same shape: ktmp is derived from a (min) or b (max)
// via evpmovd2m/evpmovq2m, two masked blends build atmp and btmp, vmin/vmax
// writes dst, an UNORD_Q compare of atmp with itself refills ktmp, and a
// merging masked move copies atmp into dst under that mask.
// NOTE(review): presumably this is the NaN / signed-zero fix-up needed for
// Java min/max semantics - confirm.
986void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
987 XMMRegister dst, XMMRegister a, XMMRegister b,
988 KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
989 int vlen_enc) {
990 assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 990, "assert(" "UseAVX > 2" ") failed", "required"); ::breakpoint
(); } } while (0)
;
991 assert(opcode == Op_MinV || opcode == Op_MinReductionV ||do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 992, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
992 opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 992, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
993 assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity")do { if (!(elem_bt == T_FLOAT || elem_bt == T_DOUBLE)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 993, "assert(" "elem_bt == T_FLOAT || elem_bt == T_DOUBLE" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
994 assert_different_registers(dst, a, b, atmp, btmp);
995
996 bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV);
997 bool is_double_word = is_double_word_type(elem_bt);
// merge is passed to every masked blend/move below.
998 bool merge = true;
999
1000 if (!is_double_word && is_min) {
1001 evpmovd2m(ktmp, a, vlen_enc);
1002 evblendmps(atmp, ktmp, a, b, merge, vlen_enc);
1003 evblendmps(btmp, ktmp, b, a, merge, vlen_enc);
1004 vminps(dst, atmp, btmp, vlen_enc);
1005 evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1006 evmovdqul(dst, ktmp, atmp, merge, vlen_enc);
1007 } else if (!is_double_word && !is_min) {
1008 evpmovd2m(ktmp, b, vlen_enc);
1009 evblendmps(atmp, ktmp, a, b, merge, vlen_enc);
1010 evblendmps(btmp, ktmp, b, a, merge, vlen_enc);
1011 vmaxps(dst, atmp, btmp, vlen_enc);
1012 evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1013 evmovdqul(dst, ktmp, atmp, merge, vlen_enc);
1014 } else if (is_double_word && is_min) {
1015 evpmovq2m(ktmp, a, vlen_enc);
1016 evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
1017 evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
1018 vminpd(dst, atmp, btmp, vlen_enc);
1019 evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1020 evmovdquq(dst, ktmp, atmp, merge, vlen_enc);
1021 } else {
1022 assert(is_double_word && !is_min, "sanity")do { if (!(is_double_word && !is_min)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1022, "assert(" "is_double_word && !is_min" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
1023 evpmovq2m(ktmp, b, vlen_enc);
1024 evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
1025 evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
1026 vmaxpd(dst, atmp, btmp, vlen_enc);
1027 evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1028 evmovdquq(dst, ktmp, atmp, merge, vlen_enc);
1029 }
1030}
1031
1032// Float/Double signum
1033void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
1034 XMMRegister zero, XMMRegister one,
1035 Register scratch) {
1036 assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity")do { if (!(opcode == Op_SignumF || opcode == Op_SignumD)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1036, "assert(" "opcode == Op_SignumF || opcode == Op_SignumD"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
1037
1038 Label DONE_LABEL;
1039
1040 if (opcode == Op_SignumF) {
1041 assert(UseSSE > 0, "required")do { if (!(UseSSE > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1041, "assert(" "UseSSE > 0" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1042 ucomiss(dst, zero);
1043 jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
1044 jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
1045 movflt(dst, one);
1046 jcc(Assembler::above, DONE_LABEL);
1047 xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
1048 } else if (opcode == Op_SignumD) {
1049 assert(UseSSE > 1, "required")do { if (!(UseSSE > 1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1049, "assert(" "UseSSE > 1" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1050 ucomisd(dst, zero);
1051 jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
1052 jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
1053 movdbl(dst, one);
1054 jcc(Assembler::above, DONE_LABEL);
1055 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
1056 }
1057
1058 bind(DONE_LABEL);
1059}
1060
1061void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
1062 if (sign) {
1063 pmovsxbw(dst, src);
1064 } else {
1065 pmovzxbw(dst, src);
1066 }
1067}
1068
1069void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
1070 if (sign) {
1071 vpmovsxbw(dst, src, vector_len);
1072 } else {
1073 vpmovzxbw(dst, src, vector_len);
1074 }
1075}
1076
1077void C2_MacroAssembler::vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
1078 if (sign) {
1079 vpmovsxbd(dst, src, vector_len);
1080 } else {
1081 vpmovzxbd(dst, src, vector_len);
1082 }
1083}
1084
1085void C2_MacroAssembler::vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
1086 if (sign) {
1087 vpmovsxwd(dst, src, vector_len);
1088 } else {
1089 vpmovzxwd(dst, src, vector_len);
1090 }
1091}
1092
1093void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
1094 int shift, int vector_len) {
1095 if (opcode == Op_RotateLeftV) {
1096 if (etype == T_INT) {
1097 evprold(dst, src, shift, vector_len);
1098 } else {
1099 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1099, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1100 evprolq(dst, src, shift, vector_len);
1101 }
1102 } else {
1103 assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV")do { if (!(opcode == Op_RotateRightV)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1103, "assert(" "opcode == Op_RotateRightV" ") failed", "opcode should be Op_RotateRightV"
); ::breakpoint(); } } while (0)
;
1104 if (etype == T_INT) {
1105 evprord(dst, src, shift, vector_len);
1106 } else {
1107 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1107, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1108 evprorq(dst, src, shift, vector_len);
1109 }
1110 }
1111}
1112
1113void C2_MacroAssembler::vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
1114 XMMRegister shift, int vector_len) {
1115 if (opcode == Op_RotateLeftV) {
1116 if (etype == T_INT) {
1117 evprolvd(dst, src, shift, vector_len);
1118 } else {
1119 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1119, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1120 evprolvq(dst, src, shift, vector_len);
1121 }
1122 } else {
1123 assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV")do { if (!(opcode == Op_RotateRightV)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1123, "assert(" "opcode == Op_RotateRightV" ") failed", "opcode should be Op_RotateRightV"
); ::breakpoint(); } } while (0)
;
1124 if (etype == T_INT) {
1125 evprorvd(dst, src, shift, vector_len);
1126 } else {
1127 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1127, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1128 evprorvq(dst, src, shift, vector_len);
1129 }
1130 }
1131}
1132
1133void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) {
1134 if (opcode == Op_RShiftVI) {
1135 psrad(dst, shift);
1136 } else if (opcode == Op_LShiftVI) {
1137 pslld(dst, shift);
1138 } else {
1139 assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI")do { if (!((opcode == Op_URShiftVI))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1139, "assert(" "(opcode == Op_URShiftVI)" ") failed", "opcode should be Op_URShiftVI"
); ::breakpoint(); } } while (0)
;
1140 psrld(dst, shift);
1141 }
1142}
1143
1144void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister shift) {
1145 switch (opcode) {
1146 case Op_RShiftVI: psrad(dst, shift); break;
1147 case Op_LShiftVI: pslld(dst, shift); break;
1148 case Op_URShiftVI: psrld(dst, shift); break;
1149
1150 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1150, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1151 }
1152}
1153
1154void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
1155 if (opcode == Op_RShiftVI) {
1156 vpsrad(dst, nds, shift, vector_len);
1157 } else if (opcode == Op_LShiftVI) {
1158 vpslld(dst, nds, shift, vector_len);
1159 } else {
1160 assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI")do { if (!((opcode == Op_URShiftVI))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1160, "assert(" "(opcode == Op_URShiftVI)" ") failed", "opcode should be Op_URShiftVI"
); ::breakpoint(); } } while (0)
;
1161 vpsrld(dst, nds, shift, vector_len);
1162 }
1163}
1164
1165void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1166 switch (opcode) {
1167 case Op_RShiftVI: vpsrad(dst, src, shift, vlen_enc); break;
1168 case Op_LShiftVI: vpslld(dst, src, shift, vlen_enc); break;
1169 case Op_URShiftVI: vpsrld(dst, src, shift, vlen_enc); break;
1170
1171 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1171, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1172 }
1173}
1174
1175void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister shift) {
1176 switch (opcode) {
1177 case Op_RShiftVB: // fall-through
1178 case Op_RShiftVS: psraw(dst, shift); break;
1179
1180 case Op_LShiftVB: // fall-through
1181 case Op_LShiftVS: psllw(dst, shift); break;
1182
1183 case Op_URShiftVS: // fall-through
1184 case Op_URShiftVB: psrlw(dst, shift); break;
1185
1186 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1186, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1187 }
1188}
1189
1190void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1191 switch (opcode) {
1192 case Op_RShiftVB: // fall-through
1193 case Op_RShiftVS: vpsraw(dst, src, shift, vlen_enc); break;
1194
1195 case Op_LShiftVB: // fall-through
1196 case Op_LShiftVS: vpsllw(dst, src, shift, vlen_enc); break;
1197
1198 case Op_URShiftVS: // fall-through
1199 case Op_URShiftVB: vpsrlw(dst, src, shift, vlen_enc); break;
1200
1201 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1201, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1202 }
1203}
1204
1205void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister shift) {
1206 switch (opcode) {
1207 case Op_RShiftVL: psrlq(dst, shift); break; // using srl to implement sra on pre-avs512 systems
1208 case Op_LShiftVL: psllq(dst, shift); break;
1209 case Op_URShiftVL: psrlq(dst, shift); break;
1210
1211 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1211, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1212 }
1213}
1214
1215void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) {
1216 if (opcode == Op_RShiftVL) {
1217 psrlq(dst, shift); // using srl to implement sra on pre-avs512 systems
1218 } else if (opcode == Op_LShiftVL) {
1219 psllq(dst, shift);
1220 } else {
1221 assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL")do { if (!((opcode == Op_URShiftVL))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1221, "assert(" "(opcode == Op_URShiftVL)" ") failed", "opcode should be Op_URShiftVL"
); ::breakpoint(); } } while (0)
;
1222 psrlq(dst, shift);
1223 }
1224}
1225
1226void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1227 switch (opcode) {
1228 case Op_RShiftVL: evpsraq(dst, src, shift, vlen_enc); break;
1229 case Op_LShiftVL: vpsllq(dst, src, shift, vlen_enc); break;
1230 case Op_URShiftVL: vpsrlq(dst, src, shift, vlen_enc); break;
1231
1232 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1232, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1233 }
1234}
1235
1236void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
1237 if (opcode == Op_RShiftVL) {
1238 evpsraq(dst, nds, shift, vector_len);
1239 } else if (opcode == Op_LShiftVL) {
1240 vpsllq(dst, nds, shift, vector_len);
1241 } else {
1242 assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL")do { if (!((opcode == Op_URShiftVL))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1242, "assert(" "(opcode == Op_URShiftVL)" ") failed", "opcode should be Op_URShiftVL"
); ::breakpoint(); } } while (0)
;
1243 vpsrlq(dst, nds, shift, vector_len);
1244 }
1245}
1246
1247void C2_MacroAssembler::varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1248 switch (opcode) {
1249 case Op_RShiftVB: // fall-through
1250 case Op_RShiftVS: // fall-through
1251 case Op_RShiftVI: vpsravd(dst, src, shift, vlen_enc); break;
1252
1253 case Op_LShiftVB: // fall-through
1254 case Op_LShiftVS: // fall-through
1255 case Op_LShiftVI: vpsllvd(dst, src, shift, vlen_enc); break;
1256
1257 case Op_URShiftVB: // fall-through
1258 case Op_URShiftVS: // fall-through
1259 case Op_URShiftVI: vpsrlvd(dst, src, shift, vlen_enc); break;
1260
1261 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1261, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1262 }
1263}
1264
1265void C2_MacroAssembler::varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1266 switch (opcode) {
1267 case Op_RShiftVB: // fall-through
1268 case Op_RShiftVS: evpsravw(dst, src, shift, vlen_enc); break;
1269
1270 case Op_LShiftVB: // fall-through
1271 case Op_LShiftVS: evpsllvw(dst, src, shift, vlen_enc); break;
1272
1273 case Op_URShiftVB: // fall-through
1274 case Op_URShiftVS: evpsrlvw(dst, src, shift, vlen_enc); break;
1275
1276 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1276, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1277 }
1278}
1279
1280void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister tmp) {
1281 assert(UseAVX >= 2, "required")do { if (!(UseAVX >= 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1281, "assert(" "UseAVX >= 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1282 switch (opcode) {
1283 case Op_RShiftVL: {
1284 if (UseAVX > 2) {
1285 assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1285, "assert(" "tmp == xnoreg" ") failed", "not used"); ::
breakpoint(); } } while (0)
;
1286 if (!VM_Version::supports_avx512vl()) {
1287 vlen_enc = Assembler::AVX_512bit;
1288 }
1289 evpsravq(dst, src, shift, vlen_enc);
1290 } else {
1291 vmovdqu(tmp, ExternalAddress(StubRoutines::x86::vector_long_sign_mask()));
1292 vpsrlvq(dst, src, shift, vlen_enc);
1293 vpsrlvq(tmp, tmp, shift, vlen_enc);
1294 vpxor(dst, dst, tmp, vlen_enc);
1295 vpsubq(dst, dst, tmp, vlen_enc);
1296 }
1297 break;
1298 }
1299 case Op_LShiftVL: {
1300 assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1300, "assert(" "tmp == xnoreg" ") failed", "not used"); ::
breakpoint(); } } while (0)
;
1301 vpsllvq(dst, src, shift, vlen_enc);
1302 break;
1303 }
1304 case Op_URShiftVL: {
1305 assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1305, "assert(" "tmp == xnoreg" ") failed", "not used"); ::
breakpoint(); } } while (0)
;
1306 vpsrlvq(dst, src, shift, vlen_enc);
1307 break;
1308 }
1309 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1309, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1310 }
1311}
1312
1313// Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst
1314void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
1315 assert(opcode == Op_LShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1316 opcode == Op_RShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1317 opcode == Op_URShiftVB, "%s", NodeClassNames[opcode])do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
;
1318 bool sign = (opcode != Op_URShiftVB);
1319 assert(vector_len == 0, "required")do { if (!(vector_len == 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1319, "assert(" "vector_len == 0" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1320 vextendbd(sign, dst, src, 1);
1321 vpmovzxbd(vtmp, shift, 1);
1322 varshiftd(opcode, dst, dst, vtmp, 1);
1323 vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch);
1324 vextracti128_high(vtmp, dst);
1325 vpackusdw(dst, dst, vtmp, 0);
1326}
1327
1328// Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst
1329void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
1330 assert(opcode == Op_LShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1331 opcode == Op_RShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1332 opcode == Op_URShiftVB, "%s", NodeClassNames[opcode])do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
;
1333 bool sign = (opcode != Op_URShiftVB);
1334 int ext_vector_len = vector_len + 1;
1335 vextendbw(sign, dst, src, ext_vector_len);
1336 vpmovzxbw(vtmp, shift, ext_vector_len);
1337 varshiftw(opcode, dst, dst, vtmp, ext_vector_len);
1338 vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch);
1339 if (vector_len == 0) {
1340 vextracti128_high(vtmp, dst);
1341 vpackuswb(dst, dst, vtmp, vector_len);
1342 } else {
1343 vextracti64x4_high(vtmp, dst);
1344 vpackuswb(dst, dst, vtmp, vector_len);
1345 vpermq(dst, dst, 0xD8, vector_len);
1346 }
1347}
1348
1349void C2_MacroAssembler::insert(BasicType typ, XMMRegister dst, Register val, int idx) {
1350 switch(typ) {
1351 case T_BYTE:
1352 pinsrb(dst, val, idx);
1353 break;
1354 case T_SHORT:
1355 pinsrw(dst, val, idx);
1356 break;
1357 case T_INT:
1358 pinsrd(dst, val, idx);
1359 break;
1360 case T_LONG:
1361 pinsrq(dst, val, idx);
1362 break;
1363 default:
1364 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1364, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1365 break;
1366 }
1367}
1368
1369void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx) {
1370 switch(typ) {
1371 case T_BYTE:
1372 vpinsrb(dst, src, val, idx);
1373 break;
1374 case T_SHORT:
1375 vpinsrw(dst, src, val, idx);
1376 break;
1377 case T_INT:
1378 vpinsrd(dst, src, val, idx);
1379 break;
1380 case T_LONG:
1381 vpinsrq(dst, src, val, idx);
1382 break;
1383 default:
1384 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1384, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1385 break;
1386 }
1387}
1388
1389void C2_MacroAssembler::vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len) {
1390 switch(typ) {
1391 case T_INT:
1392 vpgatherdd(dst, Address(base, idx, Address::times_4), mask, vector_len);
1393 break;
1394 case T_FLOAT:
1395 vgatherdps(dst, Address(base, idx, Address::times_4), mask, vector_len);
1396 break;
1397 case T_LONG:
1398 vpgatherdq(dst, Address(base, idx, Address::times_8), mask, vector_len);
1399 break;
1400 case T_DOUBLE:
1401 vgatherdpd(dst, Address(base, idx, Address::times_8), mask, vector_len);
1402 break;
1403 default:
1404 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1404, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1405 break;
1406 }
1407}
1408
1409void C2_MacroAssembler::evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len) {
1410 switch(typ) {
1411 case T_INT:
1412 evpgatherdd(dst, mask, Address(base, idx, Address::times_4), vector_len);
1413 break;
1414 case T_FLOAT:
1415 evgatherdps(dst, mask, Address(base, idx, Address::times_4), vector_len);
1416 break;
1417 case T_LONG:
1418 evpgatherdq(dst, mask, Address(base, idx, Address::times_8), vector_len);
1419 break;
1420 case T_DOUBLE:
1421 evgatherdpd(dst, mask, Address(base, idx, Address::times_8), vector_len);
1422 break;
1423 default:
1424 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1424, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1425 break;
1426 }
1427}
1428
1429void C2_MacroAssembler::evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len) {
1430 switch(typ) {
1431 case T_INT:
1432 evpscatterdd(Address(base, idx, Address::times_4), mask, src, vector_len);
1433 break;
1434 case T_FLOAT:
1435 evscatterdps(Address(base, idx, Address::times_4), mask, src, vector_len);
1436 break;
1437 case T_LONG:
1438 evpscatterdq(Address(base, idx, Address::times_8), mask, src, vector_len);
1439 break;
1440 case T_DOUBLE:
1441 evscatterdpd(Address(base, idx, Address::times_8), mask, src, vector_len);
1442 break;
1443 default:
1444 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1444, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1445 break;
1446 }
1447}
1448
1449void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy) {
1450 if (vlen_in_bytes <= 16) {
1451 pxor (dst, dst);
1452 psubb(dst, src);
1453 switch (elem_bt) {
1454 case T_BYTE: /* nothing to do */ break;
1455 case T_SHORT: pmovsxbw(dst, dst); break;
1456 case T_INT: pmovsxbd(dst, dst); break;
1457 case T_FLOAT: pmovsxbd(dst, dst); break;
1458 case T_LONG: pmovsxbq(dst, dst); break;
1459 case T_DOUBLE: pmovsxbq(dst, dst); break;
1460
1461 default: assert(false, "%s", type2name(elem_bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1461, "assert(" "false" ") failed", "%s", type2name(elem_bt
)); ::breakpoint(); } } while (0)
;
1462 }
1463 } else {
1464 assert(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64, "")do { if (!(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes
< 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1464, "assert(" "!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64"
") failed", ""); ::breakpoint(); } } while (0)
;
1465 int vlen_enc = vector_length_encoding(vlen_in_bytes);
1466
1467 vpxor (dst, dst, dst, vlen_enc);
1468 vpsubb(dst, dst, src, is_legacy ? AVX_256bit : vlen_enc);
1469
1470 switch (elem_bt) {
1471 case T_BYTE: /* nothing to do */ break;
1472 case T_SHORT: vpmovsxbw(dst, dst, vlen_enc); break;
1473 case T_INT: vpmovsxbd(dst, dst, vlen_enc); break;
1474 case T_FLOAT: vpmovsxbd(dst, dst, vlen_enc); break;
1475 case T_LONG: vpmovsxbq(dst, dst, vlen_enc); break;
1476 case T_DOUBLE: vpmovsxbq(dst, dst, vlen_enc); break;
1477
1478 default: assert(false, "%s", type2name(elem_bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1478, "assert(" "false" ") failed", "%s", type2name(elem_bt
)); ::breakpoint(); } } while (0)
;
1479 }
1480 }
1481}
1482
1483void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp,
1484 Register tmp, bool novlbwdq, int vlen_enc) {
1485 if (novlbwdq) {
1486 vpmovsxbd(xtmp, src, vlen_enc);
1487 evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()),
1488 Assembler::eq, true, vlen_enc, tmp);
1489 } else {
1490 vpxor(xtmp, xtmp, xtmp, vlen_enc);
1491 vpsubb(xtmp, xtmp, src, vlen_enc);
1492 evpmovb2m(dst, xtmp, vlen_enc);
1493 }
1494}
1495
1496void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
1497 ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
1498 if (vlen_in_bytes == 4) {
1499 movdl(dst, addr);
1500 } else if (vlen_in_bytes == 8) {
1501 movq(dst, addr);
1502 } else if (vlen_in_bytes == 16) {
1503 movdqu(dst, addr, scratch);
1504 } else if (vlen_in_bytes == 32) {
1505 vmovdqu(dst, addr, scratch);
1506 } else {
1507 assert(vlen_in_bytes == 64, "%d", vlen_in_bytes)do { if (!(vlen_in_bytes == 64)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1507, "assert(" "vlen_in_bytes == 64" ") failed", "%d", vlen_in_bytes
); ::breakpoint(); } } while (0)
;
1508 evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch);
1509 }
1510}
1511
1512// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
1513
// Emits one 128-bit combining instruction for a reduction step, selected by
// `opcode` and, for the integral min/max/add/mul opcodes, by element type `typ`.
// Most cases use the two-operand call op(dst, src). The T_LONG min/max cases and
// Op_MulReductionVL use three-operand calls with dst passed as the first source
// as well, and assert UseAVX > 2 first.
// Any opcode or type not listed here hits assert(false).
1514void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) {
  // Only read by the Op_MulReductionVL case; the T_LONG min/max cases spell out
  // Assembler::AVX_128bit directly.
1515 int vector_len = Assembler::AVX_128bit;
1516
1517 switch (opcode) {
1518 case Op_AndReductionV: pand(dst, src); break;
1519 case Op_OrReductionV: por (dst, src); break;
1520 case Op_XorReductionV: pxor(dst, src); break;
1521 case Op_MinReductionV:
1522 switch (typ) {
1523 case T_BYTE: pminsb(dst, src); break;
1524 case T_SHORT: pminsw(dst, src); break;
1525 case T_INT: pminsd(dst, src); break;
1526 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1526, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1527 vpminsq(dst, dst, src, Assembler::AVX_128bit); break;
1528 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1528, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1529 }
1530 break;
1531 case Op_MaxReductionV:
1532 switch (typ) {
1533 case T_BYTE: pmaxsb(dst, src); break;
1534 case T_SHORT: pmaxsw(dst, src); break;
1535 case T_INT: pmaxsd(dst, src); break;
1536 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1536, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1537 vpmaxsq(dst, dst, src, Assembler::AVX_128bit); break;
1538 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1538, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1539 }
1540 break;
  // The float/double add cases ignore `typ`; the opcode alone picks the instruction.
1541 case Op_AddReductionVF: addss(dst, src); break;
1542 case Op_AddReductionVD: addsd(dst, src); break;
1543 case Op_AddReductionVI:
1544 switch (typ) {
1545 case T_BYTE: paddb(dst, src); break;
1546 case T_SHORT: paddw(dst, src); break;
1547 case T_INT: paddd(dst, src); break;
1548 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1548, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1549 }
1550 break;
1551 case Op_AddReductionVL: paddq(dst, src); break;
1552 case Op_MulReductionVF: mulss(dst, src); break;
1553 case Op_MulReductionVD: mulsd(dst, src); break;
  // No T_BYTE case for multiply: byte inputs go through the mulreduce*B helpers,
  // which widen to shorts before reducing.
1554 case Op_MulReductionVI:
1555 switch (typ) {
1556 case T_SHORT: pmullw(dst, src); break;
1557 case T_INT: pmulld(dst, src); break;
1558 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1558, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1559 }
1560 break;
1561 case Op_MulReductionVL: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1561, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1562 vpmullq(dst, dst, src, vector_len); break;
1563 default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1563, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint
(); } } while (0)
;
1564 }
1565}
1566
// Emits one combining instruction with a 256-bit vector length for a reduction
// step, using the three-operand call op(dst, src1, src2, vector_len).
// `opcode` selects the operation; for min/max/add/mul on integral data `typ`
// selects the element width. No float/double opcodes are handled here.
// Any opcode or type not listed hits assert(false).
1567void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1568 int vector_len = Assembler::AVX_256bit;
1569
1570 switch (opcode) {
1571 case Op_AndReductionV: vpand(dst, src1, src2, vector_len); break;
1572 case Op_OrReductionV: vpor (dst, src1, src2, vector_len); break;
1573 case Op_XorReductionV: vpxor(dst, src1, src2, vector_len); break;
1574 case Op_MinReductionV:
1575 switch (typ) {
1576 case T_BYTE: vpminsb(dst, src1, src2, vector_len); break;
1577 case T_SHORT: vpminsw(dst, src1, src2, vector_len); break;
1578 case T_INT: vpminsd(dst, src1, src2, vector_len); break;
1579 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1579, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1580 vpminsq(dst, src1, src2, vector_len); break;
1581 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1581, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1582 }
1583 break;
1584 case Op_MaxReductionV:
1585 switch (typ) {
1586 case T_BYTE: vpmaxsb(dst, src1, src2, vector_len); break;
1587 case T_SHORT: vpmaxsw(dst, src1, src2, vector_len); break;
1588 case T_INT: vpmaxsd(dst, src1, src2, vector_len); break;
1589 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1589, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1590 vpmaxsq(dst, src1, src2, vector_len); break;
1591 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1591, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1592 }
1593 break;
1594 case Op_AddReductionVI:
1595 switch (typ) {
1596 case T_BYTE: vpaddb(dst, src1, src2, vector_len); break;
1597 case T_SHORT: vpaddw(dst, src1, src2, vector_len); break;
1598 case T_INT: vpaddd(dst, src1, src2, vector_len); break;
1599 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1599, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1600 }
1601 break;
1602 case Op_AddReductionVL: vpaddq(dst, src1, src2, vector_len); break;
1603 case Op_MulReductionVI:
1604 switch (typ) {
1605 case T_SHORT: vpmullw(dst, src1, src2, vector_len); break;
1606 case T_INT: vpmulld(dst, src1, src2, vector_len); break;
1607 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1607, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1608 }
1609 break;
  // NOTE(review): unlike the 128-bit variant and the T_LONG min/max cases above,
  // this vpmullq is not guarded by assert(UseAVX > 2) - confirm that is intended.
1610 case Op_MulReductionVL: vpmullq(dst, src1, src2, vector_len); break;
1611 default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1611, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint
(); } } while (0)
;
1612 }
1613}
1614
// Entry point for floating-point add/mul reductions: forwards the float
// opcodes to reduceF and the double opcodes to reduceD with all arguments
// unchanged. Any other opcode hits assert(false).
1615void C2_MacroAssembler::reduce_fp(int opcode, int vlen,
1616 XMMRegister dst, XMMRegister src,
1617 XMMRegister vtmp1, XMMRegister vtmp2) {
1618 switch (opcode) {
1619 case Op_AddReductionVF:
1620 case Op_MulReductionVF:
1621 reduceF(opcode, vlen, dst, src, vtmp1, vtmp2);
1622 break;
1623
1624 case Op_AddReductionVD:
1625 case Op_MulReductionVD:
1626 reduceD(opcode, vlen, dst, src, vtmp1, vtmp2);
1627 break;
1628
1629 default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1629, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint
(); } } while (0)
;
1630 }
1631}
1632
// Byte-vector reduction dispatcher: picks reduce8B/16B/32B/64B from the
// element count `vlen` and forwards all other arguments unchanged.
// Any other `vlen` hits assert(false).
1633void C2_MacroAssembler::reduceB(int opcode, int vlen,
1634 Register dst, Register src1, XMMRegister src2,
1635 XMMRegister vtmp1, XMMRegister vtmp2) {
1636 switch (vlen) {
1637 case 8: reduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1638 case 16: reduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1639 case 32: reduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1640 case 64: reduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1641
1642 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1642, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1643 }
1644}
1645
// Byte-vector multiply-reduction dispatcher: picks mulreduce8B/16B/32B/64B
// from the element count `vlen` and forwards all other arguments unchanged.
// Any other `vlen` hits assert(false).
1646void C2_MacroAssembler::mulreduceB(int opcode, int vlen,
1647 Register dst, Register src1, XMMRegister src2,
1648 XMMRegister vtmp1, XMMRegister vtmp2) {
1649 switch (vlen) {
1650 case 8: mulreduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1651 case 16: mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1652 case 32: mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1653 case 64: mulreduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1654
1655 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1655, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1656 }
1657}
1658
// Short-vector reduction dispatcher: picks reduce4S/8S/16S/32S from the
// element count `vlen` and forwards all other arguments unchanged.
// Any other `vlen` hits assert(false).
1659void C2_MacroAssembler::reduceS(int opcode, int vlen,
1660 Register dst, Register src1, XMMRegister src2,
1661 XMMRegister vtmp1, XMMRegister vtmp2) {
1662 switch (vlen) {
1663 case 4: reduce4S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1664 case 8: reduce8S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1665 case 16: reduce16S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1666 case 32: reduce32S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1667
1668 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1668, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1669 }
1670}
1671
// Int-vector reduction dispatcher: picks reduce2I/4I/8I/16I from the element
// count `vlen` and forwards all other arguments unchanged.
// Any other `vlen` hits assert(false).
1672void C2_MacroAssembler::reduceI(int opcode, int vlen,
1673 Register dst, Register src1, XMMRegister src2,
1674 XMMRegister vtmp1, XMMRegister vtmp2) {
1675 switch (vlen) {
1676 case 2: reduce2I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1677 case 4: reduce4I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1678 case 8: reduce8I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1679 case 16: reduce16I(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1680
1681 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1681, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1682 }
1683}
1684
1685#ifdef _LP641
// Long-vector reduction dispatcher (compiled only under _LP64): picks
// reduce2L/4L/8L from the element count `vlen` and forwards all other
// arguments unchanged. Any other `vlen` hits assert(false).
1686void C2_MacroAssembler::reduceL(int opcode, int vlen,
1687 Register dst, Register src1, XMMRegister src2,
1688 XMMRegister vtmp1, XMMRegister vtmp2) {
1689 switch (vlen) {
1690 case 2: reduce2L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1691 case 4: reduce4L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1692 case 8: reduce8L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1693
1694 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1694, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1695 }
1696}
1697#endif // _LP64
1698
// Float-vector reduction dispatcher keyed on the element count `vlen`.
// The 2- and 4-element helpers take a single temporary, so those cases assert
// that the caller passed vtmp2 == xnoreg; the 8- and 16-element helpers receive
// both temporaries. Any other `vlen` hits assert(false).
1699void C2_MacroAssembler::reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1700 switch (vlen) {
1701 case 2:
1702 assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1702, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
1703 reduce2F(opcode, dst, src, vtmp1);
1704 break;
1705 case 4:
1706 assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1706, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
1707 reduce4F(opcode, dst, src, vtmp1);
1708 break;
1709 case 8:
1710 reduce8F(opcode, dst, src, vtmp1, vtmp2);
1711 break;
1712 case 16:
1713 reduce16F(opcode, dst, src, vtmp1, vtmp2);
1714 break;
1715 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1715, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1716 }
1717}
1718
// Double-vector reduction dispatcher keyed on the element count `vlen`.
// The 2-element helper takes a single temporary, so that case asserts
// vtmp2 == xnoreg; the 4- and 8-element helpers receive both temporaries.
// Any other `vlen` hits assert(false).
1719void C2_MacroAssembler::reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1720 switch (vlen) {
1721 case 2:
1722 assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1722, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
1723 reduce2D(opcode, dst, src, vtmp1);
1724 break;
1725 case 4:
1726 reduce4D(opcode, dst, src, vtmp1, vtmp2);
1727 break;
1728 case 8:
1729 reduce8D(opcode, dst, src, vtmp1, vtmp2);
1730 break;
1731 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1731, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1732 }
1733}
1734
// Reduces the two low ints of src2 together with the scalar in src1 and
// writes the result to general register dst. Clobbers vtmp1 and vtmp2.
// This is the final stage that the wider int reductions funnel into.
1735void C2_MacroAssembler::reduce2I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1736 if (opcode == Op_AddReductionVI) {
  // Add path: work on a copy of src2 (skipped when the caller already passed
  // vtmp1 as src2) and fold neighbouring ints with a horizontal add.
1737 if (vtmp1 != src2) {
1738 movdqu(vtmp1, src2);
1739 }
1740 phaddd(vtmp1, vtmp1);
1741 } else {
  // Other opcodes: bring int element 1 down to element 0, then combine with src2.
1742 pshufd(vtmp1, src2, 0x1);
1743 reduce_operation_128(T_INT, opcode, vtmp1, src2);
1744 }
  // Fold in the scalar input and move the low int of the result to dst.
1745 movdl(vtmp2, src1);
1746 reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
1747 movdl(dst, vtmp1);
1748}
1749
// Reduces four ints of src2 together with the scalar in src1 into dst by
// halving the problem once and delegating to reduce2I.
// Clobbers vtmp1 and vtmp2.
1750void C2_MacroAssembler::reduce4I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1751 if (opcode == Op_AddReductionVI) {
  // Add path: horizontal add of a copy of src2 with src2; the partial sums are
  // left in vtmp1, which is then passed to reduce2I as both source and temp.
1752 if (vtmp1 != src2) {
1753 movdqu(vtmp1, src2);
1754 }
1755 phaddd(vtmp1, src2);
1756 reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1757 } else {
  // Other opcodes: move the upper two ints down (0xE) and combine with src2.
1758 pshufd(vtmp2, src2, 0xE);
1759 reduce_operation_128(T_INT, opcode, vtmp2, src2);
1760 reduce2I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1761 }
1762}
1763
// Reduces eight ints (a 256-bit src2) together with the scalar in src1 into
// dst. Clobbers vtmp1 and vtmp2.
1764void C2_MacroAssembler::reduce8I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1765 if (opcode == Op_AddReductionVI) {
  // Add path: 256-bit horizontal add, then add the extracted high 128 bits to
  // the low 128 bits and finish with reduce2I.
1766 vphaddd(vtmp1, src2, src2, Assembler::AVX_256bit);
1767 vextracti128_high(vtmp2, vtmp1);
1768 vpaddd(vtmp1, vtmp1, vtmp2, Assembler::AVX_128bit);
1769 reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1770 } else {
  // Other opcodes: combine the high 128 bits with src2, then reduce the
  // remaining four ints.
1771 vextracti128_high(vtmp1, src2);
1772 reduce_operation_128(T_INT, opcode, vtmp1, src2);
1773 reduce4I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1774 }
1775}
1776
// Reduces sixteen ints (a 512-bit src2) together with the scalar in src1 into
// dst: extracts the high 256 bits into vtmp2, combines them with src2 at
// 256-bit width, and hands the result to reduce8I. Clobbers vtmp1 and vtmp2.
1777void C2_MacroAssembler::reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1778 vextracti64x4_high(vtmp2, src2);
1779 reduce_operation_256(T_INT, opcode, vtmp2, vtmp2, src2);
1780 reduce8I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1781}
1782
// Reduces the eight low bytes of src2 together with the scalar in src1 and
// writes the sign-extended byte result to dst. Clobbers vtmp1 and vtmp2.
// This is the final stage that the wider byte reductions funnel into.
1783void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
  // Step 1: combine bytes 4..7 (brought down by the dword shuffle) with bytes 0..3.
1784 pshufd(vtmp2, src2, 0x1);
1785 reduce_operation_128(T_BYTE, opcode, vtmp2, src2);
  // Step 2: combine with a copy shifted right by two bytes.
1786 movdqu(vtmp1, vtmp2);
1787 psrldq(vtmp1, 2);
1788 reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
  // Step 3: combine with a copy shifted right by one byte.
1789 movdqu(vtmp2, vtmp1);
1790 psrldq(vtmp2, 1);
1791 reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
  // Fold in the scalar input at int width, then extract byte 0 and sign-extend it.
1792 movdl(vtmp2, src1);
1793 pmovsxbd(vtmp1, vtmp1);
1794 reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
1795 pextrb(dst, vtmp1, 0x0);
1796 movsbl(dst, dst);
1797}
1798
// Reduces sixteen bytes of src2 together with the scalar in src1 into dst:
// moves the upper 64 bits down (pshufd 0xE), combines them with src2, and
// delegates the remaining eight bytes to reduce8B. Clobbers vtmp1 and vtmp2.
1799void C2_MacroAssembler::reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1800 pshufd(vtmp1, src2, 0xE);
1801 reduce_operation_128(T_BYTE, opcode, vtmp1, src2);
1802 reduce8B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1803}
1804
// Reduces thirty-two bytes (a 256-bit src2) together with the scalar in src1
// into dst: extracts the high 128 bits into vtmp2, combines them with src2,
// and delegates to reduce16B. Clobbers vtmp1 and vtmp2.
1805void C2_MacroAssembler::reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1806 vextracti128_high(vtmp2, src2);
1807 reduce_operation_128(T_BYTE, opcode, vtmp2, src2);
1808 reduce16B(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1809}
1810
// Reduces sixty-four bytes (a 512-bit src2) together with the scalar in src1
// into dst: extracts the high 256 bits into vtmp1, combines them with src2 at
// 256-bit width, and delegates to reduce32B. Clobbers vtmp1 and vtmp2.
1811void C2_MacroAssembler::reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1812 vextracti64x4_high(vtmp1, src2);
1813 reduce_operation_256(T_BYTE, opcode, vtmp1, vtmp1, src2);
1814 reduce32B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1815}
1816
// Multiply-reduction of eight bytes: sign-extends the low eight bytes of src2
// to eight shorts in vtmp2 and reuses the short reduction (reduce8S).
// Clobbers vtmp1 and vtmp2.
1817void C2_MacroAssembler::mulreduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1818 pmovsxbw(vtmp2, src2);
1819 reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1820}
1821
// Multiply-reduction of sixteen bytes of src2 together with the scalar in
// src1, result in dst. Bytes are sign-extended to shorts so the short
// reduction helpers can be reused. Clobbers vtmp1 and vtmp2.
//   UseAVX > 1: widen all sixteen bytes to a 256-bit vector of shorts and
//               reduce them in one reduce16S call.
//   otherwise:  reduce the low eight bytes into dst, then the high eight bytes
//               with dst as the running scalar input.
1822void C2_MacroAssembler::mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1823 if (UseAVX > 1) {
1824 int vector_len = Assembler::AVX_256bit;
1825 vpmovsxbw(vtmp1, src2, vector_len);
1826 reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1827 } else {
1828 pmovsxbw(vtmp2, src2);
1829 reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
  // Second half: move the upper 64 bits of src2 into the low 64 bits of vtmp2
  // (0xE) and sign-extend from vtmp2 itself. Sign-extending from src2 again
  // would discard the shuffle and reduce the low eight bytes a second time.
1830 pshufd(vtmp2, src2, 0xE);
1831 pmovsxbw(vtmp2, vtmp2);
1832 reduce8S(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
1833 }
1834}
1835
// Multiply-reduction of thirty-two bytes of src2 together with the scalar in
// src1, result in dst. Clobbers vtmp1 and vtmp2.
//   UseAVX > 2 with AVX512BW: widen all bytes to a 512-bit vector of shorts and
//                             reduce them with reduce32S.
//   otherwise (asserts UseAVX >= 2): reduce the low sixteen bytes into dst, then
//                             the high 128 bits with dst as the running scalar.
1836void C2_MacroAssembler::mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1837 if (UseAVX > 2 && VM_Version::supports_avx512bw()) {
1838 int vector_len = Assembler::AVX_512bit;
1839 vpmovsxbw(vtmp1, src2, vector_len);
1840 reduce32S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1841 } else {
1842 assert(UseAVX >= 2,"Should not reach here.")do { if (!(UseAVX >= 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1842, "assert(" "UseAVX >= 2" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1843 mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2);
  // NOTE(review): src2 is read again after the call above, which uses vtmp1 and
  // vtmp2 as scratch - this presumably requires src2 to be distinct from both;
  // confirm against callers (mulreduce64B passes vtmp2 as src2 on its second call).
1844 vextracti128_high(vtmp2, src2);
1845 mulreduce16B(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
1846 }
1847}
1848
// Multiply-reduction of sixty-four bytes (a 512-bit src2) together with the
// scalar in src1, result in dst: reduces src2 with mulreduce32B into dst, then
// extracts the high 256 bits into vtmp2 and reduces those with dst as the
// running scalar input. Clobbers vtmp1 and vtmp2.
1849void C2_MacroAssembler::mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1850 mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2);
1851 vextracti64x4_high(vtmp2, src2);
1852 mulreduce32B(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
1853}
1854
// Reduces the four low shorts of src2 together with the scalar in src1 and
// writes the sign-extended short result to dst. Clobbers vtmp1 and vtmp2.
// This is the final stage that the wider short reductions funnel into.
1855void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1856 if (opcode == Op_AddReductionVI) {
  // Add path: two horizontal word adds on a copy of src2 (copy skipped when the
  // caller already passed vtmp1 as src2).
1857 if (vtmp1 != src2) {
1858 movdqu(vtmp1, src2);
1859 }
1860 phaddw(vtmp1, vtmp1);
1861 phaddw(vtmp1, vtmp1);
1862 } else {
  // Other opcodes: combine shorts 2..3 with shorts 0..1, then combine with a
  // copy shifted right by two bytes (one short).
1863 pshufd(vtmp2, src2, 0x1);
1864 reduce_operation_128(T_SHORT, opcode, vtmp2, src2);
1865 movdqu(vtmp1, vtmp2);
1866 psrldq(vtmp1, 2);
1867 reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2);
1868 }
  // Fold in the scalar input at int width, then extract word 0 and sign-extend it.
1869 movdl(vtmp2, src1);
1870 pmovsxwd(vtmp1, vtmp1);
1871 reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
1872 pextrw(dst, vtmp1, 0x0);
1873 movswl(dst, dst);
1874}
1875
// Reduces eight shorts of src2 together with the scalar in src1 into dst by
// halving the problem once into vtmp1 and delegating to reduce4S.
// Clobbers vtmp1 and vtmp2.
1876void C2_MacroAssembler::reduce8S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1877 if (opcode == Op_AddReductionVI) {
  // Add path: horizontal word add of a copy of src2 with src2.
1878 if (vtmp1 != src2) {
1879 movdqu(vtmp1, src2);
1880 }
1881 phaddw(vtmp1, src2);
1882 } else {
  // Other opcodes: move the upper 64 bits down (0xE) and combine with src2.
1883 pshufd(vtmp1, src2, 0xE);
1884 reduce_operation_128(T_SHORT, opcode, vtmp1, src2);
1885 }
1886 reduce4S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1887}
1888
// Reduces sixteen shorts (a 256-bit src2) together with the scalar in src1
// into dst by producing an eight-short partial result in vtmp2 and delegating
// to reduce8S. Clobbers vtmp1 and vtmp2.
1889void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1890 if (opcode == Op_AddReductionVI) {
  // Add path: 256-bit horizontal word add followed by a 64-bit-lane permute
  // (0xD8), presumably to gather the partial sums into the low 128 bits -
  // TODO confirm against the vphaddw lane layout.
1891 int vector_len = Assembler::AVX_256bit;
1892 vphaddw(vtmp2, src2, src2, vector_len);
1893 vpermq(vtmp2, vtmp2, 0xD8, vector_len);
1894 } else {
  // Other opcodes: combine the high 128 bits with src2.
1895 vextracti128_high(vtmp2, src2);
1896 reduce_operation_128(T_SHORT, opcode, vtmp2, src2);
1897 }
1898 reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1899}
1900
// Reduces thirty-two shorts (a 512-bit src2) together with the scalar in src1
// into dst: extracts the high 256 bits into vtmp1, combines them with src2 at
// 256-bit width, and delegates to reduce16S. Clobbers vtmp1 and vtmp2.
// (The vector length is implied by reduce_operation_256, so no local encoding
// variable is needed here.)
1901void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1903 vextracti64x4_high(vtmp1, src2);
1904 reduce_operation_256(T_SHORT, opcode, vtmp1, vtmp1, src2);
1905 reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1906}
1907
1908#ifdef _LP641
// Reduces the two longs of src2 together with the scalar in src1 and writes
// the result to general register dst. Clobbers vtmp1 and vtmp2.
// This is the final stage that the wider long reductions funnel into.
1909void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
  // Move the upper long down (0xE) and combine it with the lower long.
1910 pshufd(vtmp2, src2, 0xE);
1911 reduce_operation_128(T_LONG, opcode, vtmp2, src2);
  // Fold in the scalar input and move the low long of the result to dst.
1912 movdq(vtmp1, src1);
1913 reduce_operation_128(T_LONG, opcode, vtmp1, vtmp2);
1914 movdq(dst, vtmp1);
1915}
1916
// Reduces four longs (a 256-bit src2) together with the scalar in src1 into
// dst: extracts the high 128 bits into vtmp1, combines them with src2, and
// delegates to reduce2L. Clobbers vtmp1 and vtmp2.
1917void C2_MacroAssembler::reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1918 vextracti128_high(vtmp1, src2);
1919 reduce_operation_128(T_LONG, opcode, vtmp1, src2);
1920 reduce2L(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1921}
1922
// Reduces eight longs (a 512-bit src2) together with the scalar in src1 into
// dst: extracts the high 256 bits into vtmp2, combines them with src2 at
// 256-bit width, and delegates to reduce4L. Clobbers vtmp1 and vtmp2.
1923void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1924 vextracti64x4_high(vtmp2, src2);
1925 reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2);
1926 reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1927}
1928
// Builds an opmask in `dst` from the run-time length in `len`, using `temp`
// as scratch (it is overwritten). Asserts that
// ArrayOperationPartialInlineSize is in (0, 64].
1929void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
1930 assert(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64, "invalid")do { if (!(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize
<= 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1930, "assert(" "ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64"
") failed", "invalid"); ::breakpoint(); } } while (0)
;
  // Start from all ones, clear the bits from position `len` upwards (BZHI),
  // leaving the low `len` bits set, then copy the 64-bit value into the mask register.
1931 mov64(temp, -1L);
1932 bzhiq(temp, temp, len);
1933 kmovql(dst, temp);
1934}
1935#endif // _LP64
1936
// Folds the two low floats of src into the running value in dst, one element
// at a time in index order (element 0, then element 1). dst is both the
// initial value and the result; vtmp is clobbered.
1937void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
1938 reduce_operation_128(T_FLOAT, opcode, dst, src);
  // Bring element 1 down to element 0 so the scalar op can consume it.
1939 pshufd(vtmp, src, 0x1);
1940 reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
1941}
1942
// Folds four floats of src into the running value in dst in index order:
// elements 0 and 1 via reduce2F, then elements 2 and 3, each brought down to
// element 0 with pshufd first. vtmp is clobbered.
1943void C2_MacroAssembler::reduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
1944 reduce2F(opcode, dst, src, vtmp);
1945 pshufd(vtmp, src, 0x2);
1946 reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
1947 pshufd(vtmp, src, 0x3);
1948 reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
1949}
1950
// Folds eight floats (a 256-bit src) into the running value in dst: the low
// four via reduce4F, then the high 128 bits extracted into vtmp2.
// Clobbers vtmp1 and vtmp2.
1951void C2_MacroAssembler::reduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
  // vtmp2 is scratch for the first half, then holds the high half while vtmp1
  // takes over as scratch.
1952 reduce4F(opcode, dst, src, vtmp2);
1953 vextractf128_high(vtmp2, src);
1954 reduce4F(opcode, dst, vtmp2, vtmp1);
1955}
1956
// Folds sixteen floats (a 512-bit src) into the running value in dst: the low
// eight via reduce8F, then the high 256 bits extracted into vtmp1.
// Clobbers vtmp1 and vtmp2.
1957void C2_MacroAssembler::reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1958 reduce8F(opcode, dst, src, vtmp1, vtmp2);
1959 vextracti64x4_high(vtmp1, src);
  // vtmp1 is passed as both the source and the first temporary here.
1960 reduce8F(opcode, dst, vtmp1, vtmp1, vtmp2);
1961}
1962
// Folds the two doubles of src into the running value in dst, element 0 first
// and then element 1. dst is both the initial value and the result; vtmp is
// clobbered.
1963void C2_MacroAssembler::reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
1964 reduce_operation_128(T_DOUBLE, opcode, dst, src);
  // Move the upper 64 bits down (0xE) so the scalar op can consume element 1.
1965 pshufd(vtmp, src, 0xE);
1966 reduce_operation_128(T_DOUBLE, opcode, dst, vtmp);
1967}
1968
// Folds four doubles (a 256-bit src) into the running value in dst: the low
// two via reduce2D, then the high 128 bits extracted into vtmp2.
// Clobbers vtmp1 and vtmp2.
1969void C2_MacroAssembler::reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1970 reduce2D(opcode, dst, src, vtmp2);
1971 vextractf128_high(vtmp2, src);
1972 reduce2D(opcode, dst, vtmp2, vtmp1);
1973}
1974
// Folds eight doubles (a 512-bit src) into the running value in dst: the low
// four via reduce4D, then the high 256 bits extracted into vtmp1.
// Clobbers vtmp1 and vtmp2.
1975void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1976 reduce4D(opcode, dst, src, vtmp1, vtmp2);
1977 vextracti64x4_high(vtmp1, src);
  // vtmp1 is passed as both the source and the first temporary here.
1978 reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2);
1979}
1980
// Masked vector move from memory (`src`) into register `dst`: forwards
// directly to the MacroAssembler implementation with identical arguments.
1981void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) {
1982 MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len);
1983}
1984
// Masked vector move from register `src` into memory (`dst`): forwards
// directly to the MacroAssembler implementation with identical arguments.
1985void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) {
1986 MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len);
1987}
1988
1989
// Min/max reduction over a float vector of `vlen` elements in `src`.
// Runs log2(vlen) halving steps; each step moves the "other half" of the
// current working vector into wtmp and combines it with vminmax_fp.
//   is_dst_valid == true : dst holds an incoming value; the vector result is
//                          built in xmm_0 and combined with dst at the end.
//   is_dst_valid == false: the last step writes straight into dst.
// xmm_1 is used as the shuffle temporary when provided; when it is xnoreg,
// xmm_0 doubles as both temporary and step destination.
// tmp/atmp/btmp are passed through to vminmax_fp as its scratch registers.
1990void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
1991 XMMRegister dst, XMMRegister src,
1992 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
1993 XMMRegister xmm_0, XMMRegister xmm_1) {
  // vpermilps immediates for the last two steps, indexed by i: 1 for i == 0,
  // 14 for i == 1.
1994 int permconst[] = {1, 14};
1995 XMMRegister wsrc = src;
1996 XMMRegister wdst = xmm_0;
1997 XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1;
1998
  // Only the first step of a 16-element reduction runs at 256-bit width; every
  // later step is reset to 128-bit at the bottom of the loop.
1999 int vlen_enc = Assembler::AVX_128bit;
2000 if (vlen == 16) {
2001 vlen_enc = Assembler::AVX_256bit;
2002 }
2003
2004 for (int i = log2(vlen) - 1; i >=0; i--) {
2005 if (i == 0 && !is_dst_valid) {
2006 wdst = dst;
2007 }
2008 if (i == 3) {
2009 vextracti64x4_high(wtmp, wsrc);
2010 } else if (i == 2) {
2011 vextracti128_high(wtmp, wsrc);
2012 } else { // i = [0,1]
2013 vpermilps(wtmp, wsrc, permconst[i], vlen_enc);
2014 }
2015 vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
  // The step result becomes the input of the next, narrower step.
2016 wsrc = wdst;
2017 vlen_enc = Assembler::AVX_128bit;
2018 }
2019 if (is_dst_valid) {
2020 vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2021 }
2022}
2023
// Min/max reduction over a double vector of `vlen` elements in `src`.
// Runs log2(vlen) halving steps; each step moves the "other half" of the
// current working vector into wtmp and combines it with vminmax_fp.
//   is_dst_valid == true : dst holds an incoming value; the vector result is
//                          built in xmm_0 and combined with dst at the end.
//   is_dst_valid == false: the last step writes straight into dst.
// xmm_1 is used as the shuffle temporary when provided; when it is xnoreg,
// xmm_0 doubles as both temporary and step destination.
// tmp/atmp/btmp are passed through to vminmax_fp as its scratch registers.
2024void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, XMMRegister dst, XMMRegister src,
2025 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
2026 XMMRegister xmm_0, XMMRegister xmm_1) {
2027 XMMRegister wsrc = src;
2028 XMMRegister wdst = xmm_0;
2029 XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1;
  // Only the first step of an 8-element reduction runs at 256-bit width; every
  // later step is reset to 128-bit at the bottom of the loop.
2030 int vlen_enc = Assembler::AVX_128bit;
2031 if (vlen == 8) {
2032 vlen_enc = Assembler::AVX_256bit;
2033 }
2034 for (int i = log2(vlen) - 1; i >=0; i--) {
2035 if (i == 0 && !is_dst_valid) {
2036 wdst = dst;
2037 }
2038 if (i == 1) {
2039 vextracti128_high(wtmp, wsrc);
2040 } else if (i == 2) {
2041 vextracti64x4_high(wtmp, wsrc);
2042 } else {
2043 assert(i == 0, "%d", i)do { if (!(i == 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2043, "assert(" "i == 0" ") failed", "%d", i); ::breakpoint
(); } } while (0)
;
2044 vpermilpd(wtmp, wsrc, 1, vlen_enc);
2045 }
2046 vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
  // The step result becomes the input of the next, narrower step.
2047 wsrc = wdst;
2048 vlen_enc = Assembler::AVX_128bit;
2049 }
2050 if (is_dst_valid) {
2051 vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2052 }
2053}
2054
// Extracts element `idx` of `src` into general register `dst` using the
// pextr* instruction that matches the element type `bt`
// (T_BYTE/T_SHORT/T_INT/T_LONG). Any other type hits assert(false).
2055void C2_MacroAssembler::extract(BasicType bt, Register dst, XMMRegister src, int idx) {
2056 switch (bt) {
2057 case T_BYTE: pextrb(dst, src, idx); break;
2058 case T_SHORT: pextrw(dst, src, idx); break;
2059 case T_INT: pextrd(dst, src, idx); break;
2060 case T_LONG: pextrq(dst, src, idx); break;
2061
2062 default:
2063 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2063, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2064 break;
2065 }
2066}
2067
// Returns a register whose low 128 bits hold the 16-byte lane of `src` that
// contains element `elemindex` of type `typ`.
//   lane 0    : no code is emitted and `src` itself is returned.
//   lane 1    : the lane is extracted into `dst` (asserts UseAVX > 0).
//   lane >= 2 : the lane is extracted into `dst` with a 32x4 extract
//               (asserts UseAVX > 2).
// Callers must use the returned register, not assume the data is in `dst`.
2068XMMRegister C2_MacroAssembler::get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex) {
2069 int esize = type2aelembytes(typ);
2070 int elem_per_lane = 16/esize;
2071 int lane = elemindex / elem_per_lane;
2073
2074 if (lane >= 2) {
2075 assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2075, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
2076 vextractf32x4(dst, src, lane & 3);
2077 return dst;
2078 } else if (lane > 0) {
2079 assert(UseAVX > 0, "required")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2079, "assert(" "UseAVX > 0" ") failed", "required"); ::
breakpoint(); } } while (0)
;
2080 vextractf128(dst, src, lane);
2081 return dst;
2082 } else {
2083 return src;
2084 }
2085}
2086
// Moves one integral element out of a 128-bit lane in `src` into general
// register `dst`. `elemindex` is reduced modulo the number of elements per
// 16-byte lane, so `src` is expected to already hold the right lane
// (presumably obtained via get_lane - confirm against callers).
// Asserts that `typ` is an integral type.
2087void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex) {
2088 int esize = type2aelembytes(typ);
2089 int elem_per_lane = 16/esize;
2090 int eindex = elemindex % elem_per_lane;
2091 assert(is_integral_type(typ),"required")do { if (!(is_integral_type(typ))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2091, "assert(" "is_integral_type(typ)" ") failed", "required"
); ::breakpoint(); } } while (0)
;
2092
  // Element 0 is moved directly; byte and short values are then sign-extended
  // to int. Other positions go through extract().
2093 if (eindex == 0) {
2094 if (typ == T_LONG) {
2095 movq(dst, src);
2096 } else {
2097 movdl(dst, src);
2098 if (typ == T_BYTE)
2099 movsbl(dst, dst);
2100 else if (typ == T_SHORT)
2101 movswl(dst, dst);
2102 }
2103 } else {
2104 extract(typ, dst, src, eindex);
2105 }
2106}
2107
// Moves one float or double element out of a 128-bit lane in `src` into the
// low element of `dst`. `elemindex` is reduced modulo the number of elements
// per 16-byte lane. Asserts that `typ` is T_FLOAT or T_DOUBLE.
// For T_FLOAT the result is additionally masked with
// StubRoutines::x86::vector_32_bit_mask(); that step needs `tmp` (and `vtmp`
// as well when UseAVX == 0), which is asserted below.
2108void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) {
2109 int esize = type2aelembytes(typ);
2110 int elem_per_lane = 16/esize;
2111 int eindex = elemindex % elem_per_lane;
2112 assert((typ == T_FLOAT || typ == T_DOUBLE),"required")do { if (!((typ == T_FLOAT || typ == T_DOUBLE))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2112, "assert(" "(typ == T_FLOAT || typ == T_DOUBLE)" ") failed"
, "required"); ::breakpoint(); } } while (0)
;
2113
2114 if (eindex == 0) {
2115 movq(dst, src);
2116 } else {
2117 if (typ == T_FLOAT) {
  // Float: shuffle the wanted element into position 0 (copying first on
  // non-AVX, where the shuffle is a two-operand instruction).
2118 if (UseAVX == 0) {
2119 movdqu(dst, src);
2120 pshufps(dst, dst, eindex);
2121 } else {
2122 vpshufps(dst, src, src, eindex, Assembler::AVX_128bit);
2123 }
2124 } else {
  // Double: byte-shift the wanted element down to position 0.
2125 if (UseAVX == 0) {
2126 movdqu(dst, src);
2127 psrldq(dst, eindex*esize);
2128 } else {
2129 vpsrldq(dst, src, eindex*esize, Assembler::AVX_128bit);
2130 }
  // Self-move; presumably used to clear the bits above the low 64 - TODO confirm.
2131 movq(dst, dst);
2132 }
2133 }
2134 // Zero upper bits
2135 if (typ == T_FLOAT) {
2136 if (UseAVX == 0) {
2137 assert((vtmp != xnoreg) && (tmp != noreg), "required.")do { if (!((vtmp != xnoreg) && (tmp != noreg))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2137, "assert(" "(vtmp != xnoreg) && (tmp != noreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
2138 movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp);
2139 pand(dst, vtmp);
2140 } else {
2141 assert((tmp != noreg), "required.")do { if (!((tmp != noreg))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2141, "assert(" "(tmp != noreg)" ") failed", "required."); ::
breakpoint(); } } while (0)
;
2142 vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp);
2143 }
2144 }
2145}
2146
// Masked vector compare of two registers, writing the result to mask register
// `kdmask`. The element type only selects the element width: byte/boolean ->
// evpcmpb, short/char -> evpcmpw, int/float -> evpcmpd, long/double -> evpcmpq.
// Every case requests a signed comparison. Any other type hits assert(false).
2147void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len) {
2148 switch(typ) {
2149 case T_BYTE:
2150 case T_BOOLEAN:
2151 evpcmpb(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2152 break;
2153 case T_SHORT:
2154 case T_CHAR:
2155 evpcmpw(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2156 break;
2157 case T_INT:
2158 case T_FLOAT:
2159 evpcmpd(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2160 break;
2161 case T_LONG:
2162 case T_DOUBLE:
2163 evpcmpq(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2164 break;
2165 default:
2166 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2166, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2167 break;
2168 }
2169}
2170
// Masked vector compare of a register against a memory operand given as an
// AddressLiteral, writing the result to mask register `kdmask`; `scratch` is
// forwarded to the underlying instruction helper. The element type only
// selects the element width: boolean/byte -> evpcmpb, char/short -> evpcmpw,
// int/float -> evpcmpd, long/double -> evpcmpq. Every case requests a signed
// comparison. Any other type hits assert(false).
2171void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
2172 switch(typ) {
2173 case T_BOOLEAN:
2174 case T_BYTE:
2175 evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2176 break;
2177 case T_CHAR:
2178 case T_SHORT:
2179 evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2180 break;
2181 case T_INT:
2182 case T_FLOAT:
2183 evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2184 break;
2185 case T_LONG:
2186 case T_DOUBLE:
2187 evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2188 break;
2189 default:
2190 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2190, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2191 break;
2192 }
2193}
2194
// Compares src1 and src2 after widening each element to the next larger
// integer size, then narrows the comparison result back into dst.
//   typ           - T_BYTE, T_SHORT or T_INT; anything else trips the assert
//   dst           - receives the narrowed comparison result
//   comparison    - predicate forwarded to vpcmpCCW
//   vlen_in_bytes - input vector size; the encoding used for every emitted
//                   instruction is derived from twice this size
//   vtmp1, vtmp2  - temporaries that hold the widened copies of src1/src2
//   scratch       - forwarded to vpcmpCCW
// Presumably this implements an unsigned compare (the vpmovzx* widenings
// zero-extend, so a signed compare on the wider type orders values as
// unsigned) -- TODO confirm against vpcmpCCW.
2195void C2_MacroAssembler::vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison,
2196 int vlen_in_bytes, XMMRegister vtmp1, XMMRegister vtmp2, Register scratch) {
// The widened operands need twice the input size.
2197 int vlen_enc = vector_length_encoding(vlen_in_bytes*2);
2198 switch (typ) {
2199 case T_BYTE:
// byte -> word, compare as words, pack words back to bytes.
2200 vpmovzxbw(vtmp1, src1, vlen_enc);
2201 vpmovzxbw(vtmp2, src2, vlen_enc);
2202 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
2203 vpacksswb(dst, dst, dst, vlen_enc);
2204 break;
2205 case T_SHORT:
// word -> dword, compare as dwords, pack dwords back to words.
2206 vpmovzxwd(vtmp1, src1, vlen_enc);
2207 vpmovzxwd(vtmp2, src2, vlen_enc);
2208 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
2209 vpackssdw(dst, dst, dst, vlen_enc);
2210 break;
2211 case T_INT:
// dword -> qword, compare as qwords, then shuffle dwords with immediate 8.
2212 vpmovzxdq(vtmp1, src1, vlen_enc);
2213 vpmovzxdq(vtmp2, src2, vlen_enc);
2214 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
2215 vpermilps(dst, dst, 8, vlen_enc);
2216 break;
2217 default:
// No break after this assert: control continues to the check below.
2218 assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2218, "assert(" "false" ") failed", "Should not reach here"
); ::breakpoint(); } } while (0)
;
2219 }
// Extra qword permute applied only for 16-byte inputs.
2220 if (vlen_in_bytes == 16) {
2221 vpermpd(dst, dst, 0x8, vlen_enc);
2222 }
2223}
2224
// Variant of vpcmpu that processes the input in two halves: first the part
// widened directly from src1/src2 (result in dst), then the upper 128 bits
// obtained with vextracti128(..., 1) (result in vtmp3). The two partial
// results are then merged into dst.
//   typ           - T_BYTE, T_SHORT or T_INT; anything else trips the assert
//   comparison    - predicate forwarded to vpcmpCCW
//   vlen_in_bytes - used unscaled to derive the encoding
//   vtmp1, vtmp2  - widened operands; vtmp3 - result for the upper half
//   scratch       - forwarded to vpcmpCCW
// Presumably intended for 32-byte vectors, per the name -- TODO confirm.
2225void C2_MacroAssembler::vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
2226 XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch) {
2227 int vlen_enc = vector_length_encoding(vlen_in_bytes);
2228 switch (typ) {
2229 case T_BYTE:
// First half: byte -> word, compare into dst.
2230 vpmovzxbw(vtmp1, src1, vlen_enc);
2231 vpmovzxbw(vtmp2, src2, vlen_enc);
2232 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// Second half: upper 128 bits of each source, compare into vtmp3.
2233 vextracti128(vtmp1, src1, 1);
2234 vextracti128(vtmp2, src2, 1);
2235 vpmovzxbw(vtmp1, vtmp1, vlen_enc);
2236 vpmovzxbw(vtmp2, vtmp2, vlen_enc);
2237 vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// Pack both word results to bytes, then reorder qwords (0xd8).
2238 vpacksswb(dst, dst, vtmp3, vlen_enc);
2239 vpermpd(dst, dst, 0xd8, vlen_enc);
2240 break;
2241 case T_SHORT:
// Same scheme with word -> dword widening.
2242 vpmovzxwd(vtmp1, src1, vlen_enc);
2243 vpmovzxwd(vtmp2, src2, vlen_enc);
2244 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
2245 vextracti128(vtmp1, src1, 1);
2246 vextracti128(vtmp2, src2, 1);
2247 vpmovzxwd(vtmp1, vtmp1, vlen_enc);
2248 vpmovzxwd(vtmp2, vtmp2, vlen_enc);
2249 vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
2250 vpackssdw(dst, dst, vtmp3, vlen_enc);
2251 vpermpd(dst, dst, 0xd8, vlen_enc);
2252 break;
2253 case T_INT:
// dword -> qword widening; each half is narrowed with vpshufd + vpermq and
// the two halves are merged with vpblendd (mask 0xf0).
2254 vpmovzxdq(vtmp1, src1, vlen_enc);
2255 vpmovzxdq(vtmp2, src2, vlen_enc);
2256 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
2257 vpshufd(dst, dst, 8, vlen_enc);
2258 vpermq(dst, dst, 8, vlen_enc);
2259 vextracti128(vtmp1, src1, 1);
2260 vextracti128(vtmp2, src2, 1);
2261 vpmovzxdq(vtmp1, vtmp1, vlen_enc);
2262 vpmovzxdq(vtmp2, vtmp2, vlen_enc);
2263 vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
2264 vpshufd(vtmp3, vtmp3, 8, vlen_enc);
2265 vpermq(vtmp3, vtmp3, 0x80, vlen_enc);
2266 vpblendd(dst, dst, vtmp3, 0xf0, vlen_enc);
2267 break;
2268 default:
// Unsupported element type: nothing is emitted.
2269 assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2269, "assert(" "false" ") failed", "Should not reach here"
); ::breakpoint(); } } while (0)
;
2270 }
2271}
2272
// Emits a mask-register-controlled blend of src1 and src2 into dst, choosing
// the emitter by element type:
//   T_BYTE            -> evpblendmb
//   T_SHORT           -> evpblendmw
//   T_INT,  T_FLOAT   -> evpblendmd
//   T_LONG, T_DOUBLE  -> evpblendmq
// kmask, merge and vector_len are forwarded unchanged.
// NOTE(review): unlike evpcmp in this file, T_BOOLEAN and T_CHAR are not
// handled here and hit the default assert -- confirm this is intentional.
2273void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
2274 switch(typ) {
2275 case T_BYTE:
2276 evpblendmb(dst, kmask, src1, src2, merge, vector_len);
2277 break;
2278 case T_SHORT:
2279 evpblendmw(dst, kmask, src1, src2, merge, vector_len);
2280 break;
2281 case T_INT:
2282 case T_FLOAT:
2283 evpblendmd(dst, kmask, src1, src2, merge, vector_len);
2284 break;
2285 case T_LONG:
2286 case T_DOUBLE:
2287 evpblendmq(dst, kmask, src1, src2, merge, vector_len);
2288 break;
2289 default:
// Unsupported element type: nothing is emitted.
2290 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2290, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2291 break;
2292 }
2293}
2294
// Emits a flag-setting vector test of src1 against src2, selected by vlen.
//   bt           - BoolTest condition; the code distinguishes
//                  BoolTest::overflow and (for vlen 64) BoolTest::ne
//   vlen         - vector size; cases 32 and 64 use the 256-bit and 512-bit
//                  encodings, so this appears to be in bytes -- TODO confirm
//   src1, src2   - operands of the test
//   vtmp1, vtmp2 - temporaries; required for vlen 4 and 8 (vtmp2 only when
//                  bt == BoolTest::overflow), must be xnoreg otherwise
//   mask         - mask register, used only by the vlen 64 path
// The function only sets flags via ptest/vptest/ktestql/kortestql; consuming
// them is left to the caller.
2295void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
2296 XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
2297 switch(vlen) {
2298 case 4:
2299 assert(vtmp1 != xnoreg, "required.")do { if (!(vtmp1 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2299, "assert(" "vtmp1 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
2300 // Broadcast lower 32 bits to 128 bits before ptest
2301 pshufd(vtmp1, src1, 0x0);
2302 if (bt == BoolTest::overflow) {
2303 assert(vtmp2 != xnoreg, "required.")do { if (!(vtmp2 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2303, "assert(" "vtmp2 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
// For the overflow test the second operand is broadcast the same way.
2304 pshufd(vtmp2, src2, 0x0);
2305 } else {
2306 assert(vtmp2 == xnoreg, "required.")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2306, "assert(" "vtmp2 == xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
// No temporary supplied: test directly against src2.
2307 vtmp2 = src2;
2308 }
2309 ptest(vtmp1, vtmp2);
2310 break;
2311 case 8:
2312 assert(vtmp1 != xnoreg, "required.")do { if (!(vtmp1 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2312, "assert(" "vtmp1 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
2313 // Broadcast lower 64 bits to 128 bits before ptest
// NOTE(review): immediate 0x4 selects source dwords {0,1,0,0}; a plain
// repeat of the low 64 bits would be 0x44 ({0,1,0,1}). Only low-64-bit data
// ends up in the result either way -- confirm this is intentional.
2314 pshufd(vtmp1, src1, 0x4);
2315 if (bt == BoolTest::overflow) {
2316 assert(vtmp2 != xnoreg, "required.")do { if (!(vtmp2 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2316, "assert(" "vtmp2 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
2317 pshufd(vtmp2, src2, 0x4);
2318 } else {
2319 assert(vtmp2 == xnoreg, "required.")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2319, "assert(" "vtmp2 == xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
// No temporary supplied: test directly against src2.
2320 vtmp2 = src2;
2321 }
2322 ptest(vtmp1, vtmp2);
2323 break;
2324 case 16:
// Sources are tested directly; no temporaries may be passed.
2325 assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2325, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
2326 ptest(src1, src2);
2327 break;
2328 case 32:
2329 assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2329, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
2330 vptest(src1, src2, Assembler::AVX_256bit);
2331 break;
2332 case 64:
2333 {
2334 assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2334, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
// Byte-wise equality of src1 and src2 goes into `mask`; the flags are then
// set from the mask register rather than from the vectors.
2335 evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
2336 if (bt == BoolTest::ne) {
2337 ktestql(mask, mask);
2338 } else {
2339 assert(bt == BoolTest::overflow, "required")do { if (!(bt == BoolTest::overflow)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2339, "assert(" "bt == BoolTest::overflow" ") failed", "required"
); ::breakpoint(); } } while (0)
;
2340 kortestql(mask, mask);
2341 }
2342 }
2343 break;
2344 default:
// Unsupported vector size: nothing is emitted.
2345 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2345, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2346 break;
2347 }
2348}
2349
2350//-------------------------------------------------------------------------------------------
2351
2352// IndexOf for constant substrings with size >= 8 chars
2353// which don't need to be loaded through stack.
//
// Parameters:
//   str1, cnt1 - scanned string address and its length in elements
//   str2, cnt2 - substring address and the register that receives its length
//   int_cnt2   - compile-time substring length; asserted to be >= stride
//   result     - on exit, the match index, or -1 if the substring is absent
//   vec        - XMM register holding the current substring vector
//   tmp        - receives the pcmpestri match index
//   ae         - StrIntrinsicNode encoding (LL, UU or UL; LU is rejected)
// cnt1, cnt2 and tmp must be rdx, rax and rcx respectively (asserted below).
// str1 and str2 are only read; cnt1, cnt2, tmp, vec and result are written.
2354void C2_MacroAssembler::string_indexofC8(Register str1, Register str2,
2355 Register cnt1, Register cnt2,
2356 int int_cnt2, Register result,
2357 XMMRegister vec, Register tmp,
2358 int ae) {
2359 ShortBranchVerifier sbv(this);
2360 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2360, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2361 assert(ae != StrIntrinsicNode::LU, "Invalid encoding")do { if (!(ae != StrIntrinsicNode::LU)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2361, "assert(" "ae != StrIntrinsicNode::LU" ") failed", "Invalid encoding"
); ::breakpoint(); } } while (0)
;
2362
2363 // This method uses the pcmpestri instruction with bound registers
2364 // inputs:
2365 // xmm - substring
2366 // rax - substring length (elements count)
2367 // mem - scanned string
2368 // rdx - string length (elements count)
2369 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
2370 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
2371 // outputs:
2372 // rcx - matched index in string
2373 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp ==
rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2373, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
// stride is the number of elements per 16-byte vector; scale1 scales indices
// into the scanned string, scale2 into the substring (bytes for UL).
2374 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
2375 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
2376 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
2377 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
2378
2379 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
2380 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
2381 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
2382
2383 // Note, inline_string_indexOf() generates checks:
2384 // if (substr.count > string.count) return -1;
2385 // if (substr.count == 0) return 0;
2386 assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars")do { if (!(int_cnt2 >= stride)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2386, "assert(" "int_cnt2 >= stride" ") failed", "this code is used only for cnt2 >= 8 chars"
); ::breakpoint(); } } while (0)
;
2387
2388 // Load substring.
// For UL the substring bytes are widened with pmovzxbw while loading.
2389 if (ae == StrIntrinsicNode::UL) {
2390 pmovzxbw(vec, Address(str2, 0));
2391 } else {
2392 movdqu(vec, Address(str2, 0));
2393 }
2394 movl(cnt2, int_cnt2);
2395 movptr(result, str1); // string addr
2396
// The reload/rescan code below is emitted only when the substring is longer
// than one vector; the initial entry jumps over it.
2397 if (int_cnt2 > stride) {
2398 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2398)
;
2399
2400 // Reload substr for rescan, this code
2401 // is executed only for large substrings (> 8 chars)
2402 bind(RELOAD_SUBSTR);
2403 if (ae == StrIntrinsicNode::UL) {
2404 pmovzxbw(vec, Address(str2, 0));
2405 } else {
2406 movdqu(vec, Address(str2, 0));
2407 }
2408 negptr(cnt2); // Jumped here with negative cnt2, convert to positive
2409
2410 bind(RELOAD_STR);
2411 // We came here after the beginning of the substring was
2412 // matched but the rest of it was not so we need to search
2413 // again. Start from the next element after the previous match.
2414
2415 // cnt2 is number of substring remaining elements and
2416 // cnt1 is number of string remaining elements when cmp failed.
2417 // Restored cnt1 = cnt1 - cnt2 + int_cnt2
2418 subl(cnt1, cnt2);
2419 addl(cnt1, int_cnt2);
2420 movl(cnt2, int_cnt2); // Now restore cnt2
2421
2422 decrementl(cnt1); // Shift to next element
2423 cmpl(cnt1, cnt2);
2424 jcc(Assembler::negative, RET_NOT_FOUND); // Left less than substring
2425
2426 addptr(result, (1<<scale1));
2427
2428 } // (int_cnt2 > 8)
2429
2430 // Scan string for start of substr in 16-byte vectors
2431 bind(SCAN_TO_SUBSTR);
2432 pcmpestri(vec, Address(result, 0), mode);
2433 jccb(Assembler::below, FOUND_CANDIDATE)jccb_0(Assembler::below, FOUND_CANDIDATE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2433)
; // CF == 1
2434 subl(cnt1, stride);
2435 jccb(Assembler::lessEqual, RET_NOT_FOUND)jccb_0(Assembler::lessEqual, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2435)
; // Scanned full string
2436 cmpl(cnt1, cnt2);
2437 jccb(Assembler::negative, RET_NOT_FOUND)jccb_0(Assembler::negative, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2437)
; // Left less than substring
// Advance the scan position by one 16-byte vector.
2438 addptr(result, 16);
2439 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2439)
;
2440
2441 // Found a potential substr
2442 bind(FOUND_CANDIDATE);
2443 // Matched whole vector if first element matched (tmp(rcx) == 0).
2444 if (int_cnt2 == stride) {
2445 jccb(Assembler::overflow, RET_FOUND)jccb_0(Assembler::overflow, RET_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2445)
; // OF == 1
2446 } else { // int_cnt2 > 8
2447 jccb(Assembler::overflow, FOUND_SUBSTR)jccb_0(Assembler::overflow, FOUND_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2447)
;
2448 }
2449 // After pcmpestri tmp(rcx) contains matched element index
2450 // Compute start addr of substr
2451 lea(result, Address(result, tmp, scale1));
2452
2453 // Make sure string is still long enough
2454 subl(cnt1, tmp);
2455 cmpl(cnt1, cnt2);
2456 if (int_cnt2 == stride) {
2457 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR)jccb_0(Assembler::greaterEqual, SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2457)
;
2458 } else { // int_cnt2 > 8
2459 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD)jccb_0(Assembler::greaterEqual, MATCH_SUBSTR_HEAD, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2459)
;
2460 }
2461 // Left less than substring.
2462
// Failure exit: result is set to -1.
2463 bind(RET_NOT_FOUND);
2464 movl(result, -1);
2465 jmp(EXIT);
2466
2467 if (int_cnt2 > stride) {
2468 // This code is optimized for the case when whole substring
2469 // is matched if its head is matched.
2470 bind(MATCH_SUBSTR_HEAD);
2471 pcmpestri(vec, Address(result, 0), mode);
2472 // Reload only string if does not match
2473 jcc(Assembler::noOverflow, RELOAD_STR); // OF == 0
2474
2475 Label CONT_SCAN_SUBSTR;
2476 // Compare the rest of substring (> 8 chars).
2477 bind(FOUND_SUBSTR);
2478 // First 8 chars are already matched.
// From here on cnt2 is kept negative: it starts at stride - int_cnt2 and is
// stepped up by stride towards zero.
2479 negptr(cnt2);
2480 addptr(cnt2, stride);
2481
2482 bind(SCAN_SUBSTR);
2483 subl(cnt1, stride);
2484 cmpl(cnt2, -stride); // Do not read beyond substring
2485 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR)jccb_0(Assembler::lessEqual, CONT_SCAN_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2485)
;
2486 // Back-up strings to avoid reading beyond substring:
2487 // cnt1 = cnt1 - cnt2 + 8
2488 addl(cnt1, cnt2); // cnt2 is negative
2489 addl(cnt1, stride);
2490 movl(cnt2, stride); negptr(cnt2);
2491 bind(CONT_SCAN_SUBSTR);
// Small substring lengths are folded into the address displacement; large
// ones are added in a register instead (see the overflow note below).
2492 if (int_cnt2 < (int)G) {
2493 int tail_off1 = int_cnt2<<scale1;
2494 int tail_off2 = int_cnt2<<scale2;
2495 if (ae == StrIntrinsicNode::UL) {
2496 pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2));
2497 } else {
2498 movdqu(vec, Address(str2, cnt2, scale2, tail_off2));
2499 }
2500 pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode);
2501 } else {
2502 // calculate index in register to avoid integer overflow (int_cnt2*2)
2503 movl(tmp, int_cnt2);
2504 addptr(tmp, cnt2);
2505 if (ae == StrIntrinsicNode::UL) {
2506 pmovzxbw(vec, Address(str2, tmp, scale2, 0));
2507 } else {
2508 movdqu(vec, Address(str2, tmp, scale2, 0));
2509 }
2510 pcmpestri(vec, Address(result, tmp, scale1, 0), mode);
2511 }
2512 // Need to reload strings pointers if not matched whole vector
2513 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
2514 addptr(cnt2, stride);
2515 jcc(Assembler::negative, SCAN_SUBSTR);
2516 // Fall through if found full substring
2517
2518 } // (int_cnt2 > 8)
2519
2520 bind(RET_FOUND);
2521 // Found result if we matched full small substring.
2522 // Compute substr offset
2523 subptr(result, str1);
// For UU/UL the byte offset is halved to give the element index.
2524 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
2525 shrl(result, 1); // index
2526 }
2527 bind(EXIT);
2528
2529} // string_indexofC8
2530
2531// Small strings are loaded through stack if they cross page boundary.
2532void C2_MacroAssembler::string_indexof(Register str1, Register str2,
2533 Register cnt1, Register cnt2,
2534 int int_cnt2, Register result,
2535 XMMRegister vec, Register tmp,
2536 int ae) {
2537 ShortBranchVerifier sbv(this);
2538 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2538, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2539 assert(ae != StrIntrinsicNode::LU, "Invalid encoding")do { if (!(ae != StrIntrinsicNode::LU)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2539, "assert(" "ae != StrIntrinsicNode::LU" ") failed", "Invalid encoding"
); ::breakpoint(); } } while (0)
;
2540
2541 //
2542 // int_cnt2 is length of small (< 8 chars) constant substring
2543 // or (-1) for non constant substring in which case its length
2544 // is in cnt2 register.
2545 //
2546 // Note, inline_string_indexOf() generates checks:
2547 // if (substr.count > string.count) return -1;
2548 // if (substr.count == 0) return 0;
2549 //
2550 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
2551 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0")do { if (!(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2
< stride))) { (*g_assert_poison) = 'X';; report_vm_error(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2551, "assert(" "int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride)"
") failed", "should be != 0"); ::breakpoint(); } } while (0)
;
2552 // This method uses the pcmpestri instruction with bound registers
2553 // inputs:
2554 // xmm - substring
2555 // rax - substring length (elements count)
2556 // mem - scanned string
2557 // rdx - string length (elements count)
2558 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
2559 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
2560 // outputs:
2561 // rcx - matched index in string
2562 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp ==
rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2562, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
2563 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
2564 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
2565 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
2566
2567 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
2568 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
2569 FOUND_CANDIDATE;
2570
2571 { //========================================================
2572 // We don't know where these strings are located
2573 // and we can't read beyond them. Load them through stack.
2574 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
2575
2576 movptr(tmp, rsp); // save old SP
2577
2578 if (int_cnt2 > 0) { // small (< 8 chars) constant substring
2579 if (int_cnt2 == (1>>scale2)) { // One byte
2580 assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding")do { if (!((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode
::UL))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2580, "assert(" "(ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL)"
") failed", "Only possible for latin1 encoding"); ::breakpoint
(); } } while (0)
;
2581 load_unsigned_byte(result, Address(str2, 0));
2582 movdl(vec, result); // move 32 bits
2583 } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) { // Three bytes
2584 // Not enough header space in 32-bit VM: 12+3 = 15.
2585 movl(result, Address(str2, -1));
2586 shrl(result, 8);
2587 movdl(vec, result); // move 32 bits
2588 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) { // One char
2589 load_unsigned_short(result, Address(str2, 0));
2590 movdl(vec, result); // move 32 bits
2591 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars
2592 movdl(vec, Address(str2, 0)); // move 32 bits
2593 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars
2594 movq(vec, Address(str2, 0)); // move 64 bits
2595 } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7})
2596 // Array header size is 12 bytes in 32-bit VM
2597 // + 6 bytes for 3 chars == 18 bytes,
2598 // enough space to load vec and shift.
2599 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity")do { if (!(HeapWordSize*TypeArrayKlass::header_size() >= 12
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2599, "assert(" "HeapWordSize*TypeArrayKlass::header_size() >= 12"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
2600 if (ae == StrIntrinsicNode::UL) {
2601 int tail_off = int_cnt2-8;
2602 pmovzxbw(vec, Address(str2, tail_off));
2603 psrldq(vec, -2*tail_off);
2604 }
2605 else {
2606 int tail_off = int_cnt2*(1<<scale2);
2607 movdqu(vec, Address(str2, tail_off-16));
2608 psrldq(vec, 16-tail_off);
2609 }
2610 }
2611 } else { // not constant substring
2612 cmpl(cnt2, stride);
2613 jccb(Assembler::aboveEqual, BIG_STRINGS)jccb_0(Assembler::aboveEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2613)
; // Both strings are big enough
2614
2615 // We can read beyond string if srt+16 does not cross page boundary
2616 // since heaps are aligned and mapped by pages.
2617 assert(os::vm_page_size() < (int)G, "default page should be small")do { if (!(os::vm_page_size() < (int)G)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2617, "assert(" "os::vm_page_size() < (int)G" ") failed"
, "default page should be small"); ::breakpoint(); } } while (
0)
;
2618 movl(result, str2); // We need only low 32 bits
2619 andl(result, (os::vm_page_size()-1));
2620 cmpl(result, (os::vm_page_size()-16));
2621 jccb(Assembler::belowEqual, CHECK_STR)jccb_0(Assembler::belowEqual, CHECK_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2621)
;
2622
2623 // Move small strings to stack to allow load 16 bytes into vec.
2624 subptr(rsp, 16);
2625 int stk_offset = wordSize-(1<<scale2);
2626 push(cnt2);
2627
2628 bind(COPY_SUBSTR);
2629 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) {
2630 load_unsigned_byte(result, Address(str2, cnt2, scale2, -1));
2631 movb(Address(rsp, cnt2, scale2, stk_offset), result);
2632 } else if (ae == StrIntrinsicNode::UU) {
2633 load_unsigned_short(result, Address(str2, cnt2, scale2, -2));
2634 movw(Address(rsp, cnt2, scale2, stk_offset), result);
2635 }
2636 decrement(cnt2);
2637 jccb(Assembler::notZero, COPY_SUBSTR)jccb_0(Assembler::notZero, COPY_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2637)
;
2638
2639 pop(cnt2);
2640 movptr(str2, rsp); // New substring address
2641 } // non constant
2642
2643 bind(CHECK_STR);
2644 cmpl(cnt1, stride);
2645 jccb(Assembler::aboveEqual, BIG_STRINGS)jccb_0(Assembler::aboveEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2645)
;
2646
2647 // Check cross page boundary.
2648 movl(result, str1); // We need only low 32 bits
2649 andl(result, (os::vm_page_size()-1));
2650 cmpl(result, (os::vm_page_size()-16));
2651 jccb(Assembler::belowEqual, BIG_STRINGS)jccb_0(Assembler::belowEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2651)
;
2652
2653 subptr(rsp, 16);
2654 int stk_offset = -(1<<scale1);
2655 if (int_cnt2 < 0) { // not constant
2656 push(cnt2);
2657 stk_offset += wordSize;
2658 }
2659 movl(cnt2, cnt1);
2660
2661 bind(COPY_STR);
2662 if (ae == StrIntrinsicNode::LL) {
2663 load_unsigned_byte(result, Address(str1, cnt2, scale1, -1));
2664 movb(Address(rsp, cnt2, scale1, stk_offset), result);
2665 } else {
2666 load_unsigned_short(result, Address(str1, cnt2, scale1, -2));
2667 movw(Address(rsp, cnt2, scale1, stk_offset), result);
2668 }
2669 decrement(cnt2);
2670 jccb(Assembler::notZero, COPY_STR)jccb_0(Assembler::notZero, COPY_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2670)
;
2671
2672 if (int_cnt2 < 0) { // not constant
2673 pop(cnt2);
2674 }
2675 movptr(str1, rsp); // New string address
2676
2677 bind(BIG_STRINGS);
2678 // Load substring.
2679 if (int_cnt2 < 0) { // -1
2680 if (ae == StrIntrinsicNode::UL) {
2681 pmovzxbw(vec, Address(str2, 0));
2682 } else {
2683 movdqu(vec, Address(str2, 0));
2684 }
2685 push(cnt2); // substr count
2686 push(str2); // substr addr
2687 push(str1); // string addr
2688 } else {
2689 // Small (< 8 chars) constant substrings are loaded already.
2690 movl(cnt2, int_cnt2);
2691 }
2692 push(tmp); // original SP
2693
2694 } // Finished loading
2695
2696 //========================================================
2697 // Start search
2698 //
2699
2700 movptr(result, str1); // string addr
2701
2702 if (int_cnt2 < 0) { // Only for non constant substring
2703 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2703)
;
2704
2705 // SP saved at sp+0
2706 // String saved at sp+1*wordSize
2707 // Substr saved at sp+2*wordSize
2708 // Substr count saved at sp+3*wordSize
2709
2710 // Reload substr for rescan, this code
2711 // is executed only for large substrings (> 8 chars)
2712 bind(RELOAD_SUBSTR);
2713 movptr(str2, Address(rsp, 2*wordSize));
2714 movl(cnt2, Address(rsp, 3*wordSize));
2715 if (ae == StrIntrinsicNode::UL) {
2716 pmovzxbw(vec, Address(str2, 0));
2717 } else {
2718 movdqu(vec, Address(str2, 0));
2719 }
2720 // We came here after the beginning of the substring was
2721 // matched but the rest of it was not so we need to search
2722 // again. Start from the next element after the previous match.
2723 subptr(str1, result); // Restore counter
2724 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
2725 shrl(str1, 1);
2726 }
2727 addl(cnt1, str1);
2728 decrementl(cnt1); // Shift to next element
2729 cmpl(cnt1, cnt2);
2730 jcc(Assembler::negative, RET_NOT_FOUND); // Left less then substring
2731
2732 addptr(result, (1<<scale1));
2733 } // non constant
2734
2735 // Scan string for start of substr in 16-byte vectors
2736 bind(SCAN_TO_SUBSTR);
2737 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp ==
rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2737, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
2738 pcmpestri(vec, Address(result, 0), mode);
2739 jccb(Assembler::below, FOUND_CANDIDATE)jccb_0(Assembler::below, FOUND_CANDIDATE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2739)
; // CF == 1
2740 subl(cnt1, stride);
2741 jccb(Assembler::lessEqual, RET_NOT_FOUND)jccb_0(Assembler::lessEqual, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2741)
; // Scanned full string
2742 cmpl(cnt1, cnt2);
2743 jccb(Assembler::negative, RET_NOT_FOUND)jccb_0(Assembler::negative, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2743)
; // Left less then substring
2744 addptr(result, 16);
2745
2746 bind(ADJUST_STR);
2747 cmpl(cnt1, stride); // Do not read beyond string
2748 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR)jccb_0(Assembler::greaterEqual, SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2748)
;
2749 // Back-up string to avoid reading beyond string.
2750 lea(result, Address(result, cnt1, scale1, -16));
2751 movl(cnt1, stride);
2752 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2752)
;
2753
2754 // Found a potential substr
2755 bind(FOUND_CANDIDATE);
2756 // After pcmpestri tmp(rcx) contains matched element index
2757
2758 // Make sure string is still long enough
2759 subl(cnt1, tmp);
2760 cmpl(cnt1, cnt2);
2761 jccb(Assembler::greaterEqual, FOUND_SUBSTR)jccb_0(Assembler::greaterEqual, FOUND_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2761)
;
2762 // Left less then substring.
2763
2764 bind(RET_NOT_FOUND);
2765 movl(result, -1);
2766 jmp(CLEANUP);
2767
2768 bind(FOUND_SUBSTR);
2769 // Compute start addr of substr
2770 lea(result, Address(result, tmp, scale1));
2771 if (int_cnt2 > 0) { // Constant substring
2772 // Repeat search for small substring (< 8 chars)
2773 // from new point without reloading substring.
2774 // Have to check that we don't read beyond string.
2775 cmpl(tmp, stride-int_cnt2);
2776 jccb(Assembler::greater, ADJUST_STR)jccb_0(Assembler::greater, ADJUST_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2776)
;
2777 // Fall through if matched whole substring.
2778 } else { // non constant
2779 assert(int_cnt2 == -1, "should be != 0")do { if (!(int_cnt2 == -1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2779, "assert(" "int_cnt2 == -1" ") failed", "should be != 0"
); ::breakpoint(); } } while (0)
;
2780
2781 addl(tmp, cnt2);
2782 // Found result if we matched whole substring.
2783 cmpl(tmp, stride);
2784 jcc(Assembler::lessEqual, RET_FOUND);
2785
2786 // Repeat search for small substring (<= 8 chars)
2787 // from new point 'str1' without reloading substring.
2788 cmpl(cnt2, stride);
2789 // Have to check that we don't read beyond string.
2790 jccb(Assembler::lessEqual, ADJUST_STR)jccb_0(Assembler::lessEqual, ADJUST_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2790)
;
2791
2792 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
2793 // Compare the rest of substring (> 8 chars).
2794 movptr(str1, result);
2795
2796 cmpl(tmp, cnt2);
2797 // First 8 chars are already matched.
2798 jccb(Assembler::equal, CHECK_NEXT)jccb_0(Assembler::equal, CHECK_NEXT, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2798)
;
2799
2800 bind(SCAN_SUBSTR);
2801 pcmpestri(vec, Address(str1, 0), mode);
2802 // Need to reload strings pointers if not matched whole vector
2803 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
2804
2805 bind(CHECK_NEXT);
2806 subl(cnt2, stride);
2807 jccb(Assembler::lessEqual, RET_FOUND_LONG)jccb_0(Assembler::lessEqual, RET_FOUND_LONG, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2807)
; // Found full substring
2808 addptr(str1, 16);
2809 if (ae == StrIntrinsicNode::UL) {
2810 addptr(str2, 8);
2811 } else {
2812 addptr(str2, 16);
2813 }
2814 subl(cnt1, stride);
2815 cmpl(cnt2, stride); // Do not read beyond substring
2816 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR)jccb_0(Assembler::greaterEqual, CONT_SCAN_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2816)
;
2817 // Back-up strings to avoid reading beyond substring.
2818
2819 if (ae == StrIntrinsicNode::UL) {
2820 lea(str2, Address(str2, cnt2, scale2, -8));
2821 lea(str1, Address(str1, cnt2, scale1, -16));
2822 } else {
2823 lea(str2, Address(str2, cnt2, scale2, -16));
2824 lea(str1, Address(str1, cnt2, scale1, -16));
2825 }
2826 subl(cnt1, cnt2);
2827 movl(cnt2, stride);
2828 addl(cnt1, stride);
2829 bind(CONT_SCAN_SUBSTR);
2830 if (ae == StrIntrinsicNode::UL) {
2831 pmovzxbw(vec, Address(str2, 0));
2832 } else {
2833 movdqu(vec, Address(str2, 0));
2834 }
2835 jmp(SCAN_SUBSTR);
2836
2837 bind(RET_FOUND_LONG);
2838 movptr(str1, Address(rsp, wordSize));
2839 } // non constant
2840
2841 bind(RET_FOUND);
2842 // Compute substr offset
2843 subptr(result, str1);
2844 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
2845 shrl(result, 1); // index
2846 }
2847 bind(CLEANUP);
2848 pop(rsp); // restore SP
2849
2850} // string_indexof
2851
2852void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
2853 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
2854 ShortBranchVerifier sbv(this);
2855 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2855, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2856
2857 int stride = 8;
2858
2859 Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
2860 SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
2861 RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
2862 FOUND_SEQ_CHAR, DONE_LABEL;
2863
2864 movptr(result, str1);
2865 if (UseAVX >= 2) {
2866 cmpl(cnt1, stride);
2867 jcc(Assembler::less, SCAN_TO_CHAR);
2868 cmpl(cnt1, 2*stride);
2869 jcc(Assembler::less, SCAN_TO_8_CHAR_INIT);
2870 movdl(vec1, ch);
2871 vpbroadcastw(vec1, vec1, Assembler::AVX_256bit);
2872 vpxor(vec2, vec2);
2873 movl(tmp, cnt1);
2874 andl(tmp, 0xFFFFFFF0); //vector count (in chars)
2875 andl(cnt1,0x0000000F); //tail count (in chars)
2876
2877 bind(SCAN_TO_16_CHAR_LOOP);
2878 vmovdqu(vec3, Address(result, 0));
2879 vpcmpeqw(vec3, vec3, vec1, 1);
2880 vptest(vec2, vec3);
2881 jcc(Assembler::carryClear, FOUND_CHAR);
2882 addptr(result, 32);
2883 subl(tmp, 2*stride);
2884 jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
2885 jmp(SCAN_TO_8_CHAR);
2886 bind(SCAN_TO_8_CHAR_INIT);
2887 movdl(vec1, ch);
2888 pshuflw(vec1, vec1, 0x00);
2889 pshufd(vec1, vec1, 0);
2890 pxor(vec2, vec2);
2891 }
2892 bind(SCAN_TO_8_CHAR);
2893 cmpl(cnt1, stride);
2894 jcc(Assembler::less, SCAN_TO_CHAR);
2895 if (UseAVX < 2) {
2896 movdl(vec1, ch);
2897 pshuflw(vec1, vec1, 0x00);
2898 pshufd(vec1, vec1, 0);
2899 pxor(vec2, vec2);
2900 }
2901 movl(tmp, cnt1);
2902 andl(tmp, 0xFFFFFFF8); //vector count (in chars)
2903 andl(cnt1,0x00000007); //tail count (in chars)
2904
2905 bind(SCAN_TO_8_CHAR_LOOP);
2906 movdqu(vec3, Address(result, 0));
2907 pcmpeqw(vec3, vec1);
2908 ptest(vec2, vec3);
2909 jcc(Assembler::carryClear, FOUND_CHAR);
2910 addptr(result, 16);
2911 subl(tmp, stride);
2912 jcc(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
2913 bind(SCAN_TO_CHAR);
2914 testl(cnt1, cnt1);
2915 jcc(Assembler::zero, RET_NOT_FOUND);
2916 bind(SCAN_TO_CHAR_LOOP);
2917 load_unsigned_short(tmp, Address(result, 0));
2918 cmpl(ch, tmp);
2919 jccb(Assembler::equal, FOUND_SEQ_CHAR)jccb_0(Assembler::equal, FOUND_SEQ_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2919)
;
2920 addptr(result, 2);
2921 subl(cnt1, 1);
2922 jccb(Assembler::zero, RET_NOT_FOUND)jccb_0(Assembler::zero, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2922)
;
2923 jmp(SCAN_TO_CHAR_LOOP);
2924
2925 bind(RET_NOT_FOUND);
2926 movl(result, -1);
2927 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2927)
;
2928
2929 bind(FOUND_CHAR);
2930 if (UseAVX >= 2) {
2931 vpmovmskb(tmp, vec3);
2932 } else {
2933 pmovmskb(tmp, vec3);
2934 }
2935 bsfl(ch, tmp);
2936 addptr(result, ch);
2937
2938 bind(FOUND_SEQ_CHAR);
2939 subptr(result, str1);
2940 shrl(result, 1);
2941
2942 bind(DONE_LABEL);
2943} // string_indexof_char
2944
2945void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
2946 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
2947 ShortBranchVerifier sbv(this);
2948 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2948, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2949
2950 int stride = 16;
2951
2952 Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP,
2953 SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP,
2954 RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT,
2955 FOUND_SEQ_CHAR, DONE_LABEL;
2956
2957 movptr(result, str1);
2958 if (UseAVX >= 2) {
2959 cmpl(cnt1, stride);
2960 jcc(Assembler::less, SCAN_TO_CHAR_INIT);
2961 cmpl(cnt1, stride*2);
2962 jcc(Assembler::less, SCAN_TO_16_CHAR_INIT);
2963 movdl(vec1, ch);
2964 vpbroadcastb(vec1, vec1, Assembler::AVX_256bit);
2965 vpxor(vec2, vec2);
2966 movl(tmp, cnt1);
2967 andl(tmp, 0xFFFFFFE0); //vector count (in chars)
2968 andl(cnt1,0x0000001F); //tail count (in chars)
2969
2970 bind(SCAN_TO_32_CHAR_LOOP);
2971 vmovdqu(vec3, Address(result, 0));
2972 vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit);
2973 vptest(vec2, vec3);
2974 jcc(Assembler::carryClear, FOUND_CHAR);
2975 addptr(result, 32);
2976 subl(tmp, stride*2);
2977 jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP);
2978 jmp(SCAN_TO_16_CHAR);
2979
2980 bind(SCAN_TO_16_CHAR_INIT);
2981 movdl(vec1, ch);
2982 pxor(vec2, vec2);
2983 pshufb(vec1, vec2);
2984 }
2985
2986 bind(SCAN_TO_16_CHAR);
2987 cmpl(cnt1, stride);
2988 jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left
2989 if (UseAVX < 2) {
2990 movdl(vec1, ch);
2991 pxor(vec2, vec2);
2992 pshufb(vec1, vec2);
2993 }
2994 movl(tmp, cnt1);
2995 andl(tmp, 0xFFFFFFF0); //vector count (in bytes)
2996 andl(cnt1,0x0000000F); //tail count (in bytes)
2997
2998 bind(SCAN_TO_16_CHAR_LOOP);
2999 movdqu(vec3, Address(result, 0));
3000 pcmpeqb(vec3, vec1);
3001 ptest(vec2, vec3);
3002 jcc(Assembler::carryClear, FOUND_CHAR);
3003 addptr(result, 16);
3004 subl(tmp, stride);
3005 jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items...
3006
3007 bind(SCAN_TO_CHAR_INIT);
3008 testl(cnt1, cnt1);
3009 jcc(Assembler::zero, RET_NOT_FOUND);
3010 bind(SCAN_TO_CHAR_LOOP);
3011 load_unsigned_byte(tmp, Address(result, 0));
3012 cmpl(ch, tmp);
3013 jccb(Assembler::equal, FOUND_SEQ_CHAR)jccb_0(Assembler::equal, FOUND_SEQ_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3013)
;
3014 addptr(result, 1);
3015 subl(cnt1, 1);
3016 jccb(Assembler::zero, RET_NOT_FOUND)jccb_0(Assembler::zero, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3016)
;
3017 jmp(SCAN_TO_CHAR_LOOP);
3018
3019 bind(RET_NOT_FOUND);
3020 movl(result, -1);
3021 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3021)
;
3022
3023 bind(FOUND_CHAR);
3024 if (UseAVX >= 2) {
3025 vpmovmskb(tmp, vec3);
3026 } else {
3027 pmovmskb(tmp, vec3);
3028 }
3029 bsfl(ch, tmp);
3030 addptr(result, ch);
3031
3032 bind(FOUND_SEQ_CHAR);
3033 subptr(result, str1);
3034
3035 bind(DONE_LABEL);
3036} // stringL_indexof_char
3037
3038// helper function for string_compare
3039void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
3040 Address::ScaleFactor scale, Address::ScaleFactor scale1,
3041 Address::ScaleFactor scale2, Register index, int ae) {
3042 if (ae == StrIntrinsicNode::LL) {
3043 load_unsigned_byte(elem1, Address(str1, index, scale, 0));
3044 load_unsigned_byte(elem2, Address(str2, index, scale, 0));
3045 } else if (ae == StrIntrinsicNode::UU) {
3046 load_unsigned_short(elem1, Address(str1, index, scale, 0));
3047 load_unsigned_short(elem2, Address(str2, index, scale, 0));
3048 } else {
3049 load_unsigned_byte(elem1, Address(str1, index, scale1, 0));
3050 load_unsigned_short(elem2, Address(str2, index, scale2, 0));
3051 }
3052}
3053
3054// Compare strings, used for char[] and byte[].
     //
     // Emits code that compares the elements of str1 and str2 and leaves the
     // outcome in 'result': the difference of the first pair of elements that
     // differ, or, when the strings agree over the shorter length, the length
     // difference that is pushed on the stack at the start of the sequence.
     //
     //   str1, str2 - addresses of the two strings (advanced by the emitted code)
     //   cnt1, cnt2 - lengths on entry; both are reused as scratch afterwards
     //   result     - out: comparison result (see above)
     //   vec1       - vector scratch register
     //   ae         - StrIntrinsicNode encoding pair (LL, UU, LU or UL); picks
     //                element widths, scales and strides
     //   mask       - opmask register, referenced only by the AVX-512 loop
     //
     // The SSE4.2/AVX2 paths assert result == rax, cnt2 == rdx and cnt1 == rcx
     // because pcmpestri uses those registers implicitly.
3055void C2_MacroAssembler::string_compare(Register str1, Register str2,
3056 Register cnt1, Register cnt2, Register result,
3057 XMMRegister vec1, int ae, KRegister mask) {
3058 ShortBranchVerifier sbv(this);
3059 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
3060 Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
     // stride2, adr_stride, adr_stride1 and adr_stride2 are only assigned (and
     // only read) inside the UseAVX >= 2 && UseSSE42Intrinsics branch below.
3061 int stride, stride2, adr_stride, adr_stride1, adr_stride2;
     // Step of the AVX-512 loop: 0x40 for LL, lowered to 0x20 for every other encoding.
3062 int stride2x2 = 0x40;
3063 Address::ScaleFactor scale = Address::no_scale;
3064 Address::ScaleFactor scale1 = Address::no_scale;
3065 Address::ScaleFactor scale2 = Address::no_scale;
3066
3067 if (ae != StrIntrinsicNode::LL) {
1
Assuming 'ae' is equal to LL
2
Taking false branch
3068 stride2x2 = 0x20;
3069 }
3070
3071 if (ae
2.1
'ae' is not equal to LU
2.1
'ae' is not equal to LU
== StrIntrinsicNode::LU || ae
2.2
'ae' is not equal to UL
2.2
'ae' is not equal to UL
== StrIntrinsicNode::UL) {
3
Taking false branch
3072 shrl(cnt2, 1);
3073 }
3074 // Compute the minimum of the string lengths and the
3075 // difference of the string lengths (stack).
3076 // Do the conditional move stuff
3077 movl(result, cnt1);
3078 subl(cnt1, cnt2);
     // The length difference stays on the stack until LENGTH_DIFF_LABEL or POP_LABEL pops it.
3079 push(cnt1);
3080 cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
3081
3082 // Is the minimum length zero?
3083 testl(cnt2, cnt2);
3084 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3085 if (ae
3.1
'ae' is equal to LL
3.1
'ae' is equal to LL
== StrIntrinsicNode::LL) {
4
Taking true branch
3086 // Load first bytes
3087 load_unsigned_byte(result, Address(str1, 0)); // result = str1[0]
3088 load_unsigned_byte(cnt1, Address(str2, 0)); // cnt1 = str2[0]
3089 } else if (ae == StrIntrinsicNode::UU) {
3090 // Load first characters
3091 load_unsigned_short(result, Address(str1, 0));
3092 load_unsigned_short(cnt1, Address(str2, 0));
3093 } else {
3094 load_unsigned_byte(result, Address(str1, 0));
3095 load_unsigned_short(cnt1, Address(str2, 0));
3096 }
     // A non-zero difference of the first elements is already the final result.
3097 subl(result, cnt1);
3098 jcc(Assembler::notZero, POP_LABEL);
3099
3100 if (ae
4.1
'ae' is not equal to UU
4.1
'ae' is not equal to UU
== StrIntrinsicNode::UU) {
5
Taking false branch
3101 // Divide length by 2 to get number of chars
3102 shrl(cnt2, 1);
3103 }
     // Only one element in the shorter string and it matched: the lengths decide.
3104 cmpl(cnt2, 1);
3105 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
3106
3107 // Check if the strings start at the same location and setup scale and stride
3108 if (ae
5.1
'ae' is equal to LL
5.1
'ae' is equal to LL
== StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3109 cmpptr(str1, str2);
3110 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
3111 if (ae
5.2
'ae' is equal to LL
5.2
'ae' is equal to LL
== StrIntrinsicNode::LL) {
6
Taking true branch
3112 scale = Address::times_1;
3113 stride = 16;
3114 } else {
3115 scale = Address::times_2;
3116 stride = 8;
3117 }
3118 } else {
3119 scale1 = Address::times_1;
3120 scale2 = Address::times_2;
3121 // scale not used
3122 stride = 8;
3123 }
3124
3125 if (UseAVX >= 2 && UseSSE42Intrinsics) {
7
Assuming 'UseAVX' is >= 2
8
Assuming 'UseSSE42Intrinsics' is true
9
Taking true branch
3126 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
3127 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
3128 Label COMPARE_WIDE_VECTORS_LOOP_AVX2;
3129 Label COMPARE_TAIL_LONG;
3130 Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only _LP64 && AVX3
3131
     // pcmpestri mode; the low bit is cleared for LL below (see the mode
     // description in the SSE4.2-only branch further down).
3132 int pcmpmask = 0x19;
3133 if (ae
9.1
'ae' is equal to LL
9.1
'ae' is equal to LL
== StrIntrinsicNode::LL) {
10
Taking true branch
3134 pcmpmask &= ~0x01;
3135 }
3136
3137 // Setup to compare 16-chars (32-bytes) vectors,
3138 // start from first character again because it has aligned address.
3139 if (ae
10.1
'ae' is equal to LL
10.1
'ae' is equal to LL
== StrIntrinsicNode::LL) {
11
Taking true branch
3140 stride2 = 32;
3141 } else {
3142 stride2 = 16;
3143 }
3144 if (ae
11.1
'ae' is equal to LL
11.1
'ae' is equal to LL
== StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3145 adr_stride = stride << scale;
3146 } else {
3147 adr_stride1 = 8; //stride << scale1;
3148 adr_stride2 = 16; //stride << scale2;
3149 }
3150
3151 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri")do { if (!(result == rax && cnt2 == rdx && cnt1
== rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3151, "assert(" "result == rax && cnt2 == rdx && cnt1 == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
12
Assuming 'result' is equal to 'rax'
13
Assuming 'cnt2' is equal to 'rdx'
14
Assuming 'cnt1' is equal to 'rcx'
15
Taking false branch
16
Loop condition is false. Exiting loop
3152 // rax and rdx are used by pcmpestri as elements counters
3153 movl(result, cnt2);
3154 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
3155 jcc(Assembler::zero, COMPARE_TAIL_LONG);
3156
3157 // fast path : compare first 2 8-char vectors.
3158 bind(COMPARE_16_CHARS);
3159 if (ae
16.1
'ae' is equal to LL
16.1
'ae' is equal to LL
== StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3160 movdqu(vec1, Address(str1, 0));
3161 } else {
3162 pmovzxbw(vec1, Address(str1, 0));
3163 }
3164 pcmpestri(vec1, Address(str2, 0), pcmpmask);
3165 jccb(Assembler::below, COMPARE_INDEX_CHAR)jccb_0(Assembler::below, COMPARE_INDEX_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3165)
;
3166
3167 if (ae
16.2
'ae' is equal to LL
16.2
'ae' is equal to LL
== StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3168 movdqu(vec1, Address(str1, adr_stride));
3169 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
3170 } else {
3171 pmovzxbw(vec1, Address(str1, adr_stride1));
3172 pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask);
3173 }
3174 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS)jccb_0(Assembler::aboveEqual, COMPARE_WIDE_VECTORS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3174)
;
     // Mismatch was in the second vector: bias the index by one vector of elements.
3175 addl(cnt1, stride);
3176
3177 // Compare the characters at index in cnt1
3178 bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
3179 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
3180 subl(result, cnt2);
3181 jmp(POP_LABEL);
3182
3183 // Setup the registers to start vector comparison loop
3184 bind(COMPARE_WIDE_VECTORS);
     // NOTE(review): the analyzer's "null pointer" path below starts from its own
     // assumption that result == rax (step 12). Presumably rax is represented as a
     // null RegisterImpl pointer, which would make this report a false positive --
     // confirm against assembler_x86.hpp / register_x86.hpp before changing code.
3185 if (ae
16.3
'ae' is equal to LL
16.3
'ae' is equal to LL
== StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3186 lea(str1, Address(str1, result, scale));
17
Passing null pointer value via 2nd parameter 'index'
18
Calling constructor for 'Address'
3187 lea(str2, Address(str2, result, scale));
3188 } else {
3189 lea(str1, Address(str1, result, scale1));
3190 lea(str2, Address(str2, result, scale2));
3191 }
3192 subl(result, stride2);
3193 subl(cnt2, stride2);
3194 jcc(Assembler::zero, COMPARE_WIDE_TAIL);
     // From here on 'result' is a negative index that counts up towards zero.
3195 negptr(result);
3196
3197 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
3198 bind(COMPARE_WIDE_VECTORS_LOOP);
3199
     // NOTE(review): "_LP641" looks like the macro name _LP64 with its expansion "1"
     // fused on by the report renderer (the matching #endif comment says _LP64) --
     // confirm against the real source file.
3200#ifdef _LP641
3201 if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
3202 cmpl(cnt2, stride2x2);
3203 jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2)jccb_0(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3203)
;
3204 testl(cnt2, stride2x2-1); // cnt2 holds the vector count
3205 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3205)
; // means we cannot subtract by 0x40
3206
3207 bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
3208 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3209 evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
3210 evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
3211 } else {
3212 vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
3213 evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
3214 }
3215 kortestql(mask, mask);
3216 jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
3217 addptr(result, stride2x2); // update since we already compared at this addr
3218 subl(cnt2, stride2x2); // and sub the size too
3219 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3219)
;
3220
3221 vpxor(vec1, vec1);
3222 jmpb(COMPARE_WIDE_TAIL)jmpb_0(COMPARE_WIDE_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3222)
;
3223 }//if (VM_Version::supports_avx512vlbw())
3224#endif // _LP64
3225
3226
3227 bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
     // XOR of the two vectors is all-zero exactly when they are equal.
3228 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3229 vmovdqu(vec1, Address(str1, result, scale));
3230 vpxor(vec1, Address(str2, result, scale));
3231 } else {
3232 vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
3233 vpxor(vec1, Address(str2, result, scale2));
3234 }
3235 vptest(vec1, vec1);
3236 jcc(Assembler::notZero, VECTOR_NOT_EQUAL);
3237 addptr(result, stride2);
3238 subl(cnt2, stride2);
3239 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
3240 // clean upper bits of YMM registers
3241 vpxor(vec1, vec1);
3242
3243 // compare wide vectors tail
3244 bind(COMPARE_WIDE_TAIL);
3245 testptr(result, result);
3246 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3247
     // Run the AVX2 compare once more with result = -stride2 and cnt2 = stride2,
     // i.e. over the last stride2 elements before the (already advanced) end pointers.
3248 movl(result, stride2);
3249 movl(cnt2, result);
3250 negptr(result);
3251 jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
3252
3253 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
3254 bind(VECTOR_NOT_EQUAL);
3255 // clean upper bits of YMM registers
3256 vpxor(vec1, vec1);
3257 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3258 lea(str1, Address(str1, result, scale));
3259 lea(str2, Address(str2, result, scale));
3260 } else {
3261 lea(str1, Address(str1, result, scale1));
3262 lea(str2, Address(str2, result, scale2));
3263 }
3264 jmp(COMPARE_16_CHARS);
3265
3266 // Compare tail chars, length between 1 to 15 chars
3267 bind(COMPARE_TAIL_LONG);
3268 movl(cnt2, result);
3269 cmpl(cnt2, stride);
3270 jcc(Assembler::less, COMPARE_SMALL_STR);
3271
3272 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3273 movdqu(vec1, Address(str1, 0));
3274 } else {
3275 pmovzxbw(vec1, Address(str1, 0));
3276 }
3277 pcmpestri(vec1, Address(str2, 0), pcmpmask);
3278 jcc(Assembler::below, COMPARE_INDEX_CHAR);
3279 subptr(cnt2, stride);
3280 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3281 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3282 lea(str1, Address(str1, result, scale));
3283 lea(str2, Address(str2, result, scale));
3284 } else {
3285 lea(str1, Address(str1, result, scale1));
3286 lea(str2, Address(str2, result, scale2));
3287 }
     // Hand the remaining elements to the scalar loop with cnt2 as a negative index.
3288 negptr(cnt2);
3289 jmpb(WHILE_HEAD_LABEL)jmpb_0(WHILE_HEAD_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3289)
;
3290
3291 bind(COMPARE_SMALL_STR);
3292 } else if (UseSSE42Intrinsics) {
3293 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3294 int pcmpmask = 0x19;
3295 // Setup to compare 8-char (16-byte) vectors,
3296 // start from first character again because it has aligned address.
3297 movl(result, cnt2);
3298 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
3299 if (ae == StrIntrinsicNode::LL) {
3300 pcmpmask &= ~0x01;
3301 }
3302 jcc(Assembler::zero, COMPARE_TAIL);
3303 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3304 lea(str1, Address(str1, result, scale));
3305 lea(str2, Address(str2, result, scale));
3306 } else {
3307 lea(str1, Address(str1, result, scale1));
3308 lea(str2, Address(str2, result, scale2));
3309 }
3310 negptr(result);
3311
3312 // pcmpestri
3313 // inputs:
3314 // vec1- substring
3315 // rax - negative string length (elements count)
3316 // mem - scanned string
3317 // rdx - string length (elements count)
3318 // pcmpmask - cmp mode: 11000 (string compare with negated result)
3319 // + 00 (unsigned bytes) or + 01 (unsigned shorts)
3320 // outputs:
3321 // rcx - first mismatched element index
3322 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri")do { if (!(result == rax && cnt2 == rdx && cnt1
== rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3322, "assert(" "result == rax && cnt2 == rdx && cnt1 == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
3323
3324 bind(COMPARE_WIDE_VECTORS);
3325 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3326 movdqu(vec1, Address(str1, result, scale));
3327 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
3328 } else {
3329 pmovzxbw(vec1, Address(str1, result, scale1));
3330 pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
3331 }
3332 // After pcmpestri cnt1(rcx) contains mismatched element index
3333
3334 jccb(Assembler::below, VECTOR_NOT_EQUAL)jccb_0(Assembler::below, VECTOR_NOT_EQUAL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3334)
; // CF==1
3335 addptr(result, stride);
3336 subptr(cnt2, stride);
3337 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3337)
;
3338
3339 // compare wide vectors tail
3340 testptr(result, result);
3341 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3342
     // One more vector compare at index -stride, i.e. over the last 'stride' elements.
3343 movl(cnt2, stride);
3344 movl(result, stride);
3345 negptr(result);
3346 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3347 movdqu(vec1, Address(str1, result, scale));
3348 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
3349 } else {
3350 pmovzxbw(vec1, Address(str1, result, scale1));
3351 pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
3352 }
3353 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL)jccb_0(Assembler::aboveEqual, LENGTH_DIFF_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3353)
;
3354
3355 // Mismatched characters in the vectors
3356 bind(VECTOR_NOT_EQUAL);
     // cnt1 = index of the mismatch within the vector + (negative) vector index.
3357 addptr(cnt1, result);
3358 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
3359 subl(result, cnt2);
3360 jmpb(POP_LABEL)jmpb_0(POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3360)
;
3361
3362 bind(COMPARE_TAIL); // limit is zero
3363 movl(cnt2, result);
3364 // Fallthru to tail compare
3365 }
3366 // Shift str2 and str1 to the end of the arrays, negate min
3367 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3368 lea(str1, Address(str1, cnt2, scale));
3369 lea(str2, Address(str2, cnt2, scale));
3370 } else {
3371 lea(str1, Address(str1, cnt2, scale1));
3372 lea(str2, Address(str2, cnt2, scale2));
3373 }
3374 decrementl(cnt2); // first character was compared already
3375 negptr(cnt2);
3376
3377 // Compare the rest of the elements
3378 bind(WHILE_HEAD_LABEL);
     // cnt2 is a negative index relative to the end pointers; the loop ends when it reaches zero.
3379 load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae);
3380 subl(result, cnt1);
3381 jccb(Assembler::notZero, POP_LABEL)jccb_0(Assembler::notZero, POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3381)
;
3382 increment(cnt2);
3383 jccb(Assembler::notZero, WHILE_HEAD_LABEL)jccb_0(Assembler::notZero, WHILE_HEAD_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3383)
;
3384
3385 // Strings are equal up to min length. Return the length difference.
3386 bind(LENGTH_DIFF_LABEL);
     // Fetch the length difference that was pushed at the start.
3387 pop(result);
3388 if (ae == StrIntrinsicNode::UU) {
3389 // Divide diff by 2 to get number of chars
3390 sarl(result, 1);
3391 }
3392 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3392)
;
3393
3394#ifdef _LP641
3395 if (VM_Version::supports_avx512vlbw()) {
3396
3397 bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
3398
     // 'mask' has a 0 bit for every byte that differed: invert it and take the
     // lowest set bit to get the byte offset of the first mismatch.
3399 kmovql(cnt1, mask);
3400 notq(cnt1);
3401 bsfq(cnt2, cnt1);
3402 if (ae != StrIntrinsicNode::LL) {
3403 // Divide diff by 2 to get number of chars
3404 sarl(cnt2, 1);
3405 }
3406 addq(result, cnt2);
3407 if (ae == StrIntrinsicNode::LL) {
3408 load_unsigned_byte(cnt1, Address(str2, result));
3409 load_unsigned_byte(result, Address(str1, result));
3410 } else if (ae == StrIntrinsicNode::UU) {
3411 load_unsigned_short(cnt1, Address(str2, result, scale));
3412 load_unsigned_short(result, Address(str1, result, scale));
3413 } else {
3414 load_unsigned_short(cnt1, Address(str2, result, scale2));
3415 load_unsigned_byte(result, Address(str1, result, scale1));
3416 }
3417 subl(result, cnt1);
3418 jmpb(POP_LABEL)jmpb_0(POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3418)
;
3419 }//if (VM_Version::supports_avx512vlbw())
3420#endif // _LP64
3421
3422 // Discard the stored length difference
3423 bind(POP_LABEL);
3424 pop(cnt1);
3425
3426 // That's it
3427 bind(DONE_LABEL);
     // NOTE(review): the sign of the result is flipped for UL only; presumably the
     // caller passes the UL operands in swapped order -- confirm at the call site.
3428 if(ae == StrIntrinsicNode::UL) {
3429 negl(result);
3430 }
3431
3432}
3433
3434// Search for Non-ASCII character (Negative byte value) in a byte array,
3435// return true if it has any and false otherwise.
3436// ..\jdk\src\java.base\share\classes\java\lang\StringCoding.java
3437// @IntrinsicCandidate
3438// private static boolean hasNegatives(byte[] ba, int off, int len) {
3439// for (int i = off; i < off + len; i++) {
3440// if (ba[i] < 0) {
3441// return true;
3442// }
3443// }
3444// return false;
3445// }
3446void C2_MacroAssembler::has_negatives(Register ary1, Register len,
3447 Register result, Register tmp1,
3448 XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
3449 // rsi: byte array
3450 // rcx: len
3451 // rax: result
3452 ShortBranchVerifier sbv(this);
3453 assert_different_registers(ary1, len, result, tmp1);
3454 assert_different_registers(vec1, vec2);
3455 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
3456
3457 // len == 0
3458 testl(len, len);
3459 jcc(Assembler::zero, FALSE_LABEL);
3460
3461 if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
3462 VM_Version::supports_avx512vlbw() &&
3463 VM_Version::supports_bmi2()) {
3464
3465 Label test_64_loop, test_tail;
3466 Register tmp3_aliased = len;
3467
3468 movl(tmp1, len);
3469 vpxor(vec2, vec2, vec2, Assembler::AVX_512bit);
3470
3471 andl(tmp1, 64 - 1); // tail count (in chars) 0x3F
3472 andl(len, ~(64 - 1)); // vector count (in chars)
3473 jccb(Assembler::zero, test_tail)jccb_0(Assembler::zero, test_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3473)
;
3474
3475 lea(ary1, Address(ary1, len, Address::times_1));
3476 negptr(len);
3477
3478 bind(test_64_loop);
3479 // Check whether our 64 elements of size byte contain negatives
3480 evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
3481 kortestql(mask1, mask1);
3482 jcc(Assembler::notZero, TRUE_LABEL);
3483
3484 addptr(len, 64);
3485 jccb(Assembler::notZero, test_64_loop)jccb_0(Assembler::notZero, test_64_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3485)
;
3486
3487
3488 bind(test_tail);
3489 // bail out when there is nothing to be done
3490 testl(tmp1, -1);
3491 jcc(Assembler::zero, FALSE_LABEL);
3492
3493 // ~(~0 << len) applied up to two times (for 32-bit scenario)
3494#ifdef _LP641
3495 mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
3496 shlxq(tmp3_aliased, tmp3_aliased, tmp1);
3497 notq(tmp3_aliased);
3498 kmovql(mask2, tmp3_aliased);
3499#else
3500 Label k_init;
3501 jmp(k_init);
3502
3503 // We could not read 64-bits from a general purpose register thus we move
3504 // data required to compose 64 1's to the instruction stream
3505 // We emit 64 byte wide series of elements from 0..63 which later on would
3506 // be used as a compare targets with tail count contained in tmp1 register.
3507 // Result would be a k register having tmp1 consecutive number or 1
3508 // counting from least significant bit.
3509 address tmp = pc();
3510 emit_int64(0x0706050403020100);
3511 emit_int64(0x0F0E0D0C0B0A0908);
3512 emit_int64(0x1716151413121110);
3513 emit_int64(0x1F1E1D1C1B1A1918);
3514 emit_int64(0x2726252423222120);
3515 emit_int64(0x2F2E2D2C2B2A2928);
3516 emit_int64(0x3736353433323130);
3517 emit_int64(0x3F3E3D3C3B3A3938);
3518
3519 bind(k_init);
3520 lea(len, InternalAddress(tmp));
3521 // create mask to test for negative byte inside a vector
3522 evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
3523 evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);
3524
3525#endif
3526 evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
3527 ktestq(mask1, mask2);
3528 jcc(Assembler::notZero, TRUE_LABEL);
3529
3530 jmp(FALSE_LABEL);
3531 } else {
3532 movl(result, len); // copy
3533
3534 if (UseAVX >= 2 && UseSSE >= 2) {
3535 // With AVX2, use 32-byte vector compare
3536 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3537
3538 // Compare 32-byte vectors
3539 andl(result, 0x0000001f); // tail count (in bytes)
3540 andl(len, 0xffffffe0); // vector count (in bytes)
3541 jccb(Assembler::zero, COMPARE_TAIL)jccb_0(Assembler::zero, COMPARE_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3541)
;
3542
3543 lea(ary1, Address(ary1, len, Address::times_1));
3544 negptr(len);
3545
3546 movl(tmp1, 0x80808080); // create mask to test for Unicode chars in vector
3547 movdl(vec2, tmp1);
3548 vpbroadcastd(vec2, vec2, Assembler::AVX_256bit);
3549
3550 bind(COMPARE_WIDE_VECTORS);
3551 vmovdqu(vec1, Address(ary1, len, Address::times_1));
3552 vptest(vec1, vec2);
3553 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3553)
;
3554 addptr(len, 32);
3555 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3556
3557 testl(result, result);
3558 jccb(Assembler::zero, FALSE_LABEL)jccb_0(Assembler::zero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3558)
;
3559
3560 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
3561 vptest(vec1, vec2);
3562 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3562)
;
3563 jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3563)
;
3564
3565 bind(COMPARE_TAIL); // len is zero
3566 movl(len, result);
3567 // Fallthru to tail compare
3568 } else if (UseSSE42Intrinsics) {
3569 // With SSE4.2, use double quad vector compare
3570 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3571
3572 // Compare 16-byte vectors
3573 andl(result, 0x0000000f); // tail count (in bytes)
3574 andl(len, 0xfffffff0); // vector count (in bytes)
3575 jcc(Assembler::zero, COMPARE_TAIL);
3576
3577 lea(ary1, Address(ary1, len, Address::times_1));
3578 negptr(len);
3579
3580 movl(tmp1, 0x80808080);
3581 movdl(vec2, tmp1);
3582 pshufd(vec2, vec2, 0);
3583
3584 bind(COMPARE_WIDE_VECTORS);
3585 movdqu(vec1, Address(ary1, len, Address::times_1));
3586 ptest(vec1, vec2);
3587 jcc(Assembler::notZero, TRUE_LABEL);
3588 addptr(len, 16);
3589 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3590
3591 testl(result, result);
3592 jcc(Assembler::zero, FALSE_LABEL);
3593
3594 movdqu(vec1, Address(ary1, result, Address::times_1, -16));
3595 ptest(vec1, vec2);
3596 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3596)
;
3597 jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3597)
;
3598
3599 bind(COMPARE_TAIL); // len is zero
3600 movl(len, result);
3601 // Fallthru to tail compare
3602 }
3603 }
3604 // Compare 4-byte vectors
3605 andl(len, 0xfffffffc); // vector count (in bytes)
3606 jccb(Assembler::zero, COMPARE_CHAR)jccb_0(Assembler::zero, COMPARE_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3606)
;
3607
3608 lea(ary1, Address(ary1, len, Address::times_1));
3609 negptr(len);
3610
3611 bind(COMPARE_VECTORS);
3612 movl(tmp1, Address(ary1, len, Address::times_1));
3613 andl(tmp1, 0x80808080);
3614 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3614)
;
3615 addptr(len, 4);
3616 jcc(Assembler::notZero, COMPARE_VECTORS);
3617
3618 // Compare trailing char (final 2 bytes), if any
3619 bind(COMPARE_CHAR);
3620 testl(result, 0x2); // tail char
3621 jccb(Assembler::zero, COMPARE_BYTE)jccb_0(Assembler::zero, COMPARE_BYTE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3621)
;
3622 load_unsigned_short(tmp1, Address(ary1, 0));
3623 andl(tmp1, 0x00008080);
3624 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3624)
;
3625 subptr(result, 2);
3626 lea(ary1, Address(ary1, 2));
3627
3628 bind(COMPARE_BYTE);
3629 testl(result, 0x1); // tail byte
3630 jccb(Assembler::zero, FALSE_LABEL)jccb_0(Assembler::zero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3630)
;
3631 load_unsigned_byte(tmp1, Address(ary1, 0));
3632 andl(tmp1, 0x00000080);
3633 jccb(Assembler::notEqual, TRUE_LABEL)jccb_0(Assembler::notEqual, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3633)
;
3634 jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3634)
;
3635
3636 bind(TRUE_LABEL);
3637 movl(result, 1); // return true
3638 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3638)
;
3639
3640 bind(FALSE_LABEL);
3641 xorl(result, result); // return false
3642
3643 // That's it
3644 bind(DONE);
3645 if (UseAVX >= 2 && UseSSE >= 2) {
3646 // clean upper bits of YMM registers
3647 vpxor(vec1, vec1);
3648 vpxor(vec2, vec2);
3649 }
3650}
3651// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
3652void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
3653 Register limit, Register result, Register chr,
3654 XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
3655 ShortBranchVerifier sbv(this);
3656 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
3657
3658 int length_offset = arrayOopDesc::length_offset_in_bytes();
3659 int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
3660
3661 if (is_array_equ) {
3662 // Check the input args
3663 cmpoop(ary1, ary2);
3664 jcc(Assembler::equal, TRUE_LABEL);
3665
3666 // Need additional checks for arrays_equals.
3667 testptr(ary1, ary1);
3668 jcc(Assembler::zero, FALSE_LABEL);
3669 testptr(ary2, ary2);
3670 jcc(Assembler::zero, FALSE_LABEL);
3671
3672 // Check the lengths
3673 movl(limit, Address(ary1, length_offset));
3674 cmpl(limit, Address(ary2, length_offset));
3675 jcc(Assembler::notEqual, FALSE_LABEL);
3676 }
3677
3678 // count == 0
3679 testl(limit, limit);
3680 jcc(Assembler::zero, TRUE_LABEL);
3681
3682 if (is_array_equ) {
3683 // Load array address
3684 lea(ary1, Address(ary1, base_offset));
3685 lea(ary2, Address(ary2, base_offset));
3686 }
3687
3688 if (is_array_equ && is_char) {
3689 // arrays_equals when used for char[].
3690 shll(limit, 1); // byte count != 0
3691 }
3692 movl(result, limit); // copy
3693
3694 if (UseAVX >= 2) {
3695 // With AVX2, use 32-byte vector compare
3696 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3697
3698 // Compare 32-byte vectors
3699 andl(result, 0x0000001f); // tail count (in bytes)
3700 andl(limit, 0xffffffe0); // vector count (in bytes)
3701 jcc(Assembler::zero, COMPARE_TAIL);
3702
3703 lea(ary1, Address(ary1, limit, Address::times_1));
3704 lea(ary2, Address(ary2, limit, Address::times_1));
3705 negptr(limit);
3706
3707#ifdef _LP641
3708 if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
3709 Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3;
3710
3711 cmpl(limit, -64);
3712 jcc(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2);
3713
3714 bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
3715
3716 evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
3717 evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
3718 kortestql(mask, mask);
3719 jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
3720 addptr(limit, 64); // update since we already compared at this addr
3721 cmpl(limit, -64);
3722 jccb(Assembler::lessEqual, COMPARE_WIDE_VECTORS_LOOP_AVX3)jccb_0(Assembler::lessEqual, COMPARE_WIDE_VECTORS_LOOP_AVX3, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3722)
;
3723
3724 // At this point we may still need to compare -limit+result bytes.
3725 // We could execute the next two instruction and just continue via non-wide path:
3726 // cmpl(limit, 0);
3727 // jcc(Assembler::equal, COMPARE_TAIL); // true
3728 // But since we stopped at the points ary{1,2}+limit which are
3729 // not farther than 64 bytes from the ends of arrays ary{1,2}+result
3730 // (|limit| <= 32 and result < 32),
3731 // we may just compare the last 64 bytes.
3732 //
3733 addptr(result, -64); // it is safe, bc we just came from this area
3734 evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
3735 evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
3736 kortestql(mask, mask);
3737 jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
3738
3739 jmp(TRUE_LABEL);
3740
3741 bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
3742
3743 }//if (VM_Version::supports_avx512vlbw())
3744#endif //_LP64
3745 bind(COMPARE_WIDE_VECTORS);
3746 vmovdqu(vec1, Address(ary1, limit, Address::times_1));
3747 vmovdqu(vec2, Address(ary2, limit, Address::times_1));
3748 vpxor(vec1, vec2);
3749
3750 vptest(vec1, vec1);
3751 jcc(Assembler::notZero, FALSE_LABEL);
3752 addptr(limit, 32);
3753 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3754
3755 testl(result, result);
3756 jcc(Assembler::zero, TRUE_LABEL);
3757
3758 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
3759 vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
3760 vpxor(vec1, vec2);
3761
3762 vptest(vec1, vec1);
3763 jccb(Assembler::notZero, FALSE_LABEL)jccb_0(Assembler::notZero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3763)
;
3764 jmpb(TRUE_LABEL)jmpb_0(TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3764)
;
3765
3766 bind(COMPARE_TAIL); // limit is zero
3767 movl(limit, result);
3768 // Fallthru to tail compare
3769 } else if (UseSSE42Intrinsics) {
3770 // With SSE4.2, use double quad vector compare
3771 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3772
3773 // Compare 16-byte vectors
3774 andl(result, 0x0000000f); // tail count (in bytes)
3775 andl(limit, 0xfffffff0); // vector count (in bytes)
3776 jcc(Assembler::zero, COMPARE_TAIL);
3777
3778 lea(ary1, Address(ary1, limit, Address::times_1));
3779 lea(ary2, Address(ary2, limit, Address::times_1));
3780 negptr(limit);
3781
3782 bind(COMPARE_WIDE_VECTORS);
3783 movdqu(vec1, Address(ary1, limit, Address::times_1));
3784 movdqu(vec2, Address(ary2, limit, Address::times_1));
3785 pxor(vec1, vec2);
3786
3787 ptest(vec1, vec1);
3788 jcc(Assembler::notZero, FALSE_LABEL);
3789 addptr(limit, 16);
3790 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3791
3792 testl(result, result);
3793 jcc(Assembler::zero, TRUE_LABEL);
3794
3795 movdqu(vec1, Address(ary1, result, Address::times_1, -16));
3796 movdqu(vec2, Address(ary2, result, Address::times_1, -16));
3797 pxor(vec1, vec2);
3798
3799 ptest(vec1, vec1);
3800 jccb(Assembler::notZero, FALSE_LABEL)jccb_0(Assembler::notZero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3800)
;
3801 jmpb(TRUE_LABEL)jmpb_0(TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3801)
;
3802
3803 bind(COMPARE_TAIL); // limit is zero
3804 movl(limit, result);
3805 // Fallthru to tail compare
3806 }
3807
3808 // Compare 4-byte vectors
3809 andl(limit, 0xfffffffc); // vector count (in bytes)
3810 jccb(Assembler::zero, COMPARE_CHAR)jccb_0(Assembler::zero, COMPARE_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3810)
;
3811
3812 lea(ary1, Address(ary1, limit, Address::times_1));
3813 lea(ary2, Address(ary2, limit, Address::times_1));
3814 negptr(limit);
3815
3816 bind(COMPARE_VECTORS);
3817 movl(chr, Address(ary1, limit, Address::times_1));
3818 cmpl(chr, Address(ary2, limit, Address::times_1));
3819 jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3819)
;
3820 addptr(limit, 4);
3821 jcc(Assembler::notZero, COMPARE_VECTORS);
3822
3823 // Compare trailing char (final 2 bytes), if any
3824 bind(COMPARE_CHAR);
3825 testl(result, 0x2); // tail char
3826 jccb(Assembler::zero, COMPARE_BYTE)jccb_0(Assembler::zero, COMPARE_BYTE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3826)
;
3827 load_unsigned_short(chr, Address(ary1, 0));
3828 load_unsigned_short(limit, Address(ary2, 0));
3829 cmpl(chr, limit);
3830 jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3830)
;
3831
3832 if (is_array_equ && is_char) {
3833 bind(COMPARE_BYTE);
3834 } else {
3835 lea(ary1, Address(ary1, 2));
3836 lea(ary2, Address(ary2, 2));
3837
3838 bind(COMPARE_BYTE);
3839 testl(result, 0x1); // tail byte
3840 jccb(Assembler::zero, TRUE_LABEL)jccb_0(Assembler::zero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3840)
;
3841 load_unsigned_byte(chr, Address(ary1, 0));
3842 load_unsigned_byte(limit, Address(ary2, 0));
3843 cmpl(chr, limit);
3844 jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3844)
;
3845 }
3846 bind(TRUE_LABEL);
3847 movl(result, 1); // return true
3848 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3848)
;
3849
3850 bind(FALSE_LABEL);
3851 xorl(result, result); // return false
3852
3853 // That's it
3854 bind(DONE);
3855 if (UseAVX >= 2) {
3856 // clean upper bits of YMM registers
3857 vpxor(vec1, vec1);
3858 vpxor(vec2, vec2);
3859 }
3860}
3861
3862void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
3863 XMMRegister src1, int imm8, bool merge, int vlen_enc) {
3864 switch(ideal_opc) {
3865 case Op_LShiftVS:
3866 Assembler::evpsllw(dst, mask, src1, imm8, merge, vlen_enc); break;
3867 case Op_LShiftVI:
3868 Assembler::evpslld(dst, mask, src1, imm8, merge, vlen_enc); break;
3869 case Op_LShiftVL:
3870 Assembler::evpsllq(dst, mask, src1, imm8, merge, vlen_enc); break;
3871 case Op_RShiftVS:
3872 Assembler::evpsraw(dst, mask, src1, imm8, merge, vlen_enc); break;
3873 case Op_RShiftVI:
3874 Assembler::evpsrad(dst, mask, src1, imm8, merge, vlen_enc); break;
3875 case Op_RShiftVL:
3876 Assembler::evpsraq(dst, mask, src1, imm8, merge, vlen_enc); break;
3877 case Op_URShiftVS:
3878 Assembler::evpsrlw(dst, mask, src1, imm8, merge, vlen_enc); break;
3879 case Op_URShiftVI:
3880 Assembler::evpsrld(dst, mask, src1, imm8, merge, vlen_enc); break;
3881 case Op_URShiftVL:
3882 Assembler::evpsrlq(dst, mask, src1, imm8, merge, vlen_enc); break;
3883 case Op_RotateRightV:
3884 evrord(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
3885 case Op_RotateLeftV:
3886 evrold(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
3887 default:
3888 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3888, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
3889 }
3890}
3891
3892void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
3893 XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc,
3894 bool is_varshift) {
3895 switch (ideal_opc) {
3896 case Op_AddVB:
3897 evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
3898 case Op_AddVS:
3899 evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
3900 case Op_AddVI:
3901 evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
3902 case Op_AddVL:
3903 evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
3904 case Op_AddVF:
3905 evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
3906 case Op_AddVD:
3907 evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
3908 case Op_SubVB:
3909 evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
3910 case Op_SubVS:
3911 evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
3912 case Op_SubVI:
3913 evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
3914 case Op_SubVL:
3915 evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
3916 case Op_SubVF:
3917 evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
3918 case Op_SubVD:
3919 evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
3920 case Op_MulVS:
3921 evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
3922 case Op_MulVI:
3923 evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
3924 case Op_MulVL:
3925 evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
3926 case Op_MulVF:
3927 evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
3928 case Op_MulVD:
3929 evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
3930 case Op_DivVF:
3931 evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
3932 case Op_DivVD:
3933 evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
3934 case Op_SqrtVF:
3935 evsqrtps(dst, mask, src1, src2, merge, vlen_enc); break;
3936 case Op_SqrtVD:
3937 evsqrtpd(dst, mask, src1, src2, merge, vlen_enc); break;
3938 case Op_AbsVB:
3939 evpabsb(dst, mask, src2, merge, vlen_enc); break;
3940 case Op_AbsVS:
3941 evpabsw(dst, mask, src2, merge, vlen_enc); break;
3942 case Op_AbsVI:
3943 evpabsd(dst, mask, src2, merge, vlen_enc); break;
3944 case Op_AbsVL:
3945 evpabsq(dst, mask, src2, merge, vlen_enc); break;
3946 case Op_FmaVF:
3947 evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
3948 case Op_FmaVD:
3949 evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
3950 case Op_VectorRearrange:
3951 evperm(eType, dst, mask, src2, src1, merge, vlen_enc); break;
3952 case Op_LShiftVS:
3953 evpsllw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3954 case Op_LShiftVI:
3955 evpslld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3956 case Op_LShiftVL:
3957 evpsllq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3958 case Op_RShiftVS:
3959 evpsraw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3960 case Op_RShiftVI:
3961 evpsrad(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3962 case Op_RShiftVL:
3963 evpsraq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3964 case Op_URShiftVS:
3965 evpsrlw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3966 case Op_URShiftVI:
3967 evpsrld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3968 case Op_URShiftVL:
3969 evpsrlq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3970 case Op_RotateLeftV:
3971 evrold(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3972 case Op_RotateRightV:
3973 evrord(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3974 case Op_MaxV:
3975 evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3976 case Op_MinV:
3977 evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3978 case Op_XorV:
3979 evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3980 case Op_OrV:
3981 evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3982 case Op_AndV:
3983 evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3984 default:
3985 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3985, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
3986 }
3987}
3988
3989void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
3990 XMMRegister src1, Address src2, bool merge, int vlen_enc) {
3991 switch (ideal_opc) {
3992 case Op_AddVB:
3993 evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
3994 case Op_AddVS:
3995 evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
3996 case Op_AddVI:
3997 evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
3998 case Op_AddVL:
3999 evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
4000 case Op_AddVF:
4001 evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
4002 case Op_AddVD:
4003 evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
4004 case Op_SubVB:
4005 evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
4006 case Op_SubVS:
4007 evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
4008 case Op_SubVI:
4009 evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
4010 case Op_SubVL:
4011 evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
4012 case Op_SubVF:
4013 evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
4014 case Op_SubVD:
4015 evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
4016 case Op_MulVS:
4017 evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
4018 case Op_MulVI:
4019 evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
4020 case Op_MulVL:
4021 evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
4022 case Op_MulVF:
4023 evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
4024 case Op_MulVD:
4025 evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
4026 case Op_DivVF:
4027 evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
4028 case Op_DivVD:
4029 evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
4030 case Op_FmaVF:
4031 evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
4032 case Op_FmaVD:
4033 evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
4034 case Op_MaxV:
4035 evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4036 case Op_MinV:
4037 evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4038 case Op_XorV:
4039 evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4040 case Op_OrV:
4041 evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4042 case Op_AndV:
4043 evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4044 default:
4045 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4045, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
4046 }
4047}
4048
4049void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
4050 KRegister src1, KRegister src2) {
4051 BasicType etype = T_ILLEGAL;
4052 switch(mask_len) {
4053 case 2:
4054 case 4:
4055 case 8: etype = T_BYTE; break;
4056 case 16: etype = T_SHORT; break;
4057 case 32: etype = T_INT; break;
4058 case 64: etype = T_LONG; break;
4059 default: fatal("Unsupported type")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4059, "Unsupported type"); ::breakpoint(); } while (0)
; break;
4060 }
4061 assert(etype != T_ILLEGAL, "")do { if (!(etype != T_ILLEGAL)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4061, "assert(" "etype != T_ILLEGAL" ") failed", ""); ::breakpoint
(); } } while (0)
;
4062 switch(ideal_opc) {
4063 case Op_AndVMask:
4064 kand(etype, dst, src1, src2); break;
4065 case Op_OrVMask:
4066 kor(etype, dst, src1, src2); break;
4067 case Op_XorVMask:
4068 kxor(etype, dst, src1, src2); break;
4069 default:
4070 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4070, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
4071 }
4072}
4073
4074/*
4075 * Algorithm for vector D2L and F2I conversions:-
4076 * a) Perform vector D2L/F2I cast.
4077 * b) Choose fast path if none of the result vector lane contains 0x80000000 value.
4078 * It signifies that source value could be any of the special floating point
4079 * values(NaN,-Inf,Inf,Max,-Min).
4080 * c) Set destination to zero if source is NaN value.
4081 * d) Replace 0x80000000 with MaxInt if source lane contains a +ve value.
4082 */
4083
4084void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
4085 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
4086 Register scratch, int vec_enc) {
4087 Label done;
4088 evcvttpd2qq(dst, src, vec_enc);
4089 evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
4090 evpcmpeqq(ktmp1, xtmp1, dst, vec_enc);
4091 kortestwl(ktmp1, ktmp1);
4092 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4092)
;
4093
4094 vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
4095 evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
4096 evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
4097
4098 kxorwl(ktmp1, ktmp1, ktmp2);
4099 evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
4100 vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
4101 evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
4102 bind(done);
4103}
4104
4105void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
4106 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
4107 AddressLiteral float_sign_flip, Register scratch, int vec_enc) {
4108 Label done;
4109 vcvttps2dq(dst, src, vec_enc);
4110 vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc);
4111 vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
4112 vptest(xtmp2, xtmp2, vec_enc);
4113 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4113)
;
4114
4115 vpcmpeqd(xtmp4, xtmp4, xtmp4, vec_enc);
4116 vpxor(xtmp1, xtmp1, xtmp4, vec_enc);
4117
4118 vpxor(xtmp4, xtmp4, xtmp4, vec_enc);
4119 vcmpps(xtmp3, src, src, Assembler::UNORD_Q, vec_enc);
4120 vblendvps(dst, dst, xtmp4, xtmp3, vec_enc);
4121
4122 // Recompute the mask for remaining special value.
4123 vpxor(xtmp2, xtmp2, xtmp3, vec_enc);
4124 // Extract SRC values corresponding to TRUE mask lanes.
4125 vpand(xtmp4, xtmp2, src, vec_enc);
4126 // Flip mask bits so that MSB bit of MASK lanes corresponding to +ve special
4127 // values are set.
4128 vpxor(xtmp3, xtmp2, xtmp4, vec_enc);
4129
4130 vblendvps(dst, dst, xtmp1, xtmp3, vec_enc);
4131 bind(done);
4132}
4133
4134void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
4135 KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
4136 Register scratch, int vec_enc) {
4137 Label done;
4138 vcvttps2dq(dst, src, vec_enc);
4139 evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch);
4140 Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
4141 kortestwl(ktmp1, ktmp1);
4142 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4142)
;
4143
4144 vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
4145 evcmpps(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
4146 evmovdqul(dst, ktmp2, xtmp2, true, vec_enc);
4147
4148 kxorwl(ktmp1, ktmp1, ktmp2);
4149 evcmpps(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
4150 vpternlogd(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
4151 evmovdqul(dst, ktmp1, xtmp2, true, vec_enc);
4152 bind(done);
4153}
4154
4155#ifdef _LP641
4156void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
4157 Register rtmp2, XMMRegister xtmp, int mask_len,
4158 int vec_enc) {
4159 int index = 0;
4160 int vindex = 0;
4161 mov64(rtmp1, 0x0101010101010101L);
4162 pdep(rtmp1, src, rtmp1);
4163 if (mask_len > 8) {
4164 movq(rtmp2, src);
4165 vpxor(xtmp, xtmp, xtmp, vec_enc);
4166 movq(xtmp, rtmp1);
4167 }
4168 movq(dst, rtmp1);
4169
4170 mask_len -= 8;
4171 while (mask_len > 0) {
4172 assert ((mask_len & 0x7) == 0, "mask must be multiple of 8")do { if (!((mask_len & 0x7) == 0)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4172, "assert(" "(mask_len & 0x7) == 0" ") failed", "mask must be multiple of 8"
); ::breakpoint(); } } while (0)
;
4173 index++;
4174 if ((index % 2) == 0) {
4175 pxor(xtmp, xtmp);
4176 }
4177 mov64(rtmp1, 0x0101010101010101L);
4178 shrq(rtmp2, 8);
4179 pdep(rtmp1, rtmp2, rtmp1);
4180 pinsrq(xtmp, rtmp1, index % 2);
4181 vindex = index / 2;
4182 if (vindex) {
4183 // Write entire 16 byte vector when both 64 bit
4184 // lanes are update to save redundant instructions.
4185 if (index % 2) {
4186 vinsertf128(dst, dst, xtmp, vindex);
4187 }
4188 } else {
4189 vmovdqu(dst, xtmp);
4190 }
4191 mask_len -= 8;
4192 }
4193}
4194
4195void C2_MacroAssembler::vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen) {
4196 switch(opc) {
4197 case Op_VectorMaskTrueCount:
4198 popcntq(dst, tmp);
4199 break;
4200 case Op_VectorMaskLastTrue:
4201 if (VM_Version::supports_lzcnt()) {
4202 lzcntq(tmp, tmp);
4203 movl(dst, 63);
4204 subl(dst, tmp);
4205 } else {
4206 movl(dst, -1);
4207 bsrq(tmp, tmp);
4208 cmov32(Assembler::notZero, dst, tmp);
4209 }
4210 break;
4211 case Op_VectorMaskFirstTrue:
4212 if (VM_Version::supports_bmi1()) {
4213 if (masklen < 32) {
4214 orl(tmp, 1 << masklen);
4215 tzcntl(dst, tmp);
4216 } else if (masklen == 32) {
4217 tzcntl(dst, tmp);
4218 } else {
4219 assert(masklen == 64, "")do { if (!(masklen == 64)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4219, "assert(" "masklen == 64" ") failed", ""); ::breakpoint
(); } } while (0)
;
4220 tzcntq(dst, tmp);
4221 }
4222 } else {
4223 if (masklen < 32) {
4224 orl(tmp, 1 << masklen);
4225 bsfl(dst, tmp);
4226 } else {
4227 assert(masklen == 32 || masklen == 64, "")do { if (!(masklen == 32 || masklen == 64)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4227, "assert(" "masklen == 32 || masklen == 64" ") failed"
, ""); ::breakpoint(); } } while (0)
;
4228 movl(dst, masklen);
4229 if (masklen == 32) {
4230 bsfl(tmp, tmp);
4231 } else {
4232 bsfq(tmp, tmp);
4233 }
4234 cmov32(Assembler::notZero, dst, tmp);
4235 }
4236 }
4237 break;
4238 case Op_VectorMaskToLong:
4239 assert(dst == tmp, "Dst and tmp should be the same for toLong operations")do { if (!(dst == tmp)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4239, "assert(" "dst == tmp" ") failed", "Dst and tmp should be the same for toLong operations"
); ::breakpoint(); } } while (0)
;
4240 break;
4241 default: assert(false, "Unhandled mask operation")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4241, "assert(" "false" ") failed", "Unhandled mask operation"
); ::breakpoint(); } } while (0)
;
4242 }
4243}
4244
4245void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp,
4246 int masklen, int masksize, int vec_enc) {
4247 assert(VM_Version::supports_popcnt(), "")do { if (!(VM_Version::supports_popcnt())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4247, "assert(" "VM_Version::supports_popcnt()" ") failed",
""); ::breakpoint(); } } while (0)
;
4248
4249 if(VM_Version::supports_avx512bw()) {
4250 kmovql(tmp, mask);
4251 } else {
4252 assert(masklen <= 16, "")do { if (!(masklen <= 16)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4252, "assert(" "masklen <= 16" ") failed", ""); ::breakpoint
(); } } while (0)
;
4253 kmovwl(tmp, mask);
4254 }
4255
4256 // Mask generated out of partial vector comparisons/replicate/mask manipulation
4257 // operations needs to be clipped.
4258 if (masksize < 16 && opc != Op_VectorMaskFirstTrue) {
4259 andq(tmp, (1 << masklen) - 1);
4260 }
4261
4262 vector_mask_operation_helper(opc, dst, tmp, masklen);
4263}
4264
4265void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
4266 Register tmp, int masklen, BasicType bt, int vec_enc) {
4267 assert(vec_enc == AVX_128bit && VM_Version::supports_avx() ||do { if (!(vec_enc == AVX_128bit && VM_Version::supports_avx
() || vec_enc == AVX_256bit && (VM_Version::supports_avx2
() || type2aelembytes(bt) >= 4))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4268, "assert(" "vec_enc == AVX_128bit && VM_Version::supports_avx() || vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4)"
") failed", ""); ::breakpoint(); } } while (0)
4268 vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4), "")do { if (!(vec_enc == AVX_128bit && VM_Version::supports_avx
() || vec_enc == AVX_256bit && (VM_Version::supports_avx2
() || type2aelembytes(bt) >= 4))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4268, "assert(" "vec_enc == AVX_128bit && VM_Version::supports_avx() || vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4)"
") failed", ""); ::breakpoint(); } } while (0)
;
4269 assert(VM_Version::supports_popcnt(), "")do { if (!(VM_Version::supports_popcnt())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4269, "assert(" "VM_Version::supports_popcnt()" ") failed",
""); ::breakpoint(); } } while (0)
;
4270
4271 bool need_clip = false;
4272 switch(bt) {
4273 case T_BOOLEAN:
4274 // While masks of other types contain 0, -1; boolean masks contain lane values of 0, 1
4275 vpxor(xtmp, xtmp, xtmp, vec_enc);
4276 vpsubb(xtmp, xtmp, mask, vec_enc);
4277 vpmovmskb(tmp, xtmp, vec_enc);
4278 need_clip = masklen < 16;
4279 break;
4280 case T_BYTE:
4281 vpmovmskb(tmp, mask, vec_enc);
4282 need_clip = masklen < 16;
4283 break;
4284 case T_SHORT:
4285 vpacksswb(xtmp, mask, mask, vec_enc);
4286 if (masklen >= 16) {
4287 vpermpd(xtmp, xtmp, 8, vec_enc);
4288 }
4289 vpmovmskb(tmp, xtmp, Assembler::AVX_128bit);
4290 need_clip = masklen < 16;
4291 break;
4292 case T_INT:
4293 case T_FLOAT:
4294 vmovmskps(tmp, mask, vec_enc);
4295 need_clip = masklen < 4;
4296 break;
4297 case T_LONG:
4298 case T_DOUBLE:
4299 vmovmskpd(tmp, mask, vec_enc);
4300 need_clip = masklen < 2;
4301 break;
4302 default: assert(false, "Unhandled type, %s", type2name(bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4302, "assert(" "false" ") failed", "Unhandled type, %s", type2name
(bt)); ::breakpoint(); } } while (0)
;
4303 }
4304
4305 // Mask generated out of partial vector comparisons/replicate/mask manipulation
4306 // operations needs to be clipped.
4307 if (need_clip && opc != Op_VectorMaskFirstTrue) {
4308 // need_clip implies masklen < 32
4309 andq(tmp, (1 << masklen) - 1);
4310 }
4311
4312 vector_mask_operation_helper(opc, dst, tmp, masklen);
4313}
4314#endif

/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp

1/*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef CPU_X86_ASSEMBLER_X86_HPP
26#define CPU_X86_ASSEMBLER_X86_HPP
27
28#include "asm/register.hpp"
29#include "utilities/powerOfTwo.hpp"
30
31// Contains all the definitions needed for x86 assembly code generation.
32
33// Calling convention
34class Argument {
35 public:
36 enum {
37#ifdef _LP641
38#ifdef _WIN64
39 n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
40 n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )
41 n_int_register_returns_c = 1, // rax
42 n_float_register_returns_c = 1, // xmm0
43#else
44 n_int_register_parameters_c = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
45 n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... )
46 n_int_register_returns_c = 2, // rax, rdx
47 n_float_register_returns_c = 2, // xmm0, xmm1
48#endif // _WIN64
49 n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ...
50 n_float_register_parameters_j = 8 // j_farg0, j_farg1, ...
51#else
52 n_register_parameters = 0 // 0 registers used to pass arguments
53#endif // _LP64
54 };
55};
56
57
58#ifdef _LP641
59// Symbolically name the register arguments used by the c calling convention.
60// Windows is different from linux/solaris. So much for standards...
61
62#ifdef _WIN64
63
64REGISTER_DECLARATION(Register, c_rarg0, rcx)const Register c_rarg0 = ((Register)rcx);
65REGISTER_DECLARATION(Register, c_rarg1, rdx)const Register c_rarg1 = ((Register)rdx);
66REGISTER_DECLARATION(Register, c_rarg2, r8)const Register c_rarg2 = ((Register)r8);
67REGISTER_DECLARATION(Register, c_rarg3, r9)const Register c_rarg3 = ((Register)r9);
68
69REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0)const XMMRegister c_farg0 = ((XMMRegister)xmm0);
70REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1)const XMMRegister c_farg1 = ((XMMRegister)xmm1);
71REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2)const XMMRegister c_farg2 = ((XMMRegister)xmm2);
72REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3)const XMMRegister c_farg3 = ((XMMRegister)xmm3);
73
74#else
75
76REGISTER_DECLARATION(Register, c_rarg0, rdi)const Register c_rarg0 = ((Register)rdi);
77REGISTER_DECLARATION(Register, c_rarg1, rsi)const Register c_rarg1 = ((Register)rsi);
78REGISTER_DECLARATION(Register, c_rarg2, rdx)const Register c_rarg2 = ((Register)rdx);
79REGISTER_DECLARATION(Register, c_rarg3, rcx)const Register c_rarg3 = ((Register)rcx);
80REGISTER_DECLARATION(Register, c_rarg4, r8)const Register c_rarg4 = ((Register)r8);
81REGISTER_DECLARATION(Register, c_rarg5, r9)const Register c_rarg5 = ((Register)r9);
82
83REGISTER_DECLARATION(XMMRegister, c_farg0, xmm0)const XMMRegister c_farg0 = ((XMMRegister)xmm0);
84REGISTER_DECLARATION(XMMRegister, c_farg1, xmm1)const XMMRegister c_farg1 = ((XMMRegister)xmm1);
85REGISTER_DECLARATION(XMMRegister, c_farg2, xmm2)const XMMRegister c_farg2 = ((XMMRegister)xmm2);
86REGISTER_DECLARATION(XMMRegister, c_farg3, xmm3)const XMMRegister c_farg3 = ((XMMRegister)xmm3);
87REGISTER_DECLARATION(XMMRegister, c_farg4, xmm4)const XMMRegister c_farg4 = ((XMMRegister)xmm4);
88REGISTER_DECLARATION(XMMRegister, c_farg5, xmm5)const XMMRegister c_farg5 = ((XMMRegister)xmm5);
89REGISTER_DECLARATION(XMMRegister, c_farg6, xmm6)const XMMRegister c_farg6 = ((XMMRegister)xmm6);
90REGISTER_DECLARATION(XMMRegister, c_farg7, xmm7)const XMMRegister c_farg7 = ((XMMRegister)xmm7);
91
92#endif // _WIN64
93
94// Symbolically name the register arguments used by the Java calling convention.
95// We have control over the convention for java so we can do what we please.
96// What pleases us is to offset the java calling convention so that when
97// we call a suitable jni method the arguments are lined up and we don't
98// have to do much shuffling. A suitable jni method is non-static and has a
99// small number of arguments (two fewer args on windows)
100//
101// |-------------------------------------------------------|
102// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |
103// |-------------------------------------------------------|
104// | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)
105// | rdi rsi rdx rcx r8 r9 | solaris/linux
106// |-------------------------------------------------------|
107// | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |
108// |-------------------------------------------------------|
109
110REGISTER_DECLARATION(Register, j_rarg0, c_rarg1)const Register j_rarg0 = ((Register)c_rarg1);
111REGISTER_DECLARATION(Register, j_rarg1, c_rarg2)const Register j_rarg1 = ((Register)c_rarg2);
112REGISTER_DECLARATION(Register, j_rarg2, c_rarg3)const Register j_rarg2 = ((Register)c_rarg3);
113// Windows runs out of register args here
114#ifdef _WIN64
115REGISTER_DECLARATION(Register, j_rarg3, rdi)const Register j_rarg3 = ((Register)rdi);
116REGISTER_DECLARATION(Register, j_rarg4, rsi)const Register j_rarg4 = ((Register)rsi);
117#else
118REGISTER_DECLARATION(Register, j_rarg3, c_rarg4)const Register j_rarg3 = ((Register)c_rarg4);
119REGISTER_DECLARATION(Register, j_rarg4, c_rarg5)const Register j_rarg4 = ((Register)c_rarg5);
120#endif /* _WIN64 */
121REGISTER_DECLARATION(Register, j_rarg5, c_rarg0)const Register j_rarg5 = ((Register)c_rarg0);
122
123REGISTER_DECLARATION(XMMRegister, j_farg0, xmm0)const XMMRegister j_farg0 = ((XMMRegister)xmm0);
124REGISTER_DECLARATION(XMMRegister, j_farg1, xmm1)const XMMRegister j_farg1 = ((XMMRegister)xmm1);
125REGISTER_DECLARATION(XMMRegister, j_farg2, xmm2)const XMMRegister j_farg2 = ((XMMRegister)xmm2);
126REGISTER_DECLARATION(XMMRegister, j_farg3, xmm3)const XMMRegister j_farg3 = ((XMMRegister)xmm3);
127REGISTER_DECLARATION(XMMRegister, j_farg4, xmm4)const XMMRegister j_farg4 = ((XMMRegister)xmm4);
128REGISTER_DECLARATION(XMMRegister, j_farg5, xmm5)const XMMRegister j_farg5 = ((XMMRegister)xmm5);
129REGISTER_DECLARATION(XMMRegister, j_farg6, xmm6)const XMMRegister j_farg6 = ((XMMRegister)xmm6);
130REGISTER_DECLARATION(XMMRegister, j_farg7, xmm7)const XMMRegister j_farg7 = ((XMMRegister)xmm7);
131
132REGISTER_DECLARATION(Register, rscratch1, r10)const Register rscratch1 = ((Register)r10); // volatile
133REGISTER_DECLARATION(Register, rscratch2, r11)const Register rscratch2 = ((Register)r11); // volatile
134
135REGISTER_DECLARATION(Register, r12_heapbase, r12)const Register r12_heapbase = ((Register)r12); // callee-saved
136REGISTER_DECLARATION(Register, r15_thread, r15)const Register r15_thread = ((Register)r15); // callee-saved
137
138#else
139// rscratch1 will appear in 32bit code that is dead but of course must compile.
140// Using noreg ensures that if the dead code is incorrectly live and executed, it
141// will cause an assertion failure.
142#define rscratch1 noreg
143#define rscratch2 noreg
144
145#endif // _LP64
146
147// JSR 292
148// On x86, the SP does not have to be saved when invoking method handle intrinsics
149// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
150REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg)const Register rbp_mh_SP_save = ((Register)noreg);
151
152// Address is an abstraction used to represent a memory location
153// using any of the amd64 addressing modes with one object.
154//
155// Note: A register location is represented via a Register, not
156// via an address for efficiency & simplicity reasons.
157
158class ArrayAddress;
159
160class Address {
161 public:
162 enum ScaleFactor {
163 no_scale = -1,
164 times_1 = 0,
165 times_2 = 1,
166 times_4 = 2,
167 times_8 = 3,
168 times_ptr = LP64_ONLY(times_8)times_8 NOT_LP64(times_4)
169 };
170 static ScaleFactor times(int size) {
171 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size")do { if (!(size >= 1 && size <= 8 && is_power_of_2
(size))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 171, "assert(" "size >= 1 && size <= 8 && is_power_of_2(size)"
") failed", "bad scale size"); ::breakpoint(); } } while (0)
;
172 if (size == 8) return times_8;
173 if (size == 4) return times_4;
174 if (size == 2) return times_2;
175 return times_1;
176 }
177 static int scale_size(ScaleFactor scale) {
178 assert(scale != no_scale, "")do { if (!(scale != no_scale)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 178, "assert(" "scale != no_scale" ") failed", ""); ::breakpoint
(); } } while (0)
;
179 assert(((1 << (int)times_1) == 1 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
180 (1 << (int)times_2) == 2 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
181 (1 << (int)times_4) == 4 &&do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
182 (1 << (int)times_8) == 8), "")do { if (!(((1 << (int)times_1) == 1 && (1 <<
(int)times_2) == 2 && (1 << (int)times_4) == 4
&& (1 << (int)times_8) == 8))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 182, "assert(" "((1 << (int)times_1) == 1 && (1 << (int)times_2) == 2 && (1 << (int)times_4) == 4 && (1 << (int)times_8) == 8)"
") failed", ""); ::breakpoint(); } } while (0)
;
183 return (1 << (int)scale);
184 }
185
186 private:
187 Register _base;
188 Register _index;
189 XMMRegister _xmmindex;
190 ScaleFactor _scale;
191 int _disp;
192 bool _isxmmindex;
193 RelocationHolder _rspec;
194
195 // Easily misused constructors; make them private
196 // %%% can we make these go away?
197 NOT_LP64(Address(address loc, RelocationHolder spec);)
198 Address(int disp, address loc, relocInfo::relocType rtype);
199 Address(int disp, address loc, RelocationHolder spec);
200
201 public:
202
203 int disp() { return _disp; }
204 // creation
205 Address()
206 : _base(noreg),
207 _index(noreg),
208 _xmmindex(xnoreg),
209 _scale(no_scale),
210 _disp(0),
211 _isxmmindex(false){
212 }
213
214 // No default displacement otherwise Register can be implicitly
215 // converted to 0(Register) which is quite a different animal.
216
217 Address(Register base, int disp)
218 : _base(base),
219 _index(noreg),
220 _xmmindex(xnoreg),
221 _scale(no_scale),
222 _disp(disp),
223 _isxmmindex(false){
224 }
225
226 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
227 : _base (base),
228 _index(index),
229 _xmmindex(xnoreg),
230 _scale(scale),
231 _disp (disp),
232 _isxmmindex(false) {
233 assert(!index->is_valid() == (scale == Address::no_scale),do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 234, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
19
Called C++ object pointer is null
234 "inconsistent address")do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 234, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
235 }
236
237 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
238 : _base (base),
239 _index(index.register_or_noreg()),
240 _xmmindex(xnoreg),
241 _scale(scale),
242 _disp (disp + (index.constant_or_zero() * scale_size(scale))),
243 _isxmmindex(false){
244 if (!index.is_register()) scale = Address::no_scale;
245 assert(!_index->is_valid() == (scale == Address::no_scale),do { if (!(!_index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 246, "assert(" "!_index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
246 "inconsistent address")do { if (!(!_index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 246, "assert(" "!_index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
247 }
248
249 Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
250 : _base (base),
251 _index(noreg),
252 _xmmindex(index),
253 _scale(scale),
254 _disp(disp),
255 _isxmmindex(true) {
256 assert(!index->is_valid() == (scale == Address::no_scale),do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 257, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
257 "inconsistent address")do { if (!(!index->is_valid() == (scale == Address::no_scale
))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 257, "assert(" "!index->is_valid() == (scale == Address::no_scale)"
") failed", "inconsistent address"); ::breakpoint(); } } while
(0)
;
258 }
259
260 // The following overloads are used in connection with the
261 // ByteSize type (see sizes.hpp). They simplify the use of
262 // ByteSize'd arguments in assembly code.
263
264 Address(Register base, ByteSize disp)
265 : Address(base, in_bytes(disp)) {}
266
267 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
268 : Address(base, index, scale, in_bytes(disp)) {}
269
270 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
271 : Address(base, index, scale, in_bytes(disp)) {}
272
273 Address plus_disp(int disp) const {
274 Address a = (*this);
275 a._disp += disp;
276 return a;
277 }
278 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
279 Address a = (*this);
280 a._disp += disp.constant_or_zero() * scale_size(scale);
281 if (disp.is_register()) {
282 assert(!a.index()->is_valid(), "competing indexes")do { if (!(!a.index()->is_valid())) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/assembler_x86.hpp"
, 282, "assert(" "!a.index()->is_valid()" ") failed", "competing indexes"
); ::breakpoint(); } } while (0)
;
283 a._index = disp.as_register();
284 a._scale = scale;
285 }
286 return a;
287 }
288 bool is_same_address(Address a) const {
289 // disregard _rspec
290 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
291 }
292
293 // accessors
294 bool uses(Register reg) const { return _base == reg || _index == reg; }
295 Register base() const { return _base; }
296 Register index() const { return _index; }
297 XMMRegister xmmindex() const { return _xmmindex; }
298 ScaleFactor scale() const { return _scale; }
299 int disp() const { return _disp; }
300 bool isxmmindex() const { return _isxmmindex; }
301
302 // Convert the raw encoding form into the form expected by the constructor for
303 // Address. An index of 4 (rsp) corresponds to having no index, so convert
304 // that to noreg for the Address constructor.
305 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
306
307 static Address make_array(ArrayAddress);
308
309 private:
310 bool base_needs_rex() const {
311 return _base->is_valid() && _base->encoding() >= 8;
312 }
313
314 bool index_needs_rex() const {
315 return _index->is_valid() &&_index->encoding() >= 8;
316 }
317
318 bool xmmindex_needs_rex() const {
319 return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;
320 }
321
322 relocInfo::relocType reloc() const { return _rspec.type(); }
323
324 friend class Assembler;
325 friend class MacroAssembler;
326 friend class LIR_Assembler; // base/index/scale/disp
327};
328
329//
330// AddressLiteral has been split out from Address because operands of this type
331// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
332// the few instructions that need to deal with address literals are unique and the
333// MacroAssembler does not have to implement every instruction in the Assembler
334// in order to search for address literals that may need special handling depending
335// on the instruction and the platform. A small step on the way to merging i486/amd64
336// directories.
337//
338class AddressLiteral {
339 friend class ArrayAddress;
340 RelocationHolder _rspec;
341 // Typically when we use AddressLiterals we want to use their rval.
342 // However, in some situations we want the lval (effective address) of the item.
343 // We provide a special factory for making those lvals.
344 bool _is_lval;
345
346 // If the target is far we'll need to load the ea of this to
347 // a register to reach it. Otherwise if near we can do rip
348 // relative addressing.
349
350 address _target;
351
352 protected:
353 // creation
354 AddressLiteral()
355 : _is_lval(false),
356 _target(NULL__null)
357 {}
358
359 public:
360
361
362 AddressLiteral(address target, relocInfo::relocType rtype);
363
364 AddressLiteral(address target, RelocationHolder const& rspec)
365 : _rspec(rspec),
366 _is_lval(false),
367 _target(target)
368 {}
369
370 AddressLiteral addr() {
371 AddressLiteral ret = *this;
372 ret._is_lval = true;
373 return ret;
374 }
375
376
377 private:
378
379 address target() { return _target; }
380 bool is_lval() { return _is_lval; }
381
382 relocInfo::relocType reloc() const { return _rspec.type(); }
383 const RelocationHolder& rspec() const { return _rspec; }
384
385 friend class Assembler;
386 friend class MacroAssembler;
387 friend class Address;
388 friend class LIR_Assembler;
389};
390
391// Convenience classes
392class RuntimeAddress: public AddressLiteral {
393
394 public:
395
396 RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
397
398};
399
400class ExternalAddress: public AddressLiteral {
401 private:
402 static relocInfo::relocType reloc_for_target(address target) {
403 // Sometimes ExternalAddress is used for values which aren't
404 // exactly addresses, like the card table base.
405 // external_word_type can't be used for values in the first page
406 // so just skip the reloc in that case.
407 return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
408 }
409
410 public:
411
412 ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
413
414};
415
416class InternalAddress: public AddressLiteral {
417
418 public:
419
420 InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
421
422};
423
424// x86 can do array addressing as a single operation since disp can be an absolute
425// address; amd64 can't. We create a class that expresses the concept but does extra
426// magic on amd64 to get the final result.
427
428class ArrayAddress {
429 private:
430
431 AddressLiteral _base;
432 Address _index;
433
434 public:
435
436 ArrayAddress() {};
437 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
438 AddressLiteral base() { return _base; }
439 Address index() { return _index; }
440
441};
442
443class InstructionAttr;
444
445// 64-bit: reflects the fxsave size, which is 512 bytes, plus the new xsave area on EVEX, which is another 2176 bytes
446// See fxsave and xsave(EVEX enabled) documentation for layout
447const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize)2688 / wordSize;
448
449// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
450// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
451// is what you get. The Assembler is generating code into a CodeBuffer.
452
453class Assembler : public AbstractAssembler {
454 friend class AbstractAssembler; // for the non-virtual hack
455 friend class LIR_Assembler; // as_Address()
456 friend class StubGenerator;
457
458 public:
459 enum Condition { // The x86 condition codes used for conditional jumps/moves.
460 zero = 0x4,
461 notZero = 0x5,
462 equal = 0x4,
463 notEqual = 0x5,
464 less = 0xc,
465 lessEqual = 0xe,
466 greater = 0xf,
467 greaterEqual = 0xd,
468 below = 0x2,
469 belowEqual = 0x6,
470 above = 0x7,
471 aboveEqual = 0x3,
472 overflow = 0x0,
473 noOverflow = 0x1,
474 carrySet = 0x2,
475 carryClear = 0x3,
476 negative = 0x8,
477 positive = 0x9,
478 parity = 0xa,
479 noParity = 0xb
480 };
481
482 enum Prefix {
483 // segment overrides
484 CS_segment = 0x2e,
485 SS_segment = 0x36,
486 DS_segment = 0x3e,
487 ES_segment = 0x26,
488 FS_segment = 0x64,
489 GS_segment = 0x65,
490
491 REX = 0x40,
492
493 REX_B = 0x41,
494 REX_X = 0x42,
495 REX_XB = 0x43,
496 REX_R = 0x44,
497 REX_RB = 0x45,
498 REX_RX = 0x46,
499 REX_RXB = 0x47,
500
501 REX_W = 0x48,
502
503 REX_WB = 0x49,
504 REX_WX = 0x4A,
505 REX_WXB = 0x4B,
506 REX_WR = 0x4C,
507 REX_WRB = 0x4D,
508 REX_WRX = 0x4E,
509 REX_WRXB = 0x4F,
510
511 VEX_3bytes = 0xC4,
512 VEX_2bytes = 0xC5,
513 EVEX_4bytes = 0x62,
514 Prefix_EMPTY = 0x0
515 };
516
517 enum VexPrefix {
518 VEX_B = 0x20,
519 VEX_X = 0x40,
520 VEX_R = 0x80,
521 VEX_W = 0x80
522 };
523
524 enum ExexPrefix {
525 EVEX_F = 0x04,
526 EVEX_V = 0x08,
527 EVEX_Rb = 0x10,
528 EVEX_X = 0x40,
529 EVEX_Z = 0x80
530 };
531
532 enum VexSimdPrefix {
533 VEX_SIMD_NONE = 0x0,
534 VEX_SIMD_66 = 0x1,
535 VEX_SIMD_F3 = 0x2,
536 VEX_SIMD_F2 = 0x3
537 };
538
539 enum VexOpcode {
540 VEX_OPCODE_NONE = 0x0,
541 VEX_OPCODE_0F = 0x1,
542 VEX_OPCODE_0F_38 = 0x2,
543 VEX_OPCODE_0F_3A = 0x3,
544 VEX_OPCODE_MASK = 0x1F
545 };
546
547 enum AvxVectorLen {
548 AVX_128bit = 0x0,
549 AVX_256bit = 0x1,
550 AVX_512bit = 0x2,
551 AVX_NoVec = 0x4
552 };
553
554 enum EvexTupleType {
555 EVEX_FV = 0,
556 EVEX_HV = 4,
557 EVEX_FVM = 6,
558 EVEX_T1S = 7,
559 EVEX_T1F = 11,
560 EVEX_T2 = 13,
561 EVEX_T4 = 15,
562 EVEX_T8 = 17,
563 EVEX_HVM = 18,
564 EVEX_QVM = 19,
565 EVEX_OVM = 20,
566 EVEX_M128 = 21,
567 EVEX_DUP = 22,
568 EVEX_ETUP = 23
569 };
570
571 enum EvexInputSizeInBits {
572 EVEX_8bit = 0,
573 EVEX_16bit = 1,
574 EVEX_32bit = 2,
575 EVEX_64bit = 3,
576 EVEX_NObit = 4
577 };
578
579 enum WhichOperand {
580 // input to locate_operand, and format code for relocations
581 imm_operand = 0, // embedded 32-bit|64-bit immediate operand
582 disp32_operand = 1, // embedded 32-bit displacement or address
583 call32_operand = 2, // embedded 32-bit self-relative displacement
584#ifndef _LP641
585 _WhichOperand_limit = 3
586#else
587 narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop
588 _WhichOperand_limit = 4
589#endif
590 };
591
592 // Comparison predicates for integral types & FP types when using SSE
593 enum ComparisonPredicate {
594 eq = 0,
595 lt = 1,
596 le = 2,
597 _false = 3,
598 neq = 4,
599 nlt = 5,
600 nle = 6,
601 _true = 7
602 };
603
604 // Comparison predicates for FP types when using AVX
605 // O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true.
606 // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN.
607 enum ComparisonPredicateFP {
608 EQ_OQ = 0,
609 LT_OS = 1,
610 LE_OS = 2,
611 UNORD_Q = 3,
612 NEQ_UQ = 4,
613 NLT_US = 5,
614 NLE_US = 6,
615 ORD_Q = 7,
616 EQ_UQ = 8,
617 NGE_US = 9,
618 NGT_US = 0xA,
619 FALSE_OQ = 0XB,
620 NEQ_OQ = 0xC,
621 GE_OS = 0xD,
622 GT_OS = 0xE,
623 TRUE_UQ = 0xF,
624 EQ_OS = 0x10,
625 LT_OQ = 0x11,
626 LE_OQ = 0x12,
627 UNORD_S = 0x13,
628 NEQ_US = 0x14,
629 NLT_UQ = 0x15,
630 NLE_UQ = 0x16,
631 ORD_S = 0x17,
632 EQ_US = 0x18,
633 NGE_UQ = 0x19,
634 NGT_UQ = 0x1A,
635 FALSE_OS = 0x1B,
636 NEQ_OS = 0x1C,
637 GE_OQ = 0x1D,
638 GT_OQ = 0x1E,
639 TRUE_US =0x1F
640 };
641
642 enum Width {
643 B = 0,
644 W = 1,
645 D = 2,
646 Q = 3
647 };
648
649 //---< calculate length of instruction >---
650 // As instruction size can't be found out easily on x86/x64,
651 // we just use '4' for len and maxlen.
652 // instruction must start at passed address
653 static unsigned int instr_len(unsigned char *instr) { return 4; }
654
655 //---< longest instructions >---
656 // Max instruction length is not specified in architecture documentation.
657 // We could use a "safe enough" estimate (15), but just default to
658 // instruction length guess from above.
659 static unsigned int instr_maxlen() { return 4; }
660
661 // NOTE: The general philosophy of the declarations here is that 64bit versions
662 // of instructions are freely declared without the need for wrapping them in an ifdef.
663 // (Some dangerous instructions are ifdef'd out of inappropriate jvm's.)
664 // In the .cpp file the implementations are wrapped so that they are dropped out
665 // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
666 // to the size it was prior to merging up the 32bit and 64bit assemblers.
667 //
668 // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
669 // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
670
671private:
672
673 bool _legacy_mode_bw;
674 bool _legacy_mode_dq;
675 bool _legacy_mode_vl;
676 bool _legacy_mode_vlbw;
677 NOT_LP64(bool _is_managed;)
678
679 class InstructionAttr *_attributes;
680
681 // 64bit prefixes
682 void prefix(Register reg);
683 void prefix(Register dst, Register src, Prefix p);
684 void prefix(Register dst, Address adr, Prefix p);
685
686 void prefix(Address adr);
687 void prefix(Address adr, Register reg, bool byteinst = false);
688 void prefix(Address adr, XMMRegister reg);
689
690 int prefix_and_encode(int reg_enc, bool byteinst = false);
691 int prefix_and_encode(int dst_enc, int src_enc) {
692 return prefix_and_encode(dst_enc, false, src_enc, false);
693 }
694 int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
695
696 // Some prefixq variants always emit exactly one prefix byte, so besides a
697 // prefix-emitting method we provide a method to get the prefix byte to emit,
698 // which can then be folded into a byte stream.
699 int8_t get_prefixq(Address adr);
700 int8_t get_prefixq(Address adr, Register reg);
701
702 void prefixq(Address adr);
703 void prefixq(Address adr, Register reg);
704 void prefixq(Address adr, XMMRegister reg);
705
706 int prefixq_and_encode(int reg_enc);
707 int prefixq_and_encode(int dst_enc, int src_enc);
708
709 void rex_prefix(Address adr, XMMRegister xreg,
710 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
711 int rex_prefix_and_encode(int dst_enc, int src_enc,
712 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
713
714 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
715
716 void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
717 int nds_enc, VexSimdPrefix pre, VexOpcode opc);
718
719 void vex_prefix(Address adr, int nds_enc, int xreg_enc,
720 VexSimdPrefix pre, VexOpcode opc,
721 InstructionAttr *attributes);
722
723 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
724 VexSimdPrefix pre, VexOpcode opc,
725 InstructionAttr *attributes);
726
727 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
728 VexOpcode opc, InstructionAttr *attributes);
729
730 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
731 VexOpcode opc, InstructionAttr *attributes);
732
733 // Helper functions for groups of instructions
734 void emit_arith_b(int op1, int op2, Register dst, int imm8);
735
736 void emit_arith(int op1, int op2, Register dst, int32_t imm32);
737 // Force generation of a 4 byte immediate value even if it fits into 8bit
738 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
739 void emit_arith(int op1, int op2, Register dst, Register src);
740
741 bool emit_compressed_disp_byte(int &disp);
742
743 void emit_modrm(int mod, int dst_enc, int src_enc);
744 void emit_modrm_disp8(int mod, int dst_enc, int src_enc,
745 int disp);
746 void emit_modrm_sib(int mod, int dst_enc, int src_enc,
747 Address::ScaleFactor scale, int index_enc, int base_enc);
748 void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
749 Address::ScaleFactor scale, int index_enc, int base_enc,
750 int disp);
751
752 void emit_operand_helper(int reg_enc,
753 int base_enc, int index_enc, Address::ScaleFactor scale,
754 int disp,
755 RelocationHolder const& rspec,
756 int rip_relative_correction = 0);
757
758 void emit_operand(Register reg,
759 Register base, Register index, Address::ScaleFactor scale,
760 int disp,
761 RelocationHolder const& rspec,
762 int rip_relative_correction = 0);
763
764 void emit_operand(Register reg,
765 Register base, XMMRegister index, Address::ScaleFactor scale,
766 int disp,
767 RelocationHolder const& rspec);
768
769 void emit_operand(XMMRegister xreg,
770 Register base, XMMRegister xindex, Address::ScaleFactor scale,
771 int disp,
772 RelocationHolder const& rspec);
773
774 void emit_operand(Register reg, Address adr,
775 int rip_relative_correction = 0);
776
777 void emit_operand(XMMRegister reg,
778 Register base, Register index, Address::ScaleFactor scale,
779 int disp,
780 RelocationHolder const& rspec);
781
782 void emit_operand(XMMRegister reg, Address adr);
783
784 // Immediate-to-memory forms
785 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
786
787 protected:
788 #ifdef ASSERT // declared only when ASSERT is defined (debug builds)
789 void check_relocation(RelocationHolder const& rspec, int format);
790 #endif // ASSERT
791
792 void emit_data(jint data, relocInfo::relocType rtype, int format);
793 void emit_data(jint data, RelocationHolder const& rspec, int format);
794 void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
795 void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
796
797 bool reachable(AddressLiteral adr) NOT_LP64({ return true;});
798
799 // These are all easily abused and hence protected
800
801 // 32BIT ONLY SECTION
802#ifndef _LP64 // first branch: 32-bit builds; #else branch: 64-bit builds
803 // Make these disappear in 64bit mode since they would never be correct
804 void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
805 void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
806
807 void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
808 void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
809
810 void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
811#else
812 // 64BIT ONLY SECTION
813 void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY
814
815 void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
816 void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
817
818 void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
819 void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
820#endif // _LP64
821
822 // These are unique in that we are ensured by the caller that the 32bit
823 // relative in these instructions will always be able to reach the potentially
824 // 64bit address described by entry. Since they can take a 64bit address they
825 // don't have the 32 suffix like the other instructions in this class.
826
827 void call_literal(address entry, RelocationHolder const& rspec);
828 void jmp_literal(address entry, RelocationHolder const& rspec);
829
830 // Avoid using directly section
831 // Instructions in this section are actually usable by anyone without danger
832 // of failure but have performance issues that are addressed by enhanced
833 // instructions which will do the proper thing based on the particular cpu.
834 // We protect them because we don't trust you...
835
836 // Don't use next inc() and dec() methods directly. INC & DEC instructions
837 // could cause a partial flag stall since they don't set CF flag.
838 // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
839 // which call inc() & dec() or add() & sub() in accordance with
840 // the product flag UseIncDec value.
841
842 void decl(Register dst);
843 void decl(Address dst);
844 void decq(Address dst);
845
846 void incl(Register dst);
847 void incl(Address dst);
848 void incq(Register dst);
849 void incq(Address dst);
850
851 // New cpus require use of movsd and movss to avoid partial register stall
852 // when loading from memory. But for old Opteron use movlpd instead of movsd.
853 // The selection is done in MacroAssembler::movdbl() and movflt().
854
855 // Move Scalar Single-Precision Floating-Point Values
856 void movss(XMMRegister dst, Address src);
857 void movss(XMMRegister dst, XMMRegister src);
858 void movss(Address dst, XMMRegister src);
859
860 // Move Scalar Double-Precision Floating-Point Values
861 void movsd(XMMRegister dst, Address src);
862 void movsd(XMMRegister dst, XMMRegister src);
863 void movsd(Address dst, XMMRegister src);
864 void movlpd(XMMRegister dst, Address src);
865
866 // New cpus require use of movaps and movapd to avoid partial register stall
867 // when moving between registers.
868 void movaps(XMMRegister dst, XMMRegister src);
869 void movapd(XMMRegister dst, XMMRegister src);
870
871 // End avoid using directly
872
873
874 // Instruction prefixes
875 void prefix(Prefix p);
876
877 public:
878
879 // Creation
880 Assembler(CodeBuffer* code) : AbstractAssembler(code) { // hands the code buffer to the AbstractAssembler base
881 init_attributes(); // declared further down in this class; its body is not part of this header excerpt
882 }
883
884 // Decoding
885 static address locate_operand(address inst, WhichOperand which);
886 static address locate_next_instruction(address inst);
887
888 // Utilities
889 static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
890 int cur_tuple_type, int in_size_in_bits, int cur_encoding);
891
892 // Generic instructions
893 // Does 32bit or 64bit as needed for the platform. In some sense these
894 // belong in macro assembler but there is no need for both varieties to exist
895
896 void init_attributes(void);
897
898 void set_attributes(InstructionAttr *attributes) { _attributes = attributes; } // stores the caller's pointer in _attributes; nothing is copied here
899 void clear_attributes(void) { _attributes = NULL; } // resets _attributes to the null pointer
900
901 void set_managed(void) { NOT_LP64(_is_managed = true;) } // sets _is_managed; the field is itself declared under NOT_LP64, so presumably a no-op on 64-bit builds (confirm)
902 void clear_managed(void) { NOT_LP64(_is_managed = false;) } // clears _is_managed; the field is itself declared under NOT_LP64, so presumably a no-op on 64-bit builds (confirm)
903 bool is_managed(void) { // reports the managed flag; exactly one of the two returns below is expected to survive preprocessing
904 NOT_LP64(return _is_managed;) // non-LP64 path: value of the _is_managed field
905 LP64_ONLY(return false;) } // LP64 path: always false
906
907 void lea(Register dst, Address src);
908
909 void mov(Register dst, Register src);
910
911#ifdef _LP64 // 64-bit builds only
912 // support caching the result of some routines
913
914 // must be called before pusha(), popa(), vzeroupper() - checked with asserts
915 static void precompute_instructions();
916
917 void pusha_uncached();
918 void popa_uncached();
919#endif // _LP64
920 void vzeroupper_uncached();
921 void decq(Register dst);
922
923 void pusha();
924 void popa();
925
926 void pushf();
927 void popf();
928
929 void push(int32_t imm32);
930
931 void push(Register src);
932
933 void pop(Register dst);
934
935 // These are dummies to prevent surprise implicit conversions to Register
936 void push(void* v);
937 void pop(void* v);
938
939 // These do register sized moves/scans
940 void rep_mov();
941 void rep_stos();
942 void rep_stosb();
943 void repne_scan();
944#ifdef _LP64 // 64-bit builds only
945 void repne_scanl();
946#endif // _LP64
947
948 // Vanilla instructions in lexical order
949
950 void adcl(Address dst, int32_t imm32);
951 void adcl(Address dst, Register src);
952 void adcl(Register dst, int32_t imm32);
953 void adcl(Register dst, Address src);
954 void adcl(Register dst, Register src);
955
956 void adcq(Register dst, int32_t imm32);
957 void adcq(Register dst, Address src);
958 void adcq(Register dst, Register src);
959
960 void addb(Address dst, int imm8);
961 void addw(Register dst, Register src);
962 void addw(Address dst, int imm16);
963
964 void addl(Address dst, int32_t imm32);
965 void addl(Address dst, Register src);
966 void addl(Register dst, int32_t imm32);
967 void addl(Register dst, Address src);
968 void addl(Register dst, Register src);
969
970 void addq(Address dst, int32_t imm32);
971 void addq(Address dst, Register src);
972 void addq(Register dst, int32_t imm32);
973 void addq(Register dst, Address src);
974 void addq(Register dst, Register src);
975
976#ifdef _LP64 // 64-bit builds only
977 //Add Unsigned Integers with Carry Flag
978 void adcxq(Register dst, Register src);
979
980 //Add Unsigned Integers with Overflow Flag
981 void adoxq(Register dst, Register src);
982#endif // _LP64
983
984 void addr_nop_4();
985 void addr_nop_5();
986 void addr_nop_7();
987 void addr_nop_8();
988
989 // Add Scalar Double-Precision Floating-Point Values
990 void addsd(XMMRegister dst, Address src);
991 void addsd(XMMRegister dst, XMMRegister src);
992
993 // Add Scalar Single-Precision Floating-Point Values
994 void addss(XMMRegister dst, Address src);
995 void addss(XMMRegister dst, XMMRegister src);
996
997 // AES instructions
998 void aesdec(XMMRegister dst, Address src);
999 void aesdec(XMMRegister dst, XMMRegister src);
1000 void aesdeclast(XMMRegister dst, Address src);
1001 void aesdeclast(XMMRegister dst, XMMRegister src);
1002 void aesenc(XMMRegister dst, Address src);
1003 void aesenc(XMMRegister dst, XMMRegister src);
1004 void aesenclast(XMMRegister dst, Address src);
1005 void aesenclast(XMMRegister dst, XMMRegister src);
1006 // Vector AES instructions
1007 void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1008 void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1009 void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1010 void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1011
1012 void andw(Register dst, Register src);
1013 void andb(Address dst, Register src);
1014
1015 void andl(Address dst, int32_t imm32);
1016 void andl(Register dst, int32_t imm32);
1017 void andl(Register dst, Address src);
1018 void andl(Register dst, Register src);
1019 void andl(Address dst, Register src);
1020
1021 void andq(Address dst, int32_t imm32);
1022 void andq(Register dst, int32_t imm32);
1023 void andq(Register dst, Address src);
1024 void andq(Register dst, Register src);
1025 void andq(Address dst, Register src);
1026
1027 // BMI instructions
1028 void andnl(Register dst, Register src1, Register src2);
1029 void andnl(Register dst, Register src1, Address src2);
1030 void andnq(Register dst, Register src1, Register src2);
1031 void andnq(Register dst, Register src1, Address src2);
1032
1033 void blsil(Register dst, Register src);
1034 void blsil(Register dst, Address src);
1035 void blsiq(Register dst, Register src);
1036 void blsiq(Register dst, Address src);
1037
1038 void blsmskl(Register dst, Register src);
1039 void blsmskl(Register dst, Address src);
1040 void blsmskq(Register dst, Register src);
1041 void blsmskq(Register dst, Address src);
1042
1043 void blsrl(Register dst, Register src);
1044 void blsrl(Register dst, Address src);
1045 void blsrq(Register dst, Register src);
1046 void blsrq(Register dst, Address src);
1047
1048 void bsfl(Register dst, Register src);
1049 void bsrl(Register dst, Register src);
1050
1051#ifdef _LP64 // 64-bit builds only
1052 void bsfq(Register dst, Register src);
1053 void bsrq(Register dst, Register src);
1054#endif // _LP64
1055
1056 void bswapl(Register reg);
1057
1058 void bswapq(Register reg);
1059
1060 void call(Label& L, relocInfo::relocType rtype);
1061 void call(Register reg); // push pc; pc <- reg
1062 void call(Address adr); // push pc; pc <- adr
1063
1064 void cdql();
1065
1066 void cdqq();
1067
1068 void cld();
1069
1070 void clflush(Address adr);
1071 void clflushopt(Address adr);
1072 void clwb(Address adr);
1073
1074 void cmovl(Condition cc, Register dst, Register src);
1075 void cmovl(Condition cc, Register dst, Address src);
1076
1077 void cmovq(Condition cc, Register dst, Register src);
1078 void cmovq(Condition cc, Register dst, Address src);
1079
1080
1081 void cmpb(Address dst, int imm8);
1082
1083 void cmpl(Address dst, int32_t imm32);
1084
1085 void cmp(Register dst, int32_t imm32);
1086 void cmpl(Register dst, int32_t imm32);
1087 void cmpl(Register dst, Register src);
1088 void cmpl(Register dst, Address src);
1089
1090 void cmpq(Address dst, int32_t imm32);
1091 void cmpq(Address dst, Register src);
1092
1093 void cmpq(Register dst, int32_t imm32);
1094 void cmpq(Register dst, Register src);
1095 void cmpq(Register dst, Address src);
1096
1097 // these are dummies used to catch attempting to convert NULL to Register
1098 void cmpl(Register dst, void* junk); // dummy
1099 void cmpq(Register dst, void* junk); // dummy
1100
1101 void cmpw(Address dst, int imm16);
1102
1103 void cmpxchg8 (Address adr);
1104
1105 void cmpxchgb(Register reg, Address adr);
1106 void cmpxchgl(Register reg, Address adr);
1107
1108 void cmpxchgq(Register reg, Address adr);
1109 void cmpxchgw(Register reg, Address adr);
1110
1111 // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1112 void comisd(XMMRegister dst, Address src);
1113 void comisd(XMMRegister dst, XMMRegister src);
1114
1115 // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1116 void comiss(XMMRegister dst, Address src);
1117 void comiss(XMMRegister dst, XMMRegister src);
1118
1119 // Identify processor type and features
1120 void cpuid();
1121
1122 // CRC32C
1123 void crc32(Register crc, Register v, int8_t sizeInBytes);
1124 void crc32(Register crc, Address adr, int8_t sizeInBytes);
1125
1126 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1127 void cvtsd2ss(XMMRegister dst, XMMRegister src);
1128 void cvtsd2ss(XMMRegister dst, Address src);
1129
1130 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
1131 void cvtsi2sdl(XMMRegister dst, Register src);
1132 void cvtsi2sdl(XMMRegister dst, Address src);
1133 void cvtsi2sdq(XMMRegister dst, Register src);
1134 void cvtsi2sdq(XMMRegister dst, Address src);
1135
1136 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
1137 void cvtsi2ssl(XMMRegister dst, Register src);
1138 void cvtsi2ssl(XMMRegister dst, Address src);
1139 void cvtsi2ssq(XMMRegister dst, Register src);
1140 void cvtsi2ssq(XMMRegister dst, Address src);
1141
1142 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1143 void cvtdq2pd(XMMRegister dst, XMMRegister src);
1144 void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1145
1146 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1147 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1148 void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1149
1150 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1151 void cvtss2sd(XMMRegister dst, XMMRegister src);
1152 void cvtss2sd(XMMRegister dst, Address src);
1153
1154 // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
1155 void cvttsd2sil(Register dst, Address src);
1156 void cvttsd2sil(Register dst, XMMRegister src);
1157 void cvttsd2siq(Register dst, Address src);
1158 void cvttsd2siq(Register dst, XMMRegister src);
1159
1160 // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer
1161 void cvttss2sil(Register dst, XMMRegister src);
1162 void cvttss2siq(Register dst, XMMRegister src);
1163
1164 // Convert vector double to int
1165 void cvttpd2dq(XMMRegister dst, XMMRegister src);
1166
1167 // Convert vector float and double
1168 void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
1169 void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
1170
1171 // Convert vector float and int
1172 void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
1173
1174 // Convert vector long to vector FP
1175 void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1176 void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1177
1178 // Convert vector double to long
1179 void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
1180
1181 // Evex casts with truncation
1182 void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
1183 void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
1184 void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
1185 void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
1186 void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
1187 void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
1188
1189 //Abs of packed Integer values
1190 void pabsb(XMMRegister dst, XMMRegister src);
1191 void pabsw(XMMRegister dst, XMMRegister src);
1192 void pabsd(XMMRegister dst, XMMRegister src);
1193 void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
1194 void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
1195 void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
1196 void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
1197
1198 // Divide Scalar Double-Precision Floating-Point Values
1199 void divsd(XMMRegister dst, Address src);
1200 void divsd(XMMRegister dst, XMMRegister src);
1201
1202 // Divide Scalar Single-Precision Floating-Point Values
1203 void divss(XMMRegister dst, Address src);
1204 void divss(XMMRegister dst, XMMRegister src);
1205
1206
1207#ifndef _LP64 // 32-bit builds only; region is closed by the "#endif // !_LP64" further down
1208 private:
1209
1210 void emit_farith(int b1, int b2, int i);
1211
1212 public:
1213 void emms();
1214
1215 void fabs();
1216
1217 void fadd(int i);
1218
1219 void fadd_d(Address src);
1220 void fadd_s(Address src);
1221
1222 // "Alternate" versions of x87 instructions place result down in FPU
1223 // stack instead of on TOS
1224
1225 void fadda(int i); // "alternate" fadd
1226 void faddp(int i = 1);
1227
1228 void fchs();
1229
1230 void fcom(int i);
1231
1232 void fcomp(int i = 1);
1233 void fcomp_d(Address src);
1234 void fcomp_s(Address src);
1235
1236 void fcompp();
1237
1238 void fcos();
1239
1240 void fdecstp();
1241
1242 void fdiv(int i);
1243 void fdiv_d(Address src);
1244 void fdivr_s(Address src);
1245 void fdiva(int i); // "alternate" fdiv
1246 void fdivp(int i = 1);
1247
1248 void fdivr(int i);
1249 void fdivr_d(Address src);
1250 void fdiv_s(Address src);
1251
1252 void fdivra(int i); // "alternate" reversed fdiv
1253
1254 void fdivrp(int i = 1);
1255
1256 void ffree(int i = 0);
1257
1258 void fild_d(Address adr);
1259 void fild_s(Address adr);
1260
1261 void fincstp();
1262
1263 void finit();
1264
1265 void fist_s (Address adr);
1266 void fistp_d(Address adr);
1267 void fistp_s(Address adr);
1268
1269 void fld1();
1270
1271 void fld_d(Address adr);
1272 void fld_s(Address adr);
1273 void fld_s(int index);
1274
1275 void fldcw(Address src);
1276
1277 void fldenv(Address src);
1278
1279 void fldlg2();
1280
1281 void fldln2();
1282
1283 void fldz();
1284
1285 void flog();
1286 void flog10();
1287
1288 void fmul(int i);
1289
1290 void fmul_d(Address src);
1291 void fmul_s(Address src);
1292
1293 void fmula(int i); // "alternate" fmul
1294
1295 void fmulp(int i = 1);
1296
1297 void fnsave(Address dst);
1298
1299 void fnstcw(Address src);
1300
1301 void fnstsw_ax();
1302
1303 void fprem();
1304 void fprem1();
1305
1306 void frstor(Address src);
1307
1308 void fsin();
1309
1310 void fsqrt();
1311
1312 void fst_d(Address adr);
1313 void fst_s(Address adr);
1314
1315 void fstp_d(Address adr);
1316 void fstp_d(int index);
1317 void fstp_s(Address adr);
1318
1319 void fsub(int i);
1320 void fsub_d(Address src);
1321 void fsub_s(Address src);
1322
1323 void fsuba(int i); // "alternate" fsub
1324
1325 void fsubp(int i = 1);
1326
1327 void fsubr(int i);
1328 void fsubr_d(Address src);
1329 void fsubr_s(Address src);
1330
1331 void fsubra(int i); // "alternate" reversed fsub
1332
1333 void fsubrp(int i = 1);
1334
1335 void ftan();
1336
1337 void ftst();
1338
1339 void fucomi(int i = 1);
1340 void fucomip(int i = 1);
1341
1342 void fwait();
1343
1344 void fxch(int i = 1);
1345
1346 void fyl2x();
1347 void frndint();
1348 void f2xm1();
1349 void fldl2e();
1350#endif // !_LP64
1351
1352 // operands that only take the original 32bit registers
1353 void emit_operand32(Register reg, Address adr);
1354
1355 void fld_x(Address adr); // extended-precision (80-bit) format
1356 void fstp_x(Address adr); // extended-precision (80-bit) format
1357 void fxrstor(Address src);
1358 void xrstor(Address src);
1359
1360 void fxsave(Address dst);
1361 void xsave(Address dst);
1362
1363 void hlt();
1364
1365 void idivl(Register src);
1366 void divl(Register src); // Unsigned division
1367
1368#ifdef _LP64 // 64-bit builds only
1369 void idivq(Register src);
1370#endif // _LP64
1371
1372 void imull(Register src);
1373 void imull(Register dst, Register src);
1374 void imull(Register dst, Register src, int value);
1375 void imull(Register dst, Address src, int value);
1376 void imull(Register dst, Address src);
1377
1378#ifdef _LP64 // 64-bit builds only
1379 void imulq(Register dst, Register src);
1380 void imulq(Register dst, Register src, int value);
1381 void imulq(Register dst, Address src, int value);
1382 void imulq(Register dst, Address src);
1383 void imulq(Register dst);
1384#endif // _LP64
1385
1386 // jcc is the generic conditional branch generator to run-
1387 // time routines, jcc is used for branches to labels. jcc
1388 // takes a branch opcode (cc) and a label (L) and generates
1389 // either a backward branch or a forward branch and links it
1390 // to the label fixup chain. Usage:
1391 //
1392 // Label L; // unbound label
1393 // jcc(cc, L); // forward branch to unbound label
1394 // bind(L); // bind label to the current pc
1395 // jcc(cc, L); // backward branch to bound label
1396 // bind(L); // illegal: a label may be bound only once
1397 //
1398 // Note: The same Label can be used for forward and backward branches
1399 // but it may be bound only once.
1400
1401 void jcc(Condition cc, Label& L, bool maybe_short = true);
1402
1403 // Conditional jump to a 8-bit offset to L.
1404 // WARNING: be very careful using this for forward jumps. If the label is
1405 // not bound within an 8-bit offset of this instruction, a run-time error
1406 // will occur.
1407
1408 // Use macro to record file and line number.
1409 #define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__) // forwards to jccb_0 with the file and line of the place where jccb is used
1410
1411 void jccb_0(Condition cc, Label& L, const char* file, int line);
1412
1413 void jmp(Address entry); // pc <- entry
1414
1415 // Label operations & relative jumps (PPUM Appendix D)
1416 void jmp(Label& L, bool maybe_short = true); // unconditional jump to L
1417
1418 void jmp(Register entry); // pc <- entry
1419
1420 // Unconditional 8-bit offset jump to L.
1421 // WARNING: be very careful using this for forward jumps. If the label is
1422 // not bound within an 8-bit offset of this instruction, a run-time error
1423 // will occur.
1424
1425 // Use macro to record file and line number.
1426 #define jmpb(L) jmpb_0(L, __FILE__, __LINE__) // forwards to jmpb_0 with the file and line of the place where jmpb is used
1427
1428 void jmpb_0(Label& L, const char* file, int line);
1429
1430 void ldmxcsr( Address src );
1431
1432 void leal(Register dst, Address src);
1433
1434 void leaq(Register dst, Address src);
1435
1436 void lfence();
1437
1438 void lock();
1439 void size_prefix();
1440
1441 void lzcntl(Register dst, Register src);
1442
1443#ifdef _LP64 // 64-bit builds only
1444 void lzcntq(Register dst, Register src);
1445#endif // _LP64
1446
1447 enum Membar_mask_bits { // parameter type of membar(); each enumerator occupies its own bit
1448 StoreStore = 1 << 3, // bit 3 (value 8)
1449 LoadStore = 1 << 2, // bit 2 (value 4)
1450 StoreLoad = 1 << 1, // bit 1 (value 2)
1451 LoadLoad = 1 << 0 // bit 0 (value 1)
1452 }; // presumably combined with bitwise OR by callers of membar() - confirm
1453
1454 // Serializes memory and blows flags
1455 void membar(Membar_mask_bits order_constraint);
1456
1457 void mfence();
1458 void sfence();
1459
1460 // Moves
1461
1462 void mov64(Register dst, int64_t imm64);
1463 void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);
1464
1465 void movb(Address dst, Register src);
1466 void movb(Address dst, int imm8);
1467 void movb(Register dst, Address src);
1468
1469 void movddup(XMMRegister dst, XMMRegister src);
1470
1471 void kandbl(KRegister dst, KRegister src1, KRegister src2);
1472 void kandwl(KRegister dst, KRegister src1, KRegister src2);
1473 void kanddl(KRegister dst, KRegister src1, KRegister src2);
1474 void kandql(KRegister dst, KRegister src1, KRegister src2);
1475
1476 void korbl(KRegister dst, KRegister src1, KRegister src2);
1477 void korwl(KRegister dst, KRegister src1, KRegister src2);
1478 void kordl(KRegister dst, KRegister src1, KRegister src2);
1479 void korql(KRegister dst, KRegister src1, KRegister src2);
1480
1481 void kxorbl(KRegister dst, KRegister src1, KRegister src2);
1482 void kxorwl(KRegister dst, KRegister src1, KRegister src2);
1483 void kxordl(KRegister dst, KRegister src1, KRegister src2);
1484 void kxorql(KRegister dst, KRegister src1, KRegister src2);
1485 void kmovbl(KRegister dst, Register src);
1486 void kmovbl(Register dst, KRegister src);
1487 void kmovbl(KRegister dst, KRegister src);
1488 void kmovwl(KRegister dst, Register src);
1489 void kmovwl(KRegister dst, Address src);
1490 void kmovwl(Register dst, KRegister src);
1491 void kmovwl(Address dst, KRegister src);
1492 void kmovwl(KRegister dst, KRegister src);
1493 void kmovdl(KRegister dst, Register src);
1494 void kmovdl(Register dst, KRegister src);
1495 void kmovql(KRegister dst, KRegister src);
1496 void kmovql(Address dst, KRegister src);
1497 void kmovql(KRegister dst, Address src);
1498 void kmovql(KRegister dst, Register src);
1499 void kmovql(Register dst, KRegister src);
1500
1501 void knotbl(KRegister dst, KRegister src);
1502 void knotwl(KRegister dst, KRegister src);
1503 void knotdl(KRegister dst, KRegister src);
1504 void knotql(KRegister dst, KRegister src);
1505
1506 void kortestbl(KRegister dst, KRegister src);
1507 void kortestwl(KRegister dst, KRegister src);
1508 void kortestdl(KRegister dst, KRegister src);
1509 void kortestql(KRegister dst, KRegister src);
1510
1511 void kxnorbl(KRegister dst, KRegister src1, KRegister src2);
1512 void kshiftlbl(KRegister dst, KRegister src, int imm8);
1513 void kshiftrbl(KRegister dst, KRegister src, int imm8);
1514 void kshiftrwl(KRegister dst, KRegister src, int imm8);
1515 void kshiftrdl(KRegister dst, KRegister src, int imm8);
1516 void kshiftrql(KRegister dst, KRegister src, int imm8);
1517 void ktestq(KRegister src1, KRegister src2);
1518 void ktestd(KRegister src1, KRegister src2);
1519
1520 void ktestql(KRegister dst, KRegister src);
1521 void ktestdl(KRegister dst, KRegister src);
1522 void ktestwl(KRegister dst, KRegister src);
1523 void ktestbl(KRegister dst, KRegister src);
1524
1525 void movdl(XMMRegister dst, Register src);
1526 void movdl(Register dst, XMMRegister src);
1527 void movdl(XMMRegister dst, Address src);
1528 void movdl(Address dst, XMMRegister src);
1529
1530 // Move Double Quadword
1531 void movdq(XMMRegister dst, Register src);
1532 void movdq(Register dst, XMMRegister src);
1533
1534 // Move Aligned Double Quadword
1535 void movdqa(XMMRegister dst, XMMRegister src);
1536 void movdqa(XMMRegister dst, Address src);
1537
1538 // Move Unaligned Double Quadword
1539 void movdqu(Address dst, XMMRegister src);
1540 void movdqu(XMMRegister dst, Address src);
1541 void movdqu(XMMRegister dst, XMMRegister src);
1542
1543 // Move Unaligned 256bit Vector
1544 void vmovdqu(Address dst, XMMRegister src);
1545 void vmovdqu(XMMRegister dst, Address src);
1546 void vmovdqu(XMMRegister dst, XMMRegister src);
1547
1548 // Move Unaligned 512bit Vector
1549 void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
1550 void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
1551 void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
1552 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1553 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1554 void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
1555 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1556 void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
1557 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1558 void evmovdqul(Address dst, XMMRegister src, int vector_len);
1559 void evmovdqul(XMMRegister dst, Address src, int vector_len);
1560 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
1561 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1562 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1563 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1564 void evmovdquq(Address dst, XMMRegister src, int vector_len);
1565 void evmovdquq(XMMRegister dst, Address src, int vector_len);
1566 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
1567 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1568 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1569 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1570
1571 // Move lower 64bit to high 64bit in 128bit register
1572 void movlhps(XMMRegister dst, XMMRegister src);
1573
1574 void movl(Register dst, int32_t imm32);
1575 void movl(Address dst, int32_t imm32);
1576 void movl(Register dst, Register src);
1577 void movl(Register dst, Address src);
1578 void movl(Address dst, Register src);
1579
1580 // These dummies prevent using movl from converting a zero (like NULL) into Register
1581 // by giving the compiler two choices it can't resolve
1582
1583 void movl(Address dst, void* junk);
1584 void movl(Register dst, void* junk);
1585
1586#ifdef _LP64 // 64-bit builds only
1587 void movq(Register dst, Register src);
1588 void movq(Register dst, Address src);
1589 void movq(Address dst, Register src);
1590 void movq(Address dst, int32_t imm32);
1591 void movq(Register dst, int32_t imm32);
1592
1593 // These dummies prevent using movq from converting a zero (like NULL) into Register
1594 // by giving the compiler two choices it can't resolve
1595
1596 void movq(Address dst, void* dummy);
1597 void movq(Register dst, void* dummy);
1598#endif // _LP64
1599
1600 // Move Quadword
1601 void movq(Address dst, XMMRegister src);
1602 void movq(XMMRegister dst, Address src);
1603 void movq(XMMRegister dst, XMMRegister src);
1604 void movq(Register dst, XMMRegister src);
1605 void movq(XMMRegister dst, Register src);
1606
1607 void movsbl(Register dst, Address src);
1608 void movsbl(Register dst, Register src);
1609
1610#ifdef _LP64 // movsbq/movslq are only declared when _LP64 is defined
1611 void movsbq(Register dst, Address src);
1612 void movsbq(Register dst, Register src);
1613
1614 // Move signed 32bit immediate to 64bit extending sign
1615 void movslq(Address dst, int32_t imm64);
1616 void movslq(Register dst, int32_t imm64);
1617
1618 void movslq(Register dst, Address src);
1619 void movslq(Register dst, Register src);
1620 void movslq(Register dst, void* src); // Dummy declaration to cause NULL to be ambiguous
1621#endif // _LP64
1622
1623 void movswl(Register dst, Address src);
1624 void movswl(Register dst, Register src);
1625
1626#ifdef _LP64 // movswq is only declared when _LP64 is defined
1627 void movswq(Register dst, Address src);
1628 void movswq(Register dst, Register src);
1629#endif // _LP64
1630
1631 void movw(Address dst, int imm16);
1632 void movw(Register dst, Address src);
1633 void movw(Address dst, Register src);
1634
1635 void movzbl(Register dst, Address src);
1636 void movzbl(Register dst, Register src);
1637
1638#ifdef _LP64 // movzbq is only declared when _LP64 is defined
1639 void movzbq(Register dst, Address src);
1640 void movzbq(Register dst, Register src);
1641#endif // _LP64
1642
1643 void movzwl(Register dst, Address src);
1644 void movzwl(Register dst, Register src);
1645
1646#ifdef _LP64 // movzwq is only declared when _LP64 is defined
1647 void movzwq(Register dst, Address src);
1648 void movzwq(Register dst, Register src);
1649#endif // _LP64
1650
1651 // Unsigned multiply with RAX destination register
1652 void mull(Address src);
1653 void mull(Register src);
1654
1655#ifdef _LP64 // mulq/mulxq are only declared when _LP64 is defined
1656 void mulq(Address src);
1657 void mulq(Register src);
1658 void mulxq(Register dst1, Register dst2, Register src);
1659#endif // _LP64
1660
1661 // Multiply Scalar Double-Precision Floating-Point Values
1662 void mulsd(XMMRegister dst, Address src);
1663 void mulsd(XMMRegister dst, XMMRegister src);
1664
1665 // Multiply Scalar Single-Precision Floating-Point Values
1666 void mulss(XMMRegister dst, Address src);
1667 void mulss(XMMRegister dst, XMMRegister src);
1668
1669 void negl(Register dst);
1670 void negl(Address dst);
1671
1672#ifdef _LP64 // negq is only declared when _LP64 is defined
1673 void negq(Register dst);
1674 void negq(Address dst);
1675#endif // _LP64
1676
1677 void nop(int i = 1);
1678
1679 void notl(Register dst);
1680
1681#ifdef _LP64 // notq/btsq/btrq are only declared when _LP64 is defined
1682 void notq(Register dst);
1683
1684 void btsq(Address dst, int imm8);
1685 void btrq(Address dst, int imm8);
1686#endif // _LP64
1687
1688 void orw(Register dst, Register src);
1689
1690 void orl(Address dst, int32_t imm32);
1691 void orl(Register dst, int32_t imm32);
1692 void orl(Register dst, Address src);
1693 void orl(Register dst, Register src);
1694 void orl(Address dst, Register src);
1695
1696 void orb(Address dst, int imm8);
1697 void orb(Address dst, Register src);
1698
1699 void orq(Address dst, int32_t imm32);
1700 void orq(Address dst, Register src);
1701 void orq(Register dst, int32_t imm32);
1702 void orq(Register dst, Address src);
1703 void orq(Register dst, Register src);
1704
1705 // Pack with signed saturation
1706 void packsswb(XMMRegister dst, XMMRegister src);
1707 void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1708 void packssdw(XMMRegister dst, XMMRegister src);
1709 void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1710
1711 // Pack with unsigned saturation
1712 void packuswb(XMMRegister dst, XMMRegister src);
1713 void packuswb(XMMRegister dst, Address src);
1714 void packusdw(XMMRegister dst, XMMRegister src);
1715 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1716 void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1717
1718 // Permutations
1719 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1720 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1721 void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1722 void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1723 void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1724 void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1725 void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1726 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1727 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1728 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1729 void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1730 void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1731 void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1732 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1733 void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1734 void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len);
1735
1736 void pause();
1737
1738 // Undefined Instruction
1739 void ud2();
1740
1741 // SSE4.2 string instructions
1742 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1743 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1744
1745 void pcmpeqb(XMMRegister dst, XMMRegister src);
1746 void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1747
1748 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1749 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1750 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1751 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1752
1753 void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1754 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1755 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1756
1757 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1758 void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);
1759
1760 void pcmpeqw(XMMRegister dst, XMMRegister src);
1761 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1762 void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1763 void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1764
1765 void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1766
1767 void pcmpeqd(XMMRegister dst, XMMRegister src);
1768 void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1769 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
1770 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
1771
1772 void pcmpeqq(XMMRegister dst, XMMRegister src);
1773 void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1774 void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1775 void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1776 void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1777
1778 void pcmpgtq(XMMRegister dst, XMMRegister src);
1779 void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1780
1781 void pmovmskb(Register dst, XMMRegister src);
1782 void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
1783 void vmovmskps(Register dst, XMMRegister src, int vec_enc);
1784 void vmovmskpd(Register dst, XMMRegister src, int vec_enc);
1785 void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1786
1787 // SSE 4.1 extract
1788 void pextrd(Register dst, XMMRegister src, int imm8);
1789 void pextrq(Register dst, XMMRegister src, int imm8);
1790 void pextrd(Address dst, XMMRegister src, int imm8);
1791 void pextrq(Address dst, XMMRegister src, int imm8);
1792 void pextrb(Register dst, XMMRegister src, int imm8);
1793 void pextrb(Address dst, XMMRegister src, int imm8);
1794 // SSE 2 extract
1795 void pextrw(Register dst, XMMRegister src, int imm8);
1796 void pextrw(Address dst, XMMRegister src, int imm8);
1797
1798 // SSE 4.1 insert
1799 void pinsrd(XMMRegister dst, Register src, int imm8);
1800 void pinsrq(XMMRegister dst, Register src, int imm8);
1801 void pinsrb(XMMRegister dst, Register src, int imm8);
1802 void pinsrd(XMMRegister dst, Address src, int imm8);
1803 void pinsrq(XMMRegister dst, Address src, int imm8);
1804 void pinsrb(XMMRegister dst, Address src, int imm8);
1805 void insertps(XMMRegister dst, XMMRegister src, int imm8);
1806 // SSE 2 insert
1807 void pinsrw(XMMRegister dst, Register src, int imm8);
1808 void pinsrw(XMMRegister dst, Address src, int imm8);
1809
1810 // AVX insert
1811 void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1812 void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1813 void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1814 void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1815 void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1816
1817 // Zero extend moves
1818 void pmovzxbw(XMMRegister dst, XMMRegister src);
1819 void pmovzxbw(XMMRegister dst, Address src);
1820 void pmovzxbd(XMMRegister dst, XMMRegister src);
1821 void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
1822 void pmovzxdq(XMMRegister dst, XMMRegister src);
1823 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1824 void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
1825 void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
1826 void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
1827 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1828
1829 // Sign extend moves
1830 void pmovsxbd(XMMRegister dst, XMMRegister src);
1831 void pmovsxbq(XMMRegister dst, XMMRegister src);
1832 void pmovsxbw(XMMRegister dst, XMMRegister src);
1833 void pmovsxwd(XMMRegister dst, XMMRegister src);
1834 void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
1835 void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
1836 void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
1837 void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
1838 void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
1839 void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
1840
1841 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1842 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1843
1844 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1845
1846 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1847
1848 // Multiply add
1849 void pmaddwd(XMMRegister dst, XMMRegister src);
1850 void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1851 void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1852
1853 // Multiply add accumulate
1854 void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1855
1856#ifndef _LP64 // no 32bit push/pop on amd64
1857 void popl(Address dst);
1858#endif // !_LP64
1859
1860#ifdef _LP64 // popq is only declared when _LP64 is defined
1861 void popq(Address dst);
1862 void popq(Register dst);
1863#endif // _LP64
1864
1865 void popcntl(Register dst, Address src);
1866 void popcntl(Register dst, Register src);
1867
1868 void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1869
1870#ifdef _LP64 // popcntq is only declared when _LP64 is defined
1871 void popcntq(Register dst, Address src);
1872 void popcntq(Register dst, Register src);
1873#endif // _LP64
1874
1875 // Prefetches (SSE, SSE2, 3DNOW only)
1876
1877 void prefetchnta(Address src);
1878 void prefetchr(Address src);
1879 void prefetcht0(Address src);
1880 void prefetcht1(Address src);
1881 void prefetcht2(Address src);
1882 void prefetchw(Address src);
1883
1884 // Shuffle Bytes
1885 void pshufb(XMMRegister dst, XMMRegister src);
1886 void pshufb(XMMRegister dst, Address src);
1887 void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1888
1889 // Shuffle Packed Doublewords
1890 void pshufd(XMMRegister dst, XMMRegister src, int mode);
1891 void pshufd(XMMRegister dst, Address src, int mode);
1892 void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
1893
1894 // Shuffle Packed High/Low Words
1895 void pshufhw(XMMRegister dst, XMMRegister src, int mode);
1896 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
1897 void pshuflw(XMMRegister dst, Address src, int mode);
1898
1899 // Shuffle floats and doubles
1900 void pshufps(XMMRegister, XMMRegister, int);
1901 void pshufpd(XMMRegister, XMMRegister, int);
1902 void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
1903 void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
1904
1905 // Shuffle packed values at 128 bit granularity
1906 void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
1907
1908 // Shift Right by bytes Logical DoubleQuadword Immediate
1909 void psrldq(XMMRegister dst, int shift);
1910 // Shift Left by bytes Logical DoubleQuadword Immediate
1911 void pslldq(XMMRegister dst, int shift);
1912
1913 // Logical Compare 128bit
1914 void ptest(XMMRegister dst, XMMRegister src);
1915 void ptest(XMMRegister dst, Address src);
1916 // Logical Compare 256bit
1917 void vptest(XMMRegister dst, XMMRegister src);
1918 void vptest(XMMRegister dst, Address src);
1919
1920 void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1921
1922 // Vector compare
1923 void vptest(XMMRegister dst, XMMRegister src, int vector_len);
1924
1925 // Interleave Low Bytes
1926 void punpcklbw(XMMRegister dst, XMMRegister src);
1927 void punpcklbw(XMMRegister dst, Address src);
1928
1929 // Interleave Low Doublewords
1930 void punpckldq(XMMRegister dst, XMMRegister src);
1931 void punpckldq(XMMRegister dst, Address src);
1932
1933 // Interleave Low Quadwords
1934 void punpcklqdq(XMMRegister dst, XMMRegister src);
1935
1936#ifndef _LP64 // no 32bit push/pop on amd64
1937 void pushl(Address src);
1938#endif // !_LP64
1939
1940 void pushq(Address src);
1941
1942 void rcll(Register dst, int imm8);
1943
1944 void rclq(Register dst, int imm8);
1945
1946 void rcrq(Register dst, int imm8);
1947
1948 void rcpps(XMMRegister dst, XMMRegister src);
1949
1950 void rcpss(XMMRegister dst, XMMRegister src);
1951
1952 void rdtsc();
1953
1954 void ret(int imm16);
1955
1956 void roll(Register dst);
1957
1958 void roll(Register dst, int imm8);
1959
1960 void rorl(Register dst);
1961
1962 void rorl(Register dst, int imm8);
1963
1964#ifdef _LP64 // rolq/rorq/rorxq/rorxd are only declared when _LP64 is defined
1965 void rolq(Register dst);
1966 void rolq(Register dst, int imm8);
1967 void rorq(Register dst);
1968 void rorq(Register dst, int imm8);
1969 void rorxq(Register dst, Register src, int imm8);
1970 void rorxd(Register dst, Register src, int imm8);
1971#endif // _LP64
1972
1973 void sahf();
1974
1975 void sall(Register dst, int imm8);
1976 void sall(Register dst);
1977 void sall(Address dst, int imm8);
1978 void sall(Address dst);
1979
1980 void sarl(Address dst, int imm8);
1981 void sarl(Address dst);
1982 void sarl(Register dst, int imm8);
1983 void sarl(Register dst);
1984
1985#ifdef _LP64 // salq/sarq are only declared when _LP64 is defined
1986 void salq(Register dst, int imm8);
1987 void salq(Register dst);
1988 void salq(Address dst, int imm8);
1989 void salq(Address dst);
1990
1991 void sarq(Address dst, int imm8);
1992 void sarq(Address dst);
1993 void sarq(Register dst, int imm8);
1994 void sarq(Register dst);
1995#endif // _LP64
1996
1997 void sbbl(Address dst, int32_t imm32);
1998 void sbbl(Register dst, int32_t imm32);
1999 void sbbl(Register dst, Address src);
2000 void sbbl(Register dst, Register src);
2001
2002 void sbbq(Address dst, int32_t imm32);
2003 void sbbq(Register dst, int32_t imm32);
2004 void sbbq(Register dst, Address src);
2005 void sbbq(Register dst, Register src);
2006
2007 void setb(Condition cc, Register dst);
2008
2009 void sete(Register dst);
2010 void setl(Register dst);
2011 void setne(Register dst);
2012
2013 void palignr(XMMRegister dst, XMMRegister src, int imm8);
2014 void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2015 void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2016
2017 void pblendw(XMMRegister dst, XMMRegister src, int imm8);
2018 void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2019
2020 void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
2021 void sha1nexte(XMMRegister dst, XMMRegister src);
2022 void sha1msg1(XMMRegister dst, XMMRegister src);
2023 void sha1msg2(XMMRegister dst, XMMRegister src);
2024 // xmm0 is an implicit additional source for the following instruction.
2025 void sha256rnds2(XMMRegister dst, XMMRegister src);
2026 void sha256msg1(XMMRegister dst, XMMRegister src);
2027 void sha256msg2(XMMRegister dst, XMMRegister src);
2028
2029 void shldl(Register dst, Register src);
2030 void shldl(Register dst, Register src, int8_t imm8);
2031 void shrdl(Register dst, Register src);
2032 void shrdl(Register dst, Register src, int8_t imm8);
2033
2034 void shll(Register dst, int imm8);
2035 void shll(Register dst);
2036
2037 void shlq(Register dst, int imm8);
2038 void shlq(Register dst);
2039
2040 void shrl(Register dst, int imm8);
2041 void shrl(Register dst);
2042 void shrl(Address dst);
2043 void shrl(Address dst, int imm8);
2044
2045 void shrq(Register dst, int imm8);
2046 void shrq(Register dst);
2047 void shrq(Address dst);
2048 void shrq(Address dst, int imm8);
2049
2050 void smovl(); // QQQ generic?
2051
2052 // Compute Square Root of Scalar Double-Precision Floating-Point Value
2053 void sqrtsd(XMMRegister dst, Address src);
2054 void sqrtsd(XMMRegister dst, XMMRegister src);
2055
2056 void roundsd(XMMRegister dst, Address src, int32_t rmode);
2057 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);
2058
2059 // Compute Square Root of Scalar Single-Precision Floating-Point Value
2060 void sqrtss(XMMRegister dst, Address src);
2061 void sqrtss(XMMRegister dst, XMMRegister src);
2062
2063 void std();
2064
2065 void stmxcsr( Address dst );
2066
2067 void subl(Address dst, int32_t imm32);
2068 void subl(Address dst, Register src);
2069 void subl(Register dst, int32_t imm32);
2070 void subl(Register dst, Address src);
2071 void subl(Register dst, Register src);
2072
2073 void subq(Address dst, int32_t imm32);
2074 void subq(Address dst, Register src);
2075 void subq(Register dst, int32_t imm32);
2076 void subq(Register dst, Address src);
2077 void subq(Register dst, Register src);
2078
2079 // Force generation of a 4 byte immediate value even if it fits into 8bit
2080 void subl_imm32(Register dst, int32_t imm32);
2081 void subq_imm32(Register dst, int32_t imm32);
2082
2083 // Subtract Scalar Double-Precision Floating-Point Values
2084 void subsd(XMMRegister dst, Address src);
2085 void subsd(XMMRegister dst, XMMRegister src);
2086
2087 // Subtract Scalar Single-Precision Floating-Point Values
2088 void subss(XMMRegister dst, Address src);
2089 void subss(XMMRegister dst, XMMRegister src);
2090
2091 void testb(Register dst, int imm8);
2092 void testb(Address dst, int imm8);
2093
2094 void testl(Register dst, int32_t imm32);
2095 void testl(Register dst, Register src);
2096 void testl(Register dst, Address src);
2097
2098 void testq(Address dst, int32_t imm32);
2099 void testq(Register dst, int32_t imm32);
2100 void testq(Register dst, Register src);
2101 void testq(Register dst, Address src);
2102
2103 // BMI - count trailing zeros
2104 void tzcntl(Register dst, Register src);
2105 void tzcntq(Register dst, Register src);
2106
2107 // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
2108 void ucomisd(XMMRegister dst, Address src);
2109 void ucomisd(XMMRegister dst, XMMRegister src);
2110
2111 // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
2112 void ucomiss(XMMRegister dst, Address src);
2113 void ucomiss(XMMRegister dst, XMMRegister src);
2114
2115 void xabort(int8_t imm8);
2116
2117 void xaddb(Address dst, Register src);
2118 void xaddw(Address dst, Register src);
2119 void xaddl(Address dst, Register src);
2120 void xaddq(Address dst, Register src);
2121
2122 void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
2123
2124 void xchgb(Register reg, Address adr);
2125 void xchgw(Register reg, Address adr);
2126 void xchgl(Register reg, Address adr);
2127 void xchgl(Register dst, Register src);
2128
2129 void xchgq(Register reg, Address adr);
2130 void xchgq(Register dst, Register src);
2131
2132 void xend();
2133
2134 // Get Value of Extended Control Register
2135 void xgetbv();
2136
2137 void xorl(Register dst, int32_t imm32);
2138 void xorl(Address dst, int32_t imm32);
2139 void xorl(Register dst, Address src);
2140 void xorl(Register dst, Register src);
2141 void xorl(Address dst, Register src);
2142
2143 void xorb(Address dst, Register src);
2144 void xorb(Register dst, Address src);
2145 void xorw(Register dst, Register src);
2146
2147 void xorq(Register dst, Address src);
2148 void xorq(Address dst, int32_t imm32);
2149 void xorq(Register dst, Register src);
2150 void xorq(Register dst, int32_t imm32);
2151 void xorq(Address dst, Register src);
2152
2153 void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
2154
2155 // AVX 3-operands scalar instructions (encoded with VEX prefix)
2156
2157 void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
2158 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2159 void vaddss(XMMRegister dst, XMMRegister nds, Address src);
2160 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2161 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
2162 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2163 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
2164 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2165 void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2166 void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2167 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
2168 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2169 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
2170 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2171 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
2172 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2173 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
2174 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2175
2176 void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2177 void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2178 void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2179 void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2180
2181 void shlxl(Register dst, Register src1, Register src2);
2182 void shlxq(Register dst, Register src1, Register src2);
2183 void shrxl(Register dst, Register src1, Register src2);
2184 void shrxq(Register dst, Register src1, Register src2);
2185
2186 void bzhiq(Register dst, Register src1, Register src2);
2187 void pdep(Register dst, Register src1, Register src2);
2188 void pext(Register dst, Register src1, Register src2);
2189
2190
2191 //====================VECTOR ARITHMETIC=====================================
2192 // Add Packed Floating-Point Values
2193 void addpd(XMMRegister dst, XMMRegister src);
2194 void addpd(XMMRegister dst, Address src);
2195 void addps(XMMRegister dst, XMMRegister src);
2196 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2197 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2198 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2199 void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2200
2201 // Subtract Packed Floating-Point Values
2202 void subpd(XMMRegister dst, XMMRegister src);
2203 void subps(XMMRegister dst, XMMRegister src);
2204 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2205 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2206 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2207 void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2208
2209 // Multiply Packed Floating-Point Values
2210 void mulpd(XMMRegister dst, XMMRegister src);
2211 void mulpd(XMMRegister dst, Address src);
2212 void mulps(XMMRegister dst, XMMRegister src);
2213 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2214 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2215 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2216 void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2217
2218 void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2219 void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2220 void vfmadd231pd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2221 void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2222
2223 // Divide Packed Floating-Point Values
2224 void divpd(XMMRegister dst, XMMRegister src);
2225 void divps(XMMRegister dst, XMMRegister src);
2226 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2227 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2228 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2229 void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2230
2231 // Sqrt Packed Floating-Point Values
2232 void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
2233 void vsqrtpd(XMMRegister dst, Address src, int vector_len);
2234 void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);
2235 void vsqrtps(XMMRegister dst, Address src, int vector_len);
2236
2237 // Round Packed Double precision value.
2238 void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2239 void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2240 void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2241 void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2242
2243 // Bitwise Logical AND of Packed Floating-Point Values
2244 void andpd(XMMRegister dst, XMMRegister src);
2245 void andps(XMMRegister dst, XMMRegister src);
2246 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2247 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2248 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2249 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2250
2251 void unpckhpd(XMMRegister dst, XMMRegister src);
2252 void unpcklpd(XMMRegister dst, XMMRegister src);
2253
2254 // Bitwise Logical XOR of Packed Floating-Point Values
2255 void xorpd(XMMRegister dst, XMMRegister src);
2256 void xorps(XMMRegister dst, XMMRegister src);
2257 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2258 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2259 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2260 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2261
2262 // Add horizontal packed integers
2263 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2264 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2265 void phaddw(XMMRegister dst, XMMRegister src);
2266 void phaddd(XMMRegister dst, XMMRegister src);
2267
2268 // Add packed integers
2269 void paddb(XMMRegister dst, XMMRegister src);
2270 void paddw(XMMRegister dst, XMMRegister src);
2271 void paddd(XMMRegister dst, XMMRegister src);
2272 void paddd(XMMRegister dst, Address src);
2273 void paddq(XMMRegister dst, XMMRegister src);
2274 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2275 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2276 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2277 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2278 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2279 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2280 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2281 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2282
2283 // Leaf level assembler routines for masked operations.
2284 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2285 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2286 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2287 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2288 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2289 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2290 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2291 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2292 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2293 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2294 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2295 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2296 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2297 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2298 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2299 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2300 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2301 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2302 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2303 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2304 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2305 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2306 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2307 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2308 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2309 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2310 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2311 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2312 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2313 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2314 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2315 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2316 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2317 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2318 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2319 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2320 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2321 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2322 void evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2323 void evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2324 void evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2325 void evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2326 void evpabsd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2327 void evpabsd(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2328 void evpabsq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2329 void evpabsq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2330 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2331 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2332 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2333 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2334 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2335 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2336 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2337 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2338 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2339 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2340 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2341 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2342 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2343 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2344 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2345 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2346 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2347 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2348 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2349 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2350 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2351 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2352 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2353 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2354 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2355 // Shift forms taking a KRegister mask and a merge flag, with the shift count passed as an int
2356 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2357 void evpslld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2358 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2359 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2360 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2361 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2362 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2363 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2364 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2365
2366 void evpsllvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2367 void evpsllvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2368 void evpsllvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2369 void evpsrlvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2370 void evpsrlvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2371 void evpsrlvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2372 void evpsravw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2373 void evpsravd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2374 void evpsravq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2375 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2376 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2377 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2378 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2379 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2380 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2381 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2382 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2383 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2384 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2385 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2386 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2387 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2388 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2389 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2390 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2391 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2392 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2393 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2394 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2395 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2396 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2397 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2398 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2399 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2400 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2401 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2402 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2403 // Rotate forms taking a KRegister mask: evprol{d,q}/evpror{d,q} with an int shift, evprolv{d,q}/evprorv{d,q} with XMMRegister operands
2404 void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2405 void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2406 void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2407 void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2408 void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2409 void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2410 void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2411 void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2412
2413 // Subtract packed integers
2414 void psubb(XMMRegister dst, XMMRegister src);
2415 void psubw(XMMRegister dst, XMMRegister src);
2416 void psubd(XMMRegister dst, XMMRegister src);
2417 void psubq(XMMRegister dst, XMMRegister src);
2418 void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2419 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2420 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2421 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2422 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2423 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2424 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2425 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2426 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2427
2428 // Multiply packed integers (shorts and ints; vpmullq also covers longs)
2429 void pmullw(XMMRegister dst, XMMRegister src);
2430 void pmulld(XMMRegister dst, XMMRegister src);
2431 void pmuludq(XMMRegister dst, XMMRegister src);
2432 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2433 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2434 void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2435 void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2436 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2437 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2438 void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2439 void vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2440
2441 // Minimum of packed integers and of packed floats/doubles (minps/minpd)
2442 void pminsb(XMMRegister dst, XMMRegister src);
2443 void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2444 void pminsw(XMMRegister dst, XMMRegister src);
2445 void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2446 void pminsd(XMMRegister dst, XMMRegister src);
2447 void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2448 void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2449 void minps(XMMRegister dst, XMMRegister src);
2450 void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2451 void minpd(XMMRegister dst, XMMRegister src);
2452 void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2453
2454 // Maximum of packed integers and of packed floats/doubles (maxps/maxpd)
2455 void pmaxsb(XMMRegister dst, XMMRegister src);
2456 void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2457 void pmaxsw(XMMRegister dst, XMMRegister src);
2458 void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2459 void pmaxsd(XMMRegister dst, XMMRegister src);
2460 void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2461 void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2462 void maxps(XMMRegister dst, XMMRegister src);
2463 void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2464 void maxpd(XMMRegister dst, XMMRegister src);
2465 void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2466
2467 // Shift left packed integers
2468 void psllw(XMMRegister dst, int shift);
2469 void pslld(XMMRegister dst, int shift);
2470 void psllq(XMMRegister dst, int shift);
2471 void psllw(XMMRegister dst, XMMRegister shift);
2472 void pslld(XMMRegister dst, XMMRegister shift);
2473 void psllq(XMMRegister dst, XMMRegister shift);
2474 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2475 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2476 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2477 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2478 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2479 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2480 void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2481
2482 // Logical shift right packed integers
2483 void psrlw(XMMRegister dst, int shift);
2484 void psrld(XMMRegister dst, int shift);
2485 void psrlq(XMMRegister dst, int shift);
2486 void psrlw(XMMRegister dst, XMMRegister shift);
2487 void psrld(XMMRegister dst, XMMRegister shift);
2488 void psrlq(XMMRegister dst, XMMRegister shift);
2489 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2490 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2491 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2492 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2493 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2494 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2495 void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2496 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2497 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2498
2499 // Arithmetic shift right packed integers (shorts and ints; longs only via the EVEX forms evpsraq)
2500 void psraw(XMMRegister dst, int shift);
2501 void psrad(XMMRegister dst, int shift);
2502 void psraw(XMMRegister dst, XMMRegister shift);
2503 void psrad(XMMRegister dst, XMMRegister shift);
2504 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2505 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2506 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2507 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2508 void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2509 void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2510 void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2511
2512 // Variable shift left packed integers
2513 void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2514 void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2515
2516 // Variable shift right packed integers
2517 void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2518 void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2519
2520 // Variable shift right arithmetic packed integers
2521 void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2522 void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2523
2524 void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2525 void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2526
2527 // And packed integers
2528 void pand(XMMRegister dst, XMMRegister src);
2529 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2530 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2531 void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2532
2533 // Andn packed integers
2534 void pandn(XMMRegister dst, XMMRegister src);
2535 void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2536
2537 // Or packed integers
2538 void por(XMMRegister dst, XMMRegister src);
2539 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2540 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2541 void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2542
2543 // Xor packed integers
2544 void pxor(XMMRegister dst, XMMRegister src);
2545 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2546 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2547 void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2548 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2549 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2550
2551 // Ternary logic instruction.
2552 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2553 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
2554 void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2555
2556 // Vector Rotate Left/Right instruction.
2557 void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2558 void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2559 void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2560 void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2561 void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2562 void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2563 void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2564 void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2565
2566 // vinserti forms
2567 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2568 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2569 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2570 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2571 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2572
2573 // vinsertf forms
2574 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2575 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2576 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2577 void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2578 void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2579 void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2580
2581 // vextracti forms
2582 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2583 void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
2584 void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2585 void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
2586 void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2587 void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2588 void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);
2589
2590 // vextractf forms
2591 void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2592 void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
2593 void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2594 void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
2595 void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2596 void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2597 void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
2598
2599 // xmm/mem sourced byte/word/dword/qword replicate
2600 void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2601 void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
2602 void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2603 void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
2604 void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2605 void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
2606 void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2607 void vpbroadcastq(XMMRegister dst, Address src, int vector_len);
2608
2609 void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
2610 void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
2611 void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
2612
2613 // scalar single/double/128bit precision replicate
2614 void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
2615 void vbroadcastss(XMMRegister dst, Address src, int vector_len);
2616 void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
2617 void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
2618 void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
2619
2620 // gpr sourced byte/word/dword/qword replicate
2621 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
2622 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
2623 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
2624 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
2625
2626 // Gather AVX2 and AVX3
2627 void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2628 void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2629 void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2630 void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
2631 void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
2632 void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
2633 void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
2634 void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
2635
2636 // Scatter AVX3 only
2637 void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
2638 void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
2639 void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
2640 void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
2641
2642 // Carry-Less Multiplication Quadword
2643 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
2644 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
2645 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
2646 // AVX instruction which is used to clear the upper 128 bits of YMM registers and
2647 // to avoid the transition penalty between AVX and SSE states. There is no
2648 // penalty if legacy SSE instructions are encoded using the VEX prefix because
2649 // they always clear the upper 128 bits. It should be used before calling
2650 // runtime code and native libraries.
2651 void vzeroupper();
2652
2653 // Vector double compares
2654 void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
2655 void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2656 ComparisonPredicateFP comparison, int vector_len);
2657
2658 // Vector float compares
2659 void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
2660 void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2661 ComparisonPredicateFP comparison, int vector_len);
2662
2663 // Vector integer compares
2664 void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2665 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2666 int comparison, bool is_signed, int vector_len);
2667 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2668 int comparison, bool is_signed, int vector_len);
2669
2670 // Vector long compares
2671 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2672 int comparison, bool is_signed, int vector_len);
2673 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2674 int comparison, bool is_signed, int vector_len);
2675
2676 // Vector byte compares
2677 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2678 int comparison, bool is_signed, int vector_len);
2679 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2680 int comparison, bool is_signed, int vector_len);
2681
2682 // Vector short compares
2683 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
2684 int comparison, bool is_signed, int vector_len);
2685 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
2686 int comparison, bool is_signed, int vector_len);
2687 // Transfers between a KRegister and an XMMRegister: evpmov{b,w,d,q}2m take a KRegister dst and an XMMRegister src; evpmovm2{b,w,d,q} the reverse
2688 void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);
2689 void evpmovw2m(KRegister dst, XMMRegister src, int vector_len);
2690 void evpmovd2m(KRegister dst, XMMRegister src, int vector_len);
2691 void evpmovq2m(KRegister dst, XMMRegister src, int vector_len);
2692 void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
2693 void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
2694 void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
2695 void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
2696
2697 // Vector blends
2698 void blendvps(XMMRegister dst, XMMRegister src);
2699 void blendvpd(XMMRegister dst, XMMRegister src);
2700 void pblendvb(XMMRegister dst, XMMRegister src);
2701 void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2702 void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2703 void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2704 void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2705 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2706 void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2707 void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2708 void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2709 void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2710 void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2711 void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2712 protected:
2713 // The following instructions require 16-byte address alignment in SSE mode.
2714 // They should be called only from the corresponding MacroAssembler instructions.
2715 void andpd(XMMRegister dst, Address src);
2716 void andps(XMMRegister dst, Address src);
2717 void xorpd(XMMRegister dst, Address src);
2718 void xorps(XMMRegister dst, Address src);
2719
2720};
2721
2722// Attributes for the Intel x86/AMD64 Assembler: all fields enclosed here guide encoding-level decisions.
2723// The specific set functions are for specialized use; otherwise the defaults, or whatever was supplied at
2724// object construction, are applied.
2725class InstructionAttr {
2726public:
2727 InstructionAttr( // Initializes the attribute set from the five caller-supplied values; every other field gets a fixed default
2728 int vector_len, // The length of vector to be applied in encoding - for both AVX and EVEX
2729 bool rex_vex_w, // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
2730 bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
2731 bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
2732 bool uses_vl) // This instruction may have legacy constraints based on vector length for EVEX
2733 :
2734 _rex_vex_w(rex_vex_w),
2735 _legacy_mode(legacy_mode || UseAVX < 3), // legacy mode is forced on whenever UseAVX < 3
2736 _no_reg_mask(no_reg_mask),
2737 _uses_vl(uses_vl),
2738 _rex_vex_w_reverted(false),
2739 _is_evex_instruction(false),
2740 _is_clear_context(true),
2741 _is_extended_context(false),
2742 _avx_vector_len(vector_len),
2743 _tuple_type(Assembler::EVEX_ETUP),
2744 _input_size_in_bits(Assembler::EVEX_NObit),
2745 _evex_encoding(0),
2746 _embedded_opmask_register_specifier(0), // hard code k0
2747 _current_assembler(NULL) { } // starts out with no assembler attached
2748
2749 ~InstructionAttr() {
2750 if (_current_assembler != NULL__null) {
2751 _current_assembler->clear_attributes();
2752 }
2753 _current_assembler = NULL__null;
2754 }
2755
2756private:
2757 bool _rex_vex_w;
2758 bool _legacy_mode;
2759 bool _no_reg_mask;
2760 bool _uses_vl;
2761 bool _rex_vex_w_reverted;
2762 bool _is_evex_instruction;
2763 bool _is_clear_context;
2764 bool _is_extended_context;
2765 int _avx_vector_len;
2766 int _tuple_type;
2767 int _input_size_in_bits;
2768 int _evex_encoding;
2769 int _embedded_opmask_register_specifier;
2770
2771 Assembler *_current_assembler;
2772
2773public:
2774 // query functions for field accessors
2775 bool is_rex_vex_w(void) const { return _rex_vex_w; }
2776 bool is_legacy_mode(void) const { return _legacy_mode; }
2777 bool is_no_reg_mask(void) const { return _no_reg_mask; }
2778 bool uses_vl(void) const { return _uses_vl; }
2779 bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
2780 bool is_evex_instruction(void) const { return _is_evex_instruction; }
2781 bool is_clear_context(void) const { return _is_clear_context; }
2782 bool is_extended_context(void) const { return _is_extended_context; }
2783 int get_vector_len(void) const { return _avx_vector_len; }
2784 int get_tuple_type(void) const { return _tuple_type; }
2785 int get_input_size(void) const { return _input_size_in_bits; }
2786 int get_evex_encoding(void) const { return _evex_encoding; }
2787 int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
2788
2789 // Set the vector len manually
2790 void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
2791
2792 // Set revert rex_vex_w for avx encoding
2793 void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
2794
2795 // Set rex_vex_w based on state
2796 void set_rex_vex_w(bool state) { _rex_vex_w = state; }
2797
2798 // Set the instruction to be encoded in AVX mode
2799 void set_is_legacy_mode(void) { _legacy_mode = true; }
2800
2801 // Set the current instuction to be encoded as an EVEX instuction
2802 void set_is_evex_instruction(void) { _is_evex_instruction = true; }
2803
2804 // Internal encoding data used in compressed immediate offset programming
2805 void set_evex_encoding(int value) { _evex_encoding = value; }
2806
2807 // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
2808 // This method unsets it so that merge semantics are used instead.
2809 void reset_is_clear_context(void) { _is_clear_context = false; }
2810
2811 // Map back to current asembler so that we can manage object level assocation
2812 void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
2813
2814 // Address modifiers used for compressed displacement calculation
2815 void set_address_attributes(int tuple_type, int input_size_in_bits);
2816
2817 // Set embedded opmask register specifier.
2818 void set_embedded_opmask_register_specifier(KRegister mask) {
2819 _embedded_opmask_register_specifier = (*mask).encoding() & 0x7;
2820 }
2821
2822};
2823
2824#endif // CPU_X86_ASSEMBLER_X86_HPP