Bug Summary

File:jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Warning:line 2072, column 7
Value stored to 'eindex' during its initialization is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name c2_MacroAssembler_x86.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-delete-null-pointer-checks -mframe-pointer=all -relaxed-aliasing -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/libjvm/objs/precompiled -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D _GNU_SOURCE -D _REENTRANT -D LIBC=gnu -D LINUX -D VM_LITTLE_ENDIAN -D _LP64=1 -D ASSERT -D CHECK_UNHANDLED_OOPS -D TARGET_ARCH_x86 -D INCLUDE_SUFFIX_OS=_linux -D INCLUDE_SUFFIX_CPU=_x86 -D INCLUDE_SUFFIX_COMPILER=_gcc -D TARGET_COMPILER_gcc -D AMD64 -D HOTSPOT_LIB_ARCH="amd64" -D COMPILER1 -D COMPILER2 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I 
/home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -I /home/daniel/Projects/java/jdk/src/hotspot/share/precompiled -I /home/daniel/Projects/java/jdk/src/hotspot/share/include -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix/include -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/support/modules_include/java.base/linux -I /home/daniel/Projects/java/jdk/src/java.base/share/native/libjimage -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc/adfiles -I /home/daniel/Projects/java/jdk/src/hotspot/share -I /home/daniel/Projects/java/jdk/src/hotspot/os/linux -I /home/daniel/Projects/java/jdk/src/hotspot/os/posix -I /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86 -I /home/daniel/Projects/java/jdk/src/hotspot/os_cpu/linux_x86 -I /home/daniel/Projects/java/jdk/build/linux-x86_64-server-fastdebug/hotspot/variant-server/gensrc -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/x86_64-linux-gnu/c++/7.5.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-format-zero-length -Wno-unused-parameter -Wno-unused -Wno-parentheses -Wno-comment -Wno-unknown-pragmas -Wno-address -Wno-delete-non-virtual-dtor -Wno-char-subscripts -Wno-array-bounds -Wno-int-in-bool-context -Wno-ignored-qualifiers 
-Wno-missing-field-initializers -Wno-implicit-fallthrough -Wno-empty-body -Wno-strict-overflow -Wno-sequence-point -Wno-maybe-uninitialized -Wno-misleading-indentation -Wno-cast-function-type -Wno-shift-negative-value -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /home/daniel/Projects/java/jdk/make/hotspot -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -stack-protector 1 -fno-rtti -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -o /home/daniel/Projects/java/scan/2021-12-21-193737-8510-1 -x c++ /home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
1/*
2 * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "asm/assembler.inline.hpp"
28#include "oops/methodData.hpp"
29#include "opto/c2_MacroAssembler.hpp"
30#include "opto/intrinsicnode.hpp"
31#include "opto/opcodes.hpp"
32#include "opto/subnode.hpp"
33#include "runtime/objectMonitor.hpp"
34#include "runtime/stubRoutines.hpp"
35
36inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
37 switch (vlen_in_bytes) {
38 case 4: // fall-through
39 case 8: // fall-through
40 case 16: return Assembler::AVX_128bit;
41 case 32: return Assembler::AVX_256bit;
42 case 64: return Assembler::AVX_512bit;
43
44 default: {
45 ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 45); ::breakpoint(); } while (0)
;
46 return Assembler::AVX_NoVec;
47 }
48 }
49}
50
// Emits the instruction sequence that builds a vector mask from the value in src.
// Instructions emitted, in order:
//   movl   dst, 1
//   shlxl  dst, dst, src   (presumably dst = 1 << src -- confirm operand order
//                           against Assembler::shlxl)
//   decl   dst             (under that reading dst = (1 << src) - 1)
//   kmovdl mask, dst       (copy the computed bits into the mask register)
//   movl   dst, src        (dst ends up holding a copy of src)
// dst is overwritten; src is only read.
// The guarantee reports a VM error if PostLoopMultiversioning is not enabled.
51void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
52 guarantee(PostLoopMultiversioning, "must be")do { if (!(PostLoopMultiversioning)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 52, "guarantee(" "PostLoopMultiversioning" ") failed", "must be"
); ::breakpoint(); } } while (0)
;
53 Assembler::movl(dst, 1);
54 Assembler::shlxl(dst, dst, src);
55 Assembler::decl(dst);
56 Assembler::kmovdl(mask, dst);
57 Assembler::movl(dst, src);
58}
59
// Emits a single knotwl(mask, k0) to reset the given mask register.
// NOTE(review): presumably this writes the bitwise complement of k0 into mask
// (i.e. all-ones if k0 reads as zero) -- confirm against Assembler::knotwl.
// The guarantee reports a VM error if PostLoopMultiversioning is not enabled.
60void C2_MacroAssembler::restorevectmask(KRegister mask) {
61 guarantee(PostLoopMultiversioning, "must be")do { if (!(PostLoopMultiversioning)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 61, "guarantee(" "PostLoopMultiversioning" ") failed", "must be"
); ::breakpoint(); } } while (0)
;
62 Assembler::knotwl(mask, k0);
63}
64
65#if INCLUDE_RTM_OPT1
66
67// Update rtm_counters based on abort status
68// input: abort_status
69// rtm_counters (RTMLockingCounters*)
70// flags are killed
71void C2_MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
72
73 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
74 if (PrintPreciseRTMLockingStatistics) {
75 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
76 Label check_abort;
77 testl(abort_status, (1<<i));
78 jccb(Assembler::equal, check_abort)jccb_0(Assembler::equal, check_abort, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 78)
;
79 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
80 bind(check_abort);
81 }
82 }
83}
84
85// Branch if (random & (count-1) != 0), count is 2^n
86// tmp, scr and flags are killed
87void C2_MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) {
88 assert(tmp == rax, "")do { if (!(tmp == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 88, "assert(" "tmp == rax" ") failed", ""); ::breakpoint();
} } while (0)
;
89 assert(scr == rdx, "")do { if (!(scr == rdx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 89, "assert(" "scr == rdx" ") failed", ""); ::breakpoint();
} } while (0)
;
90 rdtsc(); // modifies EDX:EAX
91 andptr(tmp, count-1);
92 jccb(Assembler::notZero, brLabel)jccb_0(Assembler::notZero, brLabel, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 92)
;
93}
94
95// Perform abort ratio calculation, set no_rtm bit if high ratio
96// input: rtm_counters_Reg (RTMLockingCounters* address)
97// tmpReg, rtm_counters_Reg and flags are killed
98void C2_MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg,
99 Register rtm_counters_Reg,
100 RTMLockingCounters* rtm_counters,
101 Metadata* method_data) {
102 Label L_done, L_check_always_rtm1, L_check_always_rtm2;
103
104 if (RTMLockingCalculationDelay > 0) {
105 // Delay calculation
106 movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg);
107 testptr(tmpReg, tmpReg);
108 jccb(Assembler::equal, L_done)jccb_0(Assembler::equal, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 108)
;
109 }
110 // Abort ratio calculation only if abort_count > RTMAbortThreshold
111 // Aborted transactions = abort_count * 100
112 // All transactions = total_count * RTMTotalCountIncrRate
113 // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
114
115 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
116 cmpptr(tmpReg, RTMAbortThreshold);
117 jccb(Assembler::below, L_check_always_rtm2)jccb_0(Assembler::below, L_check_always_rtm2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 117)
;
118 imulptr(tmpReg, tmpReg, 100);
119
120 Register scrReg = rtm_counters_Reg;
121 movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
122 imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
123 imulptr(scrReg, scrReg, RTMAbortRatio);
124 cmpptr(tmpReg, scrReg);
125 jccb(Assembler::below, L_check_always_rtm1)jccb_0(Assembler::below, L_check_always_rtm1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 125)
;
126 if (method_data != NULL__null) {
127 // set rtm_state to "no rtm" in MDO
128 mov_metadata(tmpReg, method_data);
129 lock();
130 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
131 }
132 jmpb(L_done)jmpb_0(L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 132)
;
133 bind(L_check_always_rtm1);
134 // Reload RTMLockingCounters* address
135 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
136 bind(L_check_always_rtm2);
137 movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
138 cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
139 jccb(Assembler::below, L_done)jccb_0(Assembler::below, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 139)
;
140 if (method_data != NULL__null) {
141 // set rtm_state to "always rtm" in MDO
142 mov_metadata(tmpReg, method_data);
143 lock();
144 orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
145 }
146 bind(L_done);
147}
148
149// Update counters and perform abort ratio calculation
150// input: abort_status_Reg
151// rtm_counters_Reg, flags are killed
152void C2_MacroAssembler::rtm_profiling(Register abort_status_Reg,
153 Register rtm_counters_Reg,
154 RTMLockingCounters* rtm_counters,
155 Metadata* method_data,
156 bool profile_rtm) {
157
158 assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 158, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
159 // update rtm counters based on rax value at abort
160 // reads abort_status_Reg, updates flags
161 lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
162 rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
163 if (profile_rtm) {
164 // Save abort status because abort_status_Reg is used by following code.
165 if (RTMRetryCount > 0) {
166 push(abort_status_Reg);
167 }
168 assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 168, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
169 rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data);
170 // restore abort status
171 if (RTMRetryCount > 0) {
172 pop(abort_status_Reg);
173 }
174 }
175}
176
177// Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4)
178// inputs: retry_count_Reg
179// : abort_status_Reg
180// output: retry_count_Reg decremented by 1
181// flags are killed
182void C2_MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) {
183 Label doneRetry;
184 assert(abort_status_Reg == rax, "")do { if (!(abort_status_Reg == rax)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 184, "assert(" "abort_status_Reg == rax" ") failed", ""); ::
breakpoint(); } } while (0)
;
185 // The abort reason bits are in eax (see all states in rtmLocking.hpp)
186 // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4)
187 // if reason is in 0x6 and retry count != 0 then retry
188 andptr(abort_status_Reg, 0x6);
189 jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 189)
;
190 testl(retry_count_Reg, retry_count_Reg);
191 jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 191)
;
192 pause();
193 decrementl(retry_count_Reg);
194 jmp(retryLabel);
195 bind(doneRetry);
196}
197
198// Spin and retry if lock is busy,
199// inputs: box_Reg (monitor address)
200// : retry_count_Reg
201// output: retry_count_Reg decremented by 1
202// : clear z flag if retry count exceeded
203// tmp_Reg, scr_Reg, flags are killed
204void C2_MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg,
205 Register tmp_Reg, Register scr_Reg, Label& retryLabel) {
206 Label SpinLoop, SpinExit, doneRetry;
207 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
;
208
209 testl(retry_count_Reg, retry_count_Reg);
210 jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 210)
;
211 decrementl(retry_count_Reg);
212 movptr(scr_Reg, RTMSpinLoopCount);
213
214 bind(SpinLoop);
215 pause();
216 decrementl(scr_Reg);
217 jccb(Assembler::lessEqual, SpinExit)jccb_0(Assembler::lessEqual, SpinExit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 217)
;
218 movptr(tmp_Reg, Address(box_Reg, owner_offset));
219 testptr(tmp_Reg, tmp_Reg);
220 jccb(Assembler::notZero, SpinLoop)jccb_0(Assembler::notZero, SpinLoop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 220)
;
221
222 bind(SpinExit);
223 jmp(retryLabel);
224 bind(doneRetry);
225 incrementl(retry_count_Reg); // clear z flag
226}
227
228// Use RTM for normal stack locks
229// Input: objReg (object to lock)
230void C2_MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
231 Register retry_on_abort_count_Reg,
232 RTMLockingCounters* stack_rtm_counters,
233 Metadata* method_data, bool profile_rtm,
234 Label& DONE_LABEL, Label& IsInflated) {
235 assert(UseRTMForStackLocks, "why call this otherwise?")do { if (!(UseRTMForStackLocks)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 235, "assert(" "UseRTMForStackLocks" ") failed", "why call this otherwise?"
); ::breakpoint(); } } while (0)
;
236 assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 236, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
237 assert(scrReg == rdx, "")do { if (!(scrReg == rdx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 237, "assert(" "scrReg == rdx" ") failed", ""); ::breakpoint
(); } } while (0)
;
238 Label L_rtm_retry, L_decrement_retry, L_on_abort;
239
240 if (RTMRetryCount > 0) {
241 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
242 bind(L_rtm_retry);
243 }
244 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
245 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
246 jcc(Assembler::notZero, IsInflated);
247
248 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
249 Label L_noincrement;
250 if (RTMTotalCountIncrRate > 1) {
251 // tmpReg, scrReg and flags are killed
252 branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
253 }
254 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(stack_rtm_counters != __null)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 254, "assert(" "stack_rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
255 atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
256 bind(L_noincrement);
257 }
258 xbegin(L_on_abort);
259 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
260 andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits
261 cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked
262 jcc(Assembler::equal, DONE_LABEL); // all done if unlocked
263
264 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
265 if (UseRTMXendForLockBusy) {
266 xend();
267 movptr(abort_status_Reg, 0x2); // Set the abort status to 2 (so we can retry)
268 jmp(L_decrement_retry);
269 }
270 else {
271 xabort(0);
272 }
273 bind(L_on_abort);
274 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
275 rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm);
276 }
277 bind(L_decrement_retry);
278 if (RTMRetryCount > 0) {
279 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
280 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
281 }
282}
283
284// Use RTM for inflating locks
285// inputs: objReg (object to lock)
286// boxReg (on-stack box address (displaced header location) - KILLED)
287// tmpReg (ObjectMonitor address + markWord::monitor_value)
288void C2_MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
289 Register scrReg, Register retry_on_busy_count_Reg,
290 Register retry_on_abort_count_Reg,
291 RTMLockingCounters* rtm_counters,
292 Metadata* method_data, bool profile_rtm,
293 Label& DONE_LABEL) {
294 assert(UseRTMLocking, "why call this otherwise?")do { if (!(UseRTMLocking)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 294, "assert(" "UseRTMLocking" ") failed", "why call this otherwise?"
); ::breakpoint(); } } while (0)
;
295 assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 295, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
296 assert(scrReg == rdx, "")do { if (!(scrReg == rdx)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 296, "assert(" "scrReg == rdx" ") failed", ""); ::breakpoint
(); } } while (0)
;
297 Label L_rtm_retry, L_decrement_retry, L_on_abort;
298 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
;
299
300 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
301 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
302 movptr(boxReg, tmpReg); // Save ObjectMonitor address
303
304 if (RTMRetryCount > 0) {
305 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
306 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
307 bind(L_rtm_retry);
308 }
309 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
310 Label L_noincrement;
311 if (RTMTotalCountIncrRate > 1) {
312 // tmpReg, scrReg and flags are killed
313 branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
314 }
315 assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 315, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM"
); ::breakpoint(); } } while (0)
;
316 atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
317 bind(L_noincrement);
318 }
319 xbegin(L_on_abort);
320 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
321 movptr(tmpReg, Address(tmpReg, owner_offset));
322 testptr(tmpReg, tmpReg);
323 jcc(Assembler::zero, DONE_LABEL);
324 if (UseRTMXendForLockBusy) {
325 xend();
326 jmp(L_decrement_retry);
327 }
328 else {
329 xabort(0);
330 }
331 bind(L_on_abort);
332 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
333 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
334 rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
335 }
336 if (RTMRetryCount > 0) {
337 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
338 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
339 }
340
341 movptr(tmpReg, Address(boxReg, owner_offset)) ;
342 testptr(tmpReg, tmpReg) ;
343 jccb(Assembler::notZero, L_decrement_retry)jccb_0(Assembler::notZero, L_decrement_retry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 343)
;
344
345 // Appears unlocked - try to swing _owner from null to non-null.
346 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
347#ifdef _LP641
348 Register threadReg = r15_thread;
349#else
350 get_thread(scrReg);
351 Register threadReg = scrReg;
352#endif
353 lock();
354 cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
355
356 if (RTMRetryCount > 0) {
357 // success done else retry
358 jccb(Assembler::equal, DONE_LABEL)jccb_0(Assembler::equal, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 358)
;
359 bind(L_decrement_retry);
360 // Spin and retry if lock is busy.
361 rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
362 }
363 else {
364 bind(L_decrement_retry);
365 }
366}
367
368#endif // INCLUDE_RTM_OPT
369
370// fast_lock and fast_unlock used by C2
371
372// Because the transitions from emitted code to the runtime
373// monitorenter/exit helper stubs are so slow it's critical that
374// we inline both the stack-locking fast path and the inflated fast path.
375//
376// See also: cmpFastLock and cmpFastUnlock.
377//
378// What follows is a specialized inline transliteration of the code
379// in enter() and exit(). If we're concerned about I$ bloat another
380// option would be to emit TrySlowEnter and TrySlowExit methods
381// at startup-time. These methods would accept arguments as
382// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
383// indications in the icc.ZFlag. fast_lock and fast_unlock would simply
384// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
385// In practice, however, the # of lock sites is bounded and is usually small.
386// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
387// if the processor uses simple bimodal branch predictors keyed by EIP
388// Since the helper routines would be called from multiple synchronization
389// sites.
390//
391// An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
392// in java - using j.u.c and unsafe - and just bind the lock and unlock sites
393// to those specialized methods. That'd give us a mostly platform-independent
394// implementation that the JITs could optimize and inline at their pleasure.
395// Done correctly, the only time we'd need to cross to native code would be
396// to park() or unpark() threads. We'd also need a few more unsafe operators
397// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
398// (b) explicit barriers or fence operations.
399//
400// TODO:
401//
402// * Arrange for C2 to pass "Self" into fast_lock and fast_unlock in one of the registers (scr).
403// This avoids manifesting the Self pointer in the fast_lock and fast_unlock terminals.
404// Given TLAB allocation, Self is usually manifested in a register, so passing it into
405// the lock operators would typically be faster than reifying Self.
406//
407// * Ideally I'd define the primitives as:
408// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
409// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
410// Unfortunately ADLC bugs prevent us from expressing the ideal form.
411// Instead, we're stuck with a rather awkward and brittle register assignments below.
412// Furthermore the register assignments are overconstrained, possibly resulting in
413// sub-optimal code near the synchronization site.
414//
415// * Eliminate the sp-proximity tests and just use "== Self" tests instead.
416// Alternately, use a better sp-proximity test.
417//
418// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
419// Either one is sufficient to uniquely identify a thread.
420// TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
421//
422// * Intrinsify notify() and notifyAll() for the common cases where the
423// object is locked by the calling thread but the waitlist is empty.
424// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
425//
426// * use jccb and jmpb instead of jcc and jmp to improve code density.
427// But beware of excessive branch density on AMD Opterons.
428//
429// * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
430// or failure of the fast path. If the fast path fails then we pass
431// control to the slow path, typically in C. In fast_lock and
432// fast_unlock we often branch to DONE_LABEL, just to find that C2
433// will emit a conditional branch immediately after the node.
434// So we have branches to branches and lots of ICC.ZF games.
435// Instead, it might be better to have C2 pass a "FailureLabel"
436// into fast_lock and fast_unlock. In the case of success, control
437// will drop through the node. ICC.ZF is undefined at exit.
438// In the case of failure, the node will branch directly to the
439// FailureLabel
440
441
442// obj: object to lock
443// box: on-stack box address (displaced header location) - KILLED
444// rax,: tmp -- KILLED
445// scr: tmp -- KILLED
446void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
447 Register scrReg, Register cx1Reg, Register cx2Reg,
448 RTMLockingCounters* rtm_counters,
449 RTMLockingCounters* stack_rtm_counters,
450 Metadata* method_data,
451 bool use_rtm, bool profile_rtm) {
452 // Ensure the register assignments are disjoint
453 assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 453, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
454
455 if (use_rtm) {
456 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
457 } else {
458 assert(cx2Reg == noreg, "")do { if (!(cx2Reg == noreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 458, "assert(" "cx2Reg == noreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
459 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
460 }
461
462 // Possible cases that we'll encounter in fast_lock
463 // ------------------------------------------------
464 // * Inflated
465 // -- unlocked
466 // -- Locked
467 // = by self
468 // = by other
469 // * neutral
470 // * stack-locked
471 // -- by self
472 // = sp-proximity test hits
473 // = sp-proximity test generates false-negative
474 // -- by other
475 //
476
477 Label IsInflated, DONE_LABEL;
478
479 if (DiagnoseSyncOnValueBasedClasses != 0) {
480 load_klass(tmpReg, objReg, cx1Reg);
481 movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
482 testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
483 jcc(Assembler::notZero, DONE_LABEL);
484 }
485
486#if INCLUDE_RTM_OPT1
487 if (UseRTMForStackLocks && use_rtm) {
488 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive")do { if (!(!UseHeavyMonitors)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 488, "assert(" "!UseHeavyMonitors" ") failed", "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive"
); ::breakpoint(); } } while (0)
;
489 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
490 stack_rtm_counters, method_data, profile_rtm,
491 DONE_LABEL, IsInflated);
492 }
493#endif // INCLUDE_RTM_OPT
494
495 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
496 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
497 jccb(Assembler::notZero, IsInflated)jccb_0(Assembler::notZero, IsInflated, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 497)
;
498
499 if (!UseHeavyMonitors) {
500 // Attempt stack-locking ...
501 orptr (tmpReg, markWord::unlocked_value);
502 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
503 lock();
504 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
505 jcc(Assembler::equal, DONE_LABEL); // Success
506
507 // Recursive locking.
508 // The object is stack-locked: markword contains stack pointer to BasicLock.
509 // Locked by current thread if difference with current SP is less than one page.
510 subptr(tmpReg, rsp);
511 // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
512 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())7 - os::vm_page_size()) );
513 movptr(Address(boxReg, 0), tmpReg);
514 } else {
515 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
516 testptr(objReg, objReg);
517 }
518 jmp(DONE_LABEL);
519
520 bind(IsInflated);
521 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
522
523#if INCLUDE_RTM_OPT1
524 // Use the same RTM locking code in 32- and 64-bit VM.
525 if (use_rtm) {
526 rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg,
527 rtm_counters, method_data, profile_rtm, DONE_LABEL);
528 } else {
529#endif // INCLUDE_RTM_OPT
530
531#ifndef _LP641
532 // The object is inflated.
533
534 // boxReg refers to the on-stack BasicLock in the current frame.
535 // We'd like to write:
536 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
537 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
538 // additional latency as we have another ST in the store buffer that must drain.
539
540 // avoid ST-before-CAS
541 // register juggle because we need tmpReg for cmpxchgptr below
542 movptr(scrReg, boxReg);
543 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
544
545 // Optimistic form: consider XORL tmpReg,tmpReg
546 movptr(tmpReg, NULL_WORD0L);
547
548 // Appears unlocked - try to swing _owner from null to non-null.
549 // Ideally, I'd manifest "Self" with get_thread and then attempt
550 // to CAS the register containing Self into m->Owner.
551 // But we don't have enough registers, so instead we can either try to CAS
552 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
553 // we later store "Self" into m->Owner. Transiently storing a stack address
554 // (rsp or the address of the box) into m->owner is harmless.
555 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
556 lock();
557 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
));
558 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
559 // If we weren't able to swing _owner from NULL to the BasicLock
560 // then take the slow path.
561 jccb (Assembler::notZero, DONE_LABEL)jccb_0(Assembler::notZero, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 561)
;
562 // update _owner from BasicLock to thread
563 get_thread (scrReg); // beware: clobbers ICCs
564 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), scrReg);
565 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
566
567 // If the CAS fails we can either retry or pass control to the slow path.
568 // We use the latter tactic.
569 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
570 // If the CAS was successful ...
571 // Self has acquired the lock
572 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
573 // Intentional fall-through into DONE_LABEL ...
574#else // _LP64
575 // It's inflated and we use scrReg for ObjectMonitor* in this section.
576 movq(scrReg, tmpReg);
577 xorq(tmpReg, tmpReg);
578 lock();
579 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
));
580 // Unconditionally set box->_displaced_header = markWord::unused_mark().
581 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
582 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
583 // Propagate ICC.ZF from CAS above into DONE_LABEL.
584 jcc(Assembler::equal, DONE_LABEL); // CAS above succeeded; propagate ZF = 1 (success)
585
586 cmpptr(r15_thread, rax); // Check if we are already the owner (recursive lock)
587 jcc(Assembler::notEqual, DONE_LABEL); // If not recursive, ZF = 0 at this point (fail)
588 incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
));
589 xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success
590#endif // _LP64
591#if INCLUDE_RTM_OPT1
592 } // use_rtm()
593#endif
594 // DONE_LABEL is a hot target - we'd really like to place it at the
595 // start of cache line by padding with NOPs.
596 // See the AMD and Intel software optimization manuals for the
597 // most efficient "long" NOP encodings.
598 // Unfortunately none of our alignment mechanisms suffice.
599 bind(DONE_LABEL);
600
601 // At DONE_LABEL the icc ZFlag is set as follows ...
602 // fast_unlock uses the same protocol.
603 // ZFlag == 1 -> Success
604 // ZFlag == 0 -> Failure - force control through the slow path
605}
606
607// obj: object to unlock
608// box: box address (displaced header location), killed. Must be EAX.
609// tmp: killed, cannot be obj nor box.
610//
611// Some commentary on balanced locking:
612//
613// fast_lock and fast_unlock are emitted only for provably balanced lock sites.
614// Methods that don't have provably balanced locking are forced to run in the
615// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
616// The interpreter provides two properties:
617// I1: At return-time the interpreter automatically and quietly unlocks any
618// objects acquired by the current activation (frame). Recall that the
619// interpreter maintains an on-stack list of locks currently held by
620// a frame.
621// I2: If a method attempts to unlock an object that is not held by
622// the frame the interpreter throws IMSX.
623//
624// Lets say A(), which has provably balanced locking, acquires O and then calls B().
625// B() doesn't have provably balanced locking so it runs in the interpreter.
626// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
627// is still locked by A().
628//
629// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
630// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
631// should not be unlocked by "normal" java-level locking and vice-versa. The specification
632// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
633// Arguably given that the spec legislates the JNI case as undefined our implementation
634// could reasonably *avoid* checking owner in fast_unlock().
635// In the interest of performance we elide m->Owner==Self check in unlock.
636// A perfectly viable alternative is to elide the owner check except when
637// Xcheck:jni is enabled.
638
// Emits the inline (fast-path) monitor-exit sequence for the object in objReg.
//
//   objReg:  object being unlocked.
//   boxReg:  address of the on-stack box holding the displaced header; asserted
//            to be rax because the cmpxchg instructions emitted below use rax
//            implicitly. Its value is overwritten on several paths.
//   tmpReg:  scratch; receives the object's mark word. Must differ from objReg
//            and boxReg (asserted).
//   use_rtm: when INCLUDE_RTM_OPT is enabled, additionally emits the xend()
//            exits for transactional locking.
//
// Result protocol at DONE_LABEL: ZF == 1 means the unlock succeeded inline,
// ZF == 0 means control must go through the slow path.
639void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
640 assert(boxReg == rax, "")do { if (!(boxReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 640, "assert(" "boxReg == rax" ") failed", ""); ::breakpoint
(); } } while (0)
;
641 assert_different_registers(objReg, boxReg, tmpReg);
642
643 Label DONE_LABEL, Stacked, CheckSucc;
644
// RTM stack-lock exit: if the two lock bits of the mark word read "unlocked",
// end the transaction and finish; otherwise continue with the regular unlock.
645#if INCLUDE_RTM_OPT1
646 if (UseRTMForStackLocks && use_rtm) {
647 assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive")do { if (!(!UseHeavyMonitors)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 647, "assert(" "!UseHeavyMonitors" ") failed", "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive"
); ::breakpoint(); } } while (0)
;
648 Label L_regular_unlock;
649 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
650 andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits
651 cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked
652 jccb(Assembler::notEqual, L_regular_unlock)jccb_0(Assembler::notEqual, L_regular_unlock, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 652)
; // if !HLE RegularLock
653 xend(); // otherwise end...
654 jmp(DONE_LABEL); // ... and we're done
655 bind(L_regular_unlock);
656 }
657#endif
658
// With UseHeavyMonitors the stack-lock checks are not emitted at all, so control
// always proceeds into the inflated-monitor code.
659 if (!UseHeavyMonitors) {
660 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD0L); // Examine the displaced header
661 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
662 }
663 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
664 if (!UseHeavyMonitors) {
665 testptr(tmpReg, markWord::monitor_value); // Inflated?
666 jccb (Assembler::zero, Stacked)jccb_0(Assembler::zero, Stacked, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 666)
;
667 }
668
669 // It's inflated.
// RTM inflated exit: a null _owner ends the transaction and finishes;
// a non-null _owner falls through to the regular inflated unlock.
670#if INCLUDE_RTM_OPT1
671 if (use_rtm) {
672 Label L_regular_inflated_unlock;
673 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
;
674 movptr(boxReg, Address(tmpReg, owner_offset));
675 testptr(boxReg, boxReg);
676 jccb(Assembler::notZero, L_regular_inflated_unlock)jccb_0(Assembler::notZero, L_regular_inflated_unlock, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 676)
;
677 xend();
678 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 678)
;
679 bind(L_regular_inflated_unlock);
680 }
681#endif
682
683 // Despite our balanced locking property we still check that m->_owner == Self
684 // as java routines or native JNI code called by this thread might
685 // have released the lock.
// NOTE(review): no comparison of _owner against the current thread is emitted
// in the code below, so the three lines above look stale - confirm and update.
686 // Refer to the comments in synchronizer.cpp for how we might encode extra
687 // state in _succ so we can avoid fetching EntryList|cxq.
688 //
689 // If there's no contention try a 1-0 exit. That is, exit without
690 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
691 // we detect and recover from the race that the 1-0 exit admits.
692 //
693 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
694 // before it STs null into _owner, releasing the lock. Updates
695 // to data protected by the critical section must be visible before
696 // we drop the lock (and thus before any other thread could acquire
697 // the lock and observe the fields protected by the lock).
698 // IA32's memory-model is SPO, so STs are ordered with respect to
699 // each other and there's no need for an explicit barrier (fence).
700 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
701#ifndef _LP641
// NOTE(review): boxReg is zeroed by the xorptr below before it is read, so the
// value loaded by this get_thread appears to be unused - confirm.
702 get_thread (boxReg);
703
704 // Note that we could employ various encoding schemes to reduce
705 // the number of loads below (currently 4) to just 2 or 3.
706 // Refer to the comments in synchronizer.cpp.
707 // In practice the chain of fetches doesn't seem to impact performance, however.
708 xorptr(boxReg, boxReg);
709 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
));
710 jccb (Assembler::notZero, DONE_LABEL)jccb_0(Assembler::notZero, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 710)
;
711 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)((ObjectMonitor::EntryList_offset_in_bytes()) - markWord::monitor_value
)
));
712 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)((ObjectMonitor::cxq_offset_in_bytes()) - markWord::monitor_value
)
));
713 jccb (Assembler::notZero, CheckSucc)jccb_0(Assembler::notZero, CheckSucc, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 713)
;
714 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), NULL_WORD0L);
715 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 715)
;
716
717 bind (Stacked);
718 // It's not inflated and it's not recursively stack-locked.
719 // It must be stack-locked.
720 // Try to reset the header to displaced header.
721 // The "box" value on the stack is stable, so we can reload
722 // and be assured we observe the same value as above.
723 movptr(tmpReg, Address(boxReg, 0));
724 lock();
725 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
726 // Intentional fall-through into DONE_LABEL
727
728 // DONE_LABEL is a hot target - we'd really like to place it at the
729 // start of cache line by padding with NOPs.
730 // See the AMD and Intel software optimization manuals for the
731 // most efficient "long" NOP encodings.
732 // Unfortunately none of our alignment mechanisms suffice.
// In this section CheckSucc is bound directly in front of DONE_LABEL, so the
// jccb(notZero, CheckSucc) above arrives here with ZF == 0 (failure).
733 bind (CheckSucc);
734#else // _LP64
735 // It's inflated
736 Label LNotRecursive, LSuccess, LGoSlowPath;
737
738 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
), 0);
739 jccb(Assembler::equal, LNotRecursive)jccb_0(Assembler::equal, LNotRecursive, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 739)
;
740
741 // Recursive inflated unlock
742 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value
)
));
743 jmpb(LSuccess)jmpb_0(LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 743)
;
744
745 bind(LNotRecursive);
746 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)((ObjectMonitor::cxq_offset_in_bytes()) - markWord::monitor_value
)
));
747 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)((ObjectMonitor::EntryList_offset_in_bytes()) - markWord::monitor_value
)
));
748 jccb (Assembler::notZero, CheckSucc)jccb_0(Assembler::notZero, CheckSucc, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 748)
;
749 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
750 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
751 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 751)
;
752
753 // Try to avoid passing control into the slow_path ...
754 bind (CheckSucc);
755
756 // The following optional optimization can be elided if necessary
757 // Effectively: if (succ == null) goto slow path
758 // The code reduces the window for a race, however,
759 // and thus benefits performance.
760 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)((ObjectMonitor::succ_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
761 jccb (Assembler::zero, LGoSlowPath)jccb_0(Assembler::zero, LGoSlowPath, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 761)
;
762
763 xorptr(boxReg, boxReg);
764 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
765 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
766
767 // Memory barrier/fence
768 // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
769 // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
770 // This is faster on Nehalem and AMD Shanghai/Barcelona.
771 // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
772 // We might also restructure (ST Owner=0;barrier;LD _Succ) to
773 // (mov box,0; xchgq box, &m->Owner; LD _succ) .
774 lock(); addl(Address(rsp, 0), 0);
775
776 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)((ObjectMonitor::succ_offset_in_bytes()) - markWord::monitor_value
)
), (int32_t)NULL_WORD0L);
777 jccb (Assembler::notZero, LSuccess)jccb_0(Assembler::notZero, LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 777)
;
778
779 // Rare inopportune interleaving - race.
780 // The successor vanished in the small window above.
781 // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
782 // We need to ensure progress and succession.
783 // Try to reacquire the lock.
784 // If that fails then the new owner is responsible for succession and this
785 // thread needs to take no further action and can exit via the fast path (success).
786 // If the re-acquire succeeds then pass control into the slow path.
787 // As implemented, this latter mode is horrible because we generated more
788 // coherence traffic on the lock *and* artificially extended the critical section
789 // length by virtue of passing control into the slow path.
790
791 // box is really RAX -- the following CMPXCHG depends on that binding
792 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
793 lock();
794 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value
)
));
795 // There's no successor so we tried to regrab the lock.
796 // If that didn't work, then another thread grabbed the
797 // lock so we're done (and exit was a success).
798 jccb (Assembler::notEqual, LSuccess)jccb_0(Assembler::notEqual, LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 798)
;
799 // Intentional fall-through into slow path
800
801 bind (LGoSlowPath);
802 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
803 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 803)
;
804
805 bind (LSuccess);
806 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
807 jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 807)
;
808
// Stack-locked (not inflated, not recursive): CAS the displaced header from the
// box back into the object's mark word, then fall into DONE_LABEL.
809 if (!UseHeavyMonitors) {
810 bind (Stacked);
811 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
812 lock();
813 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
814 }
815#endif
// Common exit: ZF == 1 -> success, ZF == 0 -> slow path required.
816 bind(DONE_LABEL);
817}
818
819//-------------------------------------------------------------------------------------------
820// Generic instructions support for use in .ad files C2 code generation
821
822void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
823 if (dst != src) {
824 movdqu(dst, src);
825 }
826 if (opcode == Op_AbsVD) {
827 andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
828 } else {
829 assert((opcode == Op_NegVD),"opcode should be Op_NegD")do { if (!((opcode == Op_NegVD))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 829, "assert(" "(opcode == Op_NegVD)" ") failed", "opcode should be Op_NegD"
); ::breakpoint(); } } while (0)
;
830 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
831 }
832}
833
834void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
835 if (opcode == Op_AbsVD) {
836 vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
837 } else {
838 assert((opcode == Op_NegVD),"opcode should be Op_NegD")do { if (!((opcode == Op_NegVD))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 838, "assert(" "(opcode == Op_NegVD)" ") failed", "opcode should be Op_NegD"
); ::breakpoint(); } } while (0)
;
839 vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr);
840 }
841}
842
843void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr) {
844 if (dst != src) {
845 movdqu(dst, src);
846 }
847 if (opcode == Op_AbsVF) {
848 andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr);
849 } else {
850 assert((opcode == Op_NegVF),"opcode should be Op_NegF")do { if (!((opcode == Op_NegVF))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 850, "assert(" "(opcode == Op_NegVF)" ") failed", "opcode should be Op_NegF"
); ::breakpoint(); } } while (0)
;
851 xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr);
852 }
853}
854
855void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
856 if (opcode == Op_AbsVF) {
857 vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr);
858 } else {
859 assert((opcode == Op_NegVF),"opcode should be Op_NegF")do { if (!((opcode == Op_NegVF))) { (*g_assert_poison) = 'X';
; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 859, "assert(" "(opcode == Op_NegVF)" ") failed", "opcode should be Op_NegF"
); ::breakpoint(); } } while (0)
;
860 vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr);
861 }
862}
863
864void C2_MacroAssembler::pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister tmp) {
865 assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MaxV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 865, "assert(" "opcode == Op_MinV || opcode == Op_MaxV" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
866 assert(tmp == xnoreg || elem_bt == T_LONG, "unused")do { if (!(tmp == xnoreg || elem_bt == T_LONG)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 866, "assert(" "tmp == xnoreg || elem_bt == T_LONG" ") failed"
, "unused"); ::breakpoint(); } } while (0)
;
867
868 if (opcode == Op_MinV) {
869 if (elem_bt == T_BYTE) {
870 pminsb(dst, src);
871 } else if (elem_bt == T_SHORT) {
872 pminsw(dst, src);
873 } else if (elem_bt == T_INT) {
874 pminsd(dst, src);
875 } else {
876 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 876, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
877 assert(tmp == xmm0, "required")do { if (!(tmp == xmm0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 877, "assert(" "tmp == xmm0" ") failed", "required"); ::breakpoint
(); } } while (0)
;
878 assert_different_registers(dst, src, tmp);
879 movdqu(xmm0, dst);
880 pcmpgtq(xmm0, src);
881 blendvpd(dst, src); // xmm0 as mask
882 }
883 } else { // opcode == Op_MaxV
884 if (elem_bt == T_BYTE) {
885 pmaxsb(dst, src);
886 } else if (elem_bt == T_SHORT) {
887 pmaxsw(dst, src);
888 } else if (elem_bt == T_INT) {
889 pmaxsd(dst, src);
890 } else {
891 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 891, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
892 assert(tmp == xmm0, "required")do { if (!(tmp == xmm0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 892, "assert(" "tmp == xmm0" ") failed", "required"); ::breakpoint
(); } } while (0)
;
893 assert_different_registers(dst, src, tmp);
894 movdqu(xmm0, src);
895 pcmpgtq(xmm0, dst);
896 blendvpd(dst, src); // xmm0 as mask
897 }
898 }
899}
900
901void C2_MacroAssembler::vpminmax(int opcode, BasicType elem_bt,
902 XMMRegister dst, XMMRegister src1, XMMRegister src2,
903 int vlen_enc) {
904 assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MaxV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 904, "assert(" "opcode == Op_MinV || opcode == Op_MaxV" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
905
906 if (opcode == Op_MinV) {
907 if (elem_bt == T_BYTE) {
908 vpminsb(dst, src1, src2, vlen_enc);
909 } else if (elem_bt == T_SHORT) {
910 vpminsw(dst, src1, src2, vlen_enc);
911 } else if (elem_bt == T_INT) {
912 vpminsd(dst, src1, src2, vlen_enc);
913 } else {
914 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 914, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
915 if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
916 vpminsq(dst, src1, src2, vlen_enc);
917 } else {
918 assert_different_registers(dst, src1, src2);
919 vpcmpgtq(dst, src1, src2, vlen_enc);
920 vblendvpd(dst, src1, src2, dst, vlen_enc);
921 }
922 }
923 } else { // opcode == Op_MaxV
924 if (elem_bt == T_BYTE) {
925 vpmaxsb(dst, src1, src2, vlen_enc);
926 } else if (elem_bt == T_SHORT) {
927 vpmaxsw(dst, src1, src2, vlen_enc);
928 } else if (elem_bt == T_INT) {
929 vpmaxsd(dst, src1, src2, vlen_enc);
930 } else {
931 assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 931, "assert(" "elem_bt == T_LONG" ") failed", "required");
::breakpoint(); } } while (0)
;
932 if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
933 vpmaxsq(dst, src1, src2, vlen_enc);
934 } else {
935 assert_different_registers(dst, src1, src2);
936 vpcmpgtq(dst, src1, src2, vlen_enc);
937 vblendvpd(dst, src2, src1, dst, vlen_enc);
938 }
939 }
940 }
941}
942
943// Float/Double min max
944
945void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt,
946 XMMRegister dst, XMMRegister a, XMMRegister b,
947 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
948 int vlen_enc) {
949 assert(UseAVX > 0, "required")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 949, "assert(" "UseAVX > 0" ") failed", "required"); ::breakpoint
(); } } while (0)
;
950 assert(opcode == Op_MinV || opcode == Op_MinReductionV ||do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 951, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
951 opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 951, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
952 assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity")do { if (!(elem_bt == T_FLOAT || elem_bt == T_DOUBLE)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 952, "assert(" "elem_bt == T_FLOAT || elem_bt == T_DOUBLE" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
953 assert_different_registers(a, b, tmp, atmp, btmp);
954
955 bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV);
956 bool is_double_word = is_double_word_type(elem_bt);
957
958 if (!is_double_word && is_min) {
959 vblendvps(atmp, a, b, a, vlen_enc);
960 vblendvps(btmp, b, a, a, vlen_enc);
961 vminps(tmp, atmp, btmp, vlen_enc);
962 vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
963 vblendvps(dst, tmp, atmp, btmp, vlen_enc);
964 } else if (!is_double_word && !is_min) {
965 vblendvps(btmp, b, a, b, vlen_enc);
966 vblendvps(atmp, a, b, b, vlen_enc);
967 vmaxps(tmp, atmp, btmp, vlen_enc);
968 vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
969 vblendvps(dst, tmp, atmp, btmp, vlen_enc);
970 } else if (is_double_word && is_min) {
971 vblendvpd(atmp, a, b, a, vlen_enc);
972 vblendvpd(btmp, b, a, a, vlen_enc);
973 vminpd(tmp, atmp, btmp, vlen_enc);
974 vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
975 vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
976 } else {
977 assert(is_double_word && !is_min, "sanity")do { if (!(is_double_word && !is_min)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 977, "assert(" "is_double_word && !is_min" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
978 vblendvpd(btmp, b, a, b, vlen_enc);
979 vblendvpd(atmp, a, b, b, vlen_enc);
980 vmaxpd(tmp, atmp, btmp, vlen_enc);
981 vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
982 vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
983 }
984}
985
986void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
987 XMMRegister dst, XMMRegister a, XMMRegister b,
988 KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
989 int vlen_enc) {
990 assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 990, "assert(" "UseAVX > 2" ") failed", "required"); ::breakpoint
(); } } while (0)
;
991 assert(opcode == Op_MinV || opcode == Op_MinReductionV ||do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 992, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
992 opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV ||
opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 992, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
993 assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity")do { if (!(elem_bt == T_FLOAT || elem_bt == T_DOUBLE)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 993, "assert(" "elem_bt == T_FLOAT || elem_bt == T_DOUBLE" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
994 assert_different_registers(dst, a, b, atmp, btmp);
995
996 bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV);
997 bool is_double_word = is_double_word_type(elem_bt);
998 bool merge = true;
999
1000 if (!is_double_word && is_min) {
1001 evpmovd2m(ktmp, a, vlen_enc);
1002 evblendmps(atmp, ktmp, a, b, merge, vlen_enc);
1003 evblendmps(btmp, ktmp, b, a, merge, vlen_enc);
1004 vminps(dst, atmp, btmp, vlen_enc);
1005 evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1006 evmovdqul(dst, ktmp, atmp, merge, vlen_enc);
1007 } else if (!is_double_word && !is_min) {
1008 evpmovd2m(ktmp, b, vlen_enc);
1009 evblendmps(atmp, ktmp, a, b, merge, vlen_enc);
1010 evblendmps(btmp, ktmp, b, a, merge, vlen_enc);
1011 vmaxps(dst, atmp, btmp, vlen_enc);
1012 evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1013 evmovdqul(dst, ktmp, atmp, merge, vlen_enc);
1014 } else if (is_double_word && is_min) {
1015 evpmovq2m(ktmp, a, vlen_enc);
1016 evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
1017 evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
1018 vminpd(dst, atmp, btmp, vlen_enc);
1019 evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1020 evmovdquq(dst, ktmp, atmp, merge, vlen_enc);
1021 } else {
1022 assert(is_double_word && !is_min, "sanity")do { if (!(is_double_word && !is_min)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1022, "assert(" "is_double_word && !is_min" ") failed"
, "sanity"); ::breakpoint(); } } while (0)
;
1023 evpmovq2m(ktmp, b, vlen_enc);
1024 evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
1025 evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
1026 vmaxpd(dst, atmp, btmp, vlen_enc);
1027 evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
1028 evmovdquq(dst, ktmp, atmp, merge, vlen_enc);
1029 }
1030}
1031
1032// Float/Double signum
1033void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst,
1034 XMMRegister zero, XMMRegister one,
1035 Register scratch) {
1036 assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity")do { if (!(opcode == Op_SignumF || opcode == Op_SignumD)) { (
*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1036, "assert(" "opcode == Op_SignumF || opcode == Op_SignumD"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
1037
1038 Label DONE_LABEL;
1039
1040 if (opcode == Op_SignumF) {
1041 assert(UseSSE > 0, "required")do { if (!(UseSSE > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1041, "assert(" "UseSSE > 0" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1042 ucomiss(dst, zero);
1043 jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
1044 jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
1045 movflt(dst, one);
1046 jcc(Assembler::above, DONE_LABEL);
1047 xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch);
1048 } else if (opcode == Op_SignumD) {
1049 assert(UseSSE > 1, "required")do { if (!(UseSSE > 1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1049, "assert(" "UseSSE > 1" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1050 ucomisd(dst, zero);
1051 jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument
1052 jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN
1053 movdbl(dst, one);
1054 jcc(Assembler::above, DONE_LABEL);
1055 xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch);
1056 }
1057
1058 bind(DONE_LABEL);
1059}
1060
1061void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
1062 if (sign) {
1063 pmovsxbw(dst, src);
1064 } else {
1065 pmovzxbw(dst, src);
1066 }
1067}
1068
1069void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
1070 if (sign) {
1071 vpmovsxbw(dst, src, vector_len);
1072 } else {
1073 vpmovzxbw(dst, src, vector_len);
1074 }
1075}
1076
1077void C2_MacroAssembler::vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
1078 if (sign) {
1079 vpmovsxbd(dst, src, vector_len);
1080 } else {
1081 vpmovzxbd(dst, src, vector_len);
1082 }
1083}
1084
1085void C2_MacroAssembler::vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
1086 if (sign) {
1087 vpmovsxwd(dst, src, vector_len);
1088 } else {
1089 vpmovzxwd(dst, src, vector_len);
1090 }
1091}
1092
1093void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
1094 int shift, int vector_len) {
1095 if (opcode == Op_RotateLeftV) {
1096 if (etype == T_INT) {
1097 evprold(dst, src, shift, vector_len);
1098 } else {
1099 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1099, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1100 evprolq(dst, src, shift, vector_len);
1101 }
1102 } else {
1103 assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV")do { if (!(opcode == Op_RotateRightV)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1103, "assert(" "opcode == Op_RotateRightV" ") failed", "opcode should be Op_RotateRightV"
); ::breakpoint(); } } while (0)
;
1104 if (etype == T_INT) {
1105 evprord(dst, src, shift, vector_len);
1106 } else {
1107 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1107, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1108 evprorq(dst, src, shift, vector_len);
1109 }
1110 }
1111}
1112
1113void C2_MacroAssembler::vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
1114 XMMRegister shift, int vector_len) {
1115 if (opcode == Op_RotateLeftV) {
1116 if (etype == T_INT) {
1117 evprolvd(dst, src, shift, vector_len);
1118 } else {
1119 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1119, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1120 evprolvq(dst, src, shift, vector_len);
1121 }
1122 } else {
1123 assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV")do { if (!(opcode == Op_RotateRightV)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1123, "assert(" "opcode == Op_RotateRightV" ") failed", "opcode should be Op_RotateRightV"
); ::breakpoint(); } } while (0)
;
1124 if (etype == T_INT) {
1125 evprorvd(dst, src, shift, vector_len);
1126 } else {
1127 assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1127, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG"
); ::breakpoint(); } } while (0)
;
1128 evprorvq(dst, src, shift, vector_len);
1129 }
1130 }
1131}
1132
1133void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) {
1134 if (opcode == Op_RShiftVI) {
1135 psrad(dst, shift);
1136 } else if (opcode == Op_LShiftVI) {
1137 pslld(dst, shift);
1138 } else {
1139 assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI")do { if (!((opcode == Op_URShiftVI))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1139, "assert(" "(opcode == Op_URShiftVI)" ") failed", "opcode should be Op_URShiftVI"
); ::breakpoint(); } } while (0)
;
1140 psrld(dst, shift);
1141 }
1142}
1143
1144void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister shift) {
1145 switch (opcode) {
1146 case Op_RShiftVI: psrad(dst, shift); break;
1147 case Op_LShiftVI: pslld(dst, shift); break;
1148 case Op_URShiftVI: psrld(dst, shift); break;
1149
1150 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1150, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1151 }
1152}
1153
1154void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
1155 if (opcode == Op_RShiftVI) {
1156 vpsrad(dst, nds, shift, vector_len);
1157 } else if (opcode == Op_LShiftVI) {
1158 vpslld(dst, nds, shift, vector_len);
1159 } else {
1160 assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI")do { if (!((opcode == Op_URShiftVI))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1160, "assert(" "(opcode == Op_URShiftVI)" ") failed", "opcode should be Op_URShiftVI"
); ::breakpoint(); } } while (0)
;
1161 vpsrld(dst, nds, shift, vector_len);
1162 }
1163}
1164
1165void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1166 switch (opcode) {
1167 case Op_RShiftVI: vpsrad(dst, src, shift, vlen_enc); break;
1168 case Op_LShiftVI: vpslld(dst, src, shift, vlen_enc); break;
1169 case Op_URShiftVI: vpsrld(dst, src, shift, vlen_enc); break;
1170
1171 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1171, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1172 }
1173}
1174
1175void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister shift) {
1176 switch (opcode) {
1177 case Op_RShiftVB: // fall-through
1178 case Op_RShiftVS: psraw(dst, shift); break;
1179
1180 case Op_LShiftVB: // fall-through
1181 case Op_LShiftVS: psllw(dst, shift); break;
1182
1183 case Op_URShiftVS: // fall-through
1184 case Op_URShiftVB: psrlw(dst, shift); break;
1185
1186 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1186, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1187 }
1188}
1189
1190void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1191 switch (opcode) {
1192 case Op_RShiftVB: // fall-through
1193 case Op_RShiftVS: vpsraw(dst, src, shift, vlen_enc); break;
1194
1195 case Op_LShiftVB: // fall-through
1196 case Op_LShiftVS: vpsllw(dst, src, shift, vlen_enc); break;
1197
1198 case Op_URShiftVS: // fall-through
1199 case Op_URShiftVB: vpsrlw(dst, src, shift, vlen_enc); break;
1200
1201 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1201, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1202 }
1203}
1204
1205void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister shift) {
1206 switch (opcode) {
1207 case Op_RShiftVL: psrlq(dst, shift); break; // using srl to implement sra on pre-avs512 systems
1208 case Op_LShiftVL: psllq(dst, shift); break;
1209 case Op_URShiftVL: psrlq(dst, shift); break;
1210
1211 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1211, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1212 }
1213}
1214
1215void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) {
1216 if (opcode == Op_RShiftVL) {
1217 psrlq(dst, shift); // using srl to implement sra on pre-avs512 systems
1218 } else if (opcode == Op_LShiftVL) {
1219 psllq(dst, shift);
1220 } else {
1221 assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL")do { if (!((opcode == Op_URShiftVL))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1221, "assert(" "(opcode == Op_URShiftVL)" ") failed", "opcode should be Op_URShiftVL"
); ::breakpoint(); } } while (0)
;
1222 psrlq(dst, shift);
1223 }
1224}
1225
1226void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1227 switch (opcode) {
1228 case Op_RShiftVL: evpsraq(dst, src, shift, vlen_enc); break;
1229 case Op_LShiftVL: vpsllq(dst, src, shift, vlen_enc); break;
1230 case Op_URShiftVL: vpsrlq(dst, src, shift, vlen_enc); break;
1231
1232 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1232, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1233 }
1234}
1235
1236void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
1237 if (opcode == Op_RShiftVL) {
1238 evpsraq(dst, nds, shift, vector_len);
1239 } else if (opcode == Op_LShiftVL) {
1240 vpsllq(dst, nds, shift, vector_len);
1241 } else {
1242 assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL")do { if (!((opcode == Op_URShiftVL))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1242, "assert(" "(opcode == Op_URShiftVL)" ") failed", "opcode should be Op_URShiftVL"
); ::breakpoint(); } } while (0)
;
1243 vpsrlq(dst, nds, shift, vector_len);
1244 }
1245}
1246
1247void C2_MacroAssembler::varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1248 switch (opcode) {
1249 case Op_RShiftVB: // fall-through
1250 case Op_RShiftVS: // fall-through
1251 case Op_RShiftVI: vpsravd(dst, src, shift, vlen_enc); break;
1252
1253 case Op_LShiftVB: // fall-through
1254 case Op_LShiftVS: // fall-through
1255 case Op_LShiftVI: vpsllvd(dst, src, shift, vlen_enc); break;
1256
1257 case Op_URShiftVB: // fall-through
1258 case Op_URShiftVS: // fall-through
1259 case Op_URShiftVI: vpsrlvd(dst, src, shift, vlen_enc); break;
1260
1261 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1261, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1262 }
1263}
1264
1265void C2_MacroAssembler::varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
1266 switch (opcode) {
1267 case Op_RShiftVB: // fall-through
1268 case Op_RShiftVS: evpsravw(dst, src, shift, vlen_enc); break;
1269
1270 case Op_LShiftVB: // fall-through
1271 case Op_LShiftVS: evpsllvw(dst, src, shift, vlen_enc); break;
1272
1273 case Op_URShiftVB: // fall-through
1274 case Op_URShiftVS: evpsrlvw(dst, src, shift, vlen_enc); break;
1275
1276 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1276, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1277 }
1278}
1279
1280void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister tmp) {
1281 assert(UseAVX >= 2, "required")do { if (!(UseAVX >= 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1281, "assert(" "UseAVX >= 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1282 switch (opcode) {
1283 case Op_RShiftVL: {
1284 if (UseAVX > 2) {
1285 assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1285, "assert(" "tmp == xnoreg" ") failed", "not used"); ::
breakpoint(); } } while (0)
;
1286 if (!VM_Version::supports_avx512vl()) {
1287 vlen_enc = Assembler::AVX_512bit;
1288 }
1289 evpsravq(dst, src, shift, vlen_enc);
1290 } else {
1291 vmovdqu(tmp, ExternalAddress(StubRoutines::x86::vector_long_sign_mask()));
1292 vpsrlvq(dst, src, shift, vlen_enc);
1293 vpsrlvq(tmp, tmp, shift, vlen_enc);
1294 vpxor(dst, dst, tmp, vlen_enc);
1295 vpsubq(dst, dst, tmp, vlen_enc);
1296 }
1297 break;
1298 }
1299 case Op_LShiftVL: {
1300 assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1300, "assert(" "tmp == xnoreg" ") failed", "not used"); ::
breakpoint(); } } while (0)
;
1301 vpsllvq(dst, src, shift, vlen_enc);
1302 break;
1303 }
1304 case Op_URShiftVL: {
1305 assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1305, "assert(" "tmp == xnoreg" ") failed", "not used"); ::
breakpoint(); } } while (0)
;
1306 vpsrlvq(dst, src, shift, vlen_enc);
1307 break;
1308 }
1309 default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1309, "assert(" "false" ") failed", "%s", NodeClassNames[opcode
]); ::breakpoint(); } } while (0)
;
1310 }
1311}
1312
1313// Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst
1314void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
1315 assert(opcode == Op_LShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1316 opcode == Op_RShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1317 opcode == Op_URShiftVB, "%s", NodeClassNames[opcode])do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
;
1318 bool sign = (opcode != Op_URShiftVB);
1319 assert(vector_len == 0, "required")do { if (!(vector_len == 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1319, "assert(" "vector_len == 0" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1320 vextendbd(sign, dst, src, 1);
1321 vpmovzxbd(vtmp, shift, 1);
1322 varshiftd(opcode, dst, dst, vtmp, 1);
1323 vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch);
1324 vextracti128_high(vtmp, dst);
1325 vpackusdw(dst, dst, vtmp, 0);
1326}
1327
1328// Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst
1329void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
1330 assert(opcode == Op_LShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1331 opcode == Op_RShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
1332 opcode == Op_URShiftVB, "%s", NodeClassNames[opcode])do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode
== Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB"
") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); }
} while (0)
;
1333 bool sign = (opcode != Op_URShiftVB);
1334 int ext_vector_len = vector_len + 1;
1335 vextendbw(sign, dst, src, ext_vector_len);
1336 vpmovzxbw(vtmp, shift, ext_vector_len);
1337 varshiftw(opcode, dst, dst, vtmp, ext_vector_len);
1338 vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch);
1339 if (vector_len == 0) {
1340 vextracti128_high(vtmp, dst);
1341 vpackuswb(dst, dst, vtmp, vector_len);
1342 } else {
1343 vextracti64x4_high(vtmp, dst);
1344 vpackuswb(dst, dst, vtmp, vector_len);
1345 vpermq(dst, dst, 0xD8, vector_len);
1346 }
1347}
1348
1349void C2_MacroAssembler::insert(BasicType typ, XMMRegister dst, Register val, int idx) {
1350 switch(typ) {
1351 case T_BYTE:
1352 pinsrb(dst, val, idx);
1353 break;
1354 case T_SHORT:
1355 pinsrw(dst, val, idx);
1356 break;
1357 case T_INT:
1358 pinsrd(dst, val, idx);
1359 break;
1360 case T_LONG:
1361 pinsrq(dst, val, idx);
1362 break;
1363 default:
1364 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1364, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1365 break;
1366 }
1367}
1368
1369void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx) {
1370 switch(typ) {
1371 case T_BYTE:
1372 vpinsrb(dst, src, val, idx);
1373 break;
1374 case T_SHORT:
1375 vpinsrw(dst, src, val, idx);
1376 break;
1377 case T_INT:
1378 vpinsrd(dst, src, val, idx);
1379 break;
1380 case T_LONG:
1381 vpinsrq(dst, src, val, idx);
1382 break;
1383 default:
1384 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1384, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1385 break;
1386 }
1387}
1388
1389void C2_MacroAssembler::vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len) {
1390 switch(typ) {
1391 case T_INT:
1392 vpgatherdd(dst, Address(base, idx, Address::times_4), mask, vector_len);
1393 break;
1394 case T_FLOAT:
1395 vgatherdps(dst, Address(base, idx, Address::times_4), mask, vector_len);
1396 break;
1397 case T_LONG:
1398 vpgatherdq(dst, Address(base, idx, Address::times_8), mask, vector_len);
1399 break;
1400 case T_DOUBLE:
1401 vgatherdpd(dst, Address(base, idx, Address::times_8), mask, vector_len);
1402 break;
1403 default:
1404 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1404, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1405 break;
1406 }
1407}
1408
1409void C2_MacroAssembler::evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len) {
1410 switch(typ) {
1411 case T_INT:
1412 evpgatherdd(dst, mask, Address(base, idx, Address::times_4), vector_len);
1413 break;
1414 case T_FLOAT:
1415 evgatherdps(dst, mask, Address(base, idx, Address::times_4), vector_len);
1416 break;
1417 case T_LONG:
1418 evpgatherdq(dst, mask, Address(base, idx, Address::times_8), vector_len);
1419 break;
1420 case T_DOUBLE:
1421 evgatherdpd(dst, mask, Address(base, idx, Address::times_8), vector_len);
1422 break;
1423 default:
1424 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1424, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1425 break;
1426 }
1427}
1428
1429void C2_MacroAssembler::evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len) {
1430 switch(typ) {
1431 case T_INT:
1432 evpscatterdd(Address(base, idx, Address::times_4), mask, src, vector_len);
1433 break;
1434 case T_FLOAT:
1435 evscatterdps(Address(base, idx, Address::times_4), mask, src, vector_len);
1436 break;
1437 case T_LONG:
1438 evpscatterdq(Address(base, idx, Address::times_8), mask, src, vector_len);
1439 break;
1440 case T_DOUBLE:
1441 evscatterdpd(Address(base, idx, Address::times_8), mask, src, vector_len);
1442 break;
1443 default:
1444 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1444, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1445 break;
1446 }
1447}
1448
1449void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy) {
1450 if (vlen_in_bytes <= 16) {
1451 pxor (dst, dst);
1452 psubb(dst, src);
1453 switch (elem_bt) {
1454 case T_BYTE: /* nothing to do */ break;
1455 case T_SHORT: pmovsxbw(dst, dst); break;
1456 case T_INT: pmovsxbd(dst, dst); break;
1457 case T_FLOAT: pmovsxbd(dst, dst); break;
1458 case T_LONG: pmovsxbq(dst, dst); break;
1459 case T_DOUBLE: pmovsxbq(dst, dst); break;
1460
1461 default: assert(false, "%s", type2name(elem_bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1461, "assert(" "false" ") failed", "%s", type2name(elem_bt
)); ::breakpoint(); } } while (0)
;
1462 }
1463 } else {
1464 assert(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64, "")do { if (!(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes
< 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1464, "assert(" "!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64"
") failed", ""); ::breakpoint(); } } while (0)
;
1465 int vlen_enc = vector_length_encoding(vlen_in_bytes);
1466
1467 vpxor (dst, dst, dst, vlen_enc);
1468 vpsubb(dst, dst, src, is_legacy ? AVX_256bit : vlen_enc);
1469
1470 switch (elem_bt) {
1471 case T_BYTE: /* nothing to do */ break;
1472 case T_SHORT: vpmovsxbw(dst, dst, vlen_enc); break;
1473 case T_INT: vpmovsxbd(dst, dst, vlen_enc); break;
1474 case T_FLOAT: vpmovsxbd(dst, dst, vlen_enc); break;
1475 case T_LONG: vpmovsxbq(dst, dst, vlen_enc); break;
1476 case T_DOUBLE: vpmovsxbq(dst, dst, vlen_enc); break;
1477
1478 default: assert(false, "%s", type2name(elem_bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1478, "assert(" "false" ") failed", "%s", type2name(elem_bt
)); ::breakpoint(); } } while (0)
;
1479 }
1480 }
1481}
1482
1483void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp,
1484 Register tmp, bool novlbwdq, int vlen_enc) {
1485 if (novlbwdq) {
1486 vpmovsxbd(xtmp, src, vlen_enc);
1487 evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()),
1488 Assembler::eq, true, vlen_enc, tmp);
1489 } else {
1490 vpxor(xtmp, xtmp, xtmp, vlen_enc);
1491 vpsubb(xtmp, xtmp, src, vlen_enc);
1492 evpmovb2m(dst, xtmp, vlen_enc);
1493 }
1494}
1495
1496void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
1497 ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
1498 if (vlen_in_bytes == 4) {
1499 movdl(dst, addr);
1500 } else if (vlen_in_bytes == 8) {
1501 movq(dst, addr);
1502 } else if (vlen_in_bytes == 16) {
1503 movdqu(dst, addr, scratch);
1504 } else if (vlen_in_bytes == 32) {
1505 vmovdqu(dst, addr, scratch);
1506 } else {
1507 assert(vlen_in_bytes == 64, "%d", vlen_in_bytes)do { if (!(vlen_in_bytes == 64)) { (*g_assert_poison) = 'X';;
report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1507, "assert(" "vlen_in_bytes == 64" ") failed", "%d", vlen_in_bytes
); ::breakpoint(); } } while (0)
;
1508 evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch);
1509 }
1510}
1511
1512// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
1513
1514void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) {
1515 int vector_len = Assembler::AVX_128bit;
1516
1517 switch (opcode) {
1518 case Op_AndReductionV: pand(dst, src); break;
1519 case Op_OrReductionV: por (dst, src); break;
1520 case Op_XorReductionV: pxor(dst, src); break;
1521 case Op_MinReductionV:
1522 switch (typ) {
1523 case T_BYTE: pminsb(dst, src); break;
1524 case T_SHORT: pminsw(dst, src); break;
1525 case T_INT: pminsd(dst, src); break;
1526 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1526, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1527 vpminsq(dst, dst, src, Assembler::AVX_128bit); break;
1528 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1528, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1529 }
1530 break;
1531 case Op_MaxReductionV:
1532 switch (typ) {
1533 case T_BYTE: pmaxsb(dst, src); break;
1534 case T_SHORT: pmaxsw(dst, src); break;
1535 case T_INT: pmaxsd(dst, src); break;
1536 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1536, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1537 vpmaxsq(dst, dst, src, Assembler::AVX_128bit); break;
1538 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1538, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1539 }
1540 break;
1541 case Op_AddReductionVF: addss(dst, src); break;
1542 case Op_AddReductionVD: addsd(dst, src); break;
1543 case Op_AddReductionVI:
1544 switch (typ) {
1545 case T_BYTE: paddb(dst, src); break;
1546 case T_SHORT: paddw(dst, src); break;
1547 case T_INT: paddd(dst, src); break;
1548 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1548, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1549 }
1550 break;
1551 case Op_AddReductionVL: paddq(dst, src); break;
1552 case Op_MulReductionVF: mulss(dst, src); break;
1553 case Op_MulReductionVD: mulsd(dst, src); break;
1554 case Op_MulReductionVI:
1555 switch (typ) {
1556 case T_SHORT: pmullw(dst, src); break;
1557 case T_INT: pmulld(dst, src); break;
1558 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1558, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1559 }
1560 break;
1561 case Op_MulReductionVL: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1561, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1562 vpmullq(dst, dst, src, vector_len); break;
1563 default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1563, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint
(); } } while (0)
;
1564 }
1565}
1566
1567void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1568 int vector_len = Assembler::AVX_256bit;
1569
1570 switch (opcode) {
1571 case Op_AndReductionV: vpand(dst, src1, src2, vector_len); break;
1572 case Op_OrReductionV: vpor (dst, src1, src2, vector_len); break;
1573 case Op_XorReductionV: vpxor(dst, src1, src2, vector_len); break;
1574 case Op_MinReductionV:
1575 switch (typ) {
1576 case T_BYTE: vpminsb(dst, src1, src2, vector_len); break;
1577 case T_SHORT: vpminsw(dst, src1, src2, vector_len); break;
1578 case T_INT: vpminsd(dst, src1, src2, vector_len); break;
1579 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1579, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1580 vpminsq(dst, src1, src2, vector_len); break;
1581 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1581, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1582 }
1583 break;
1584 case Op_MaxReductionV:
1585 switch (typ) {
1586 case T_BYTE: vpmaxsb(dst, src1, src2, vector_len); break;
1587 case T_SHORT: vpmaxsw(dst, src1, src2, vector_len); break;
1588 case T_INT: vpmaxsd(dst, src1, src2, vector_len); break;
1589 case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1589, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
1590 vpmaxsq(dst, src1, src2, vector_len); break;
1591 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1591, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1592 }
1593 break;
1594 case Op_AddReductionVI:
1595 switch (typ) {
1596 case T_BYTE: vpaddb(dst, src1, src2, vector_len); break;
1597 case T_SHORT: vpaddw(dst, src1, src2, vector_len); break;
1598 case T_INT: vpaddd(dst, src1, src2, vector_len); break;
1599 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1599, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1600 }
1601 break;
1602 case Op_AddReductionVL: vpaddq(dst, src1, src2, vector_len); break;
1603 case Op_MulReductionVI:
1604 switch (typ) {
1605 case T_SHORT: vpmullw(dst, src1, src2, vector_len); break;
1606 case T_INT: vpmulld(dst, src1, src2, vector_len); break;
1607 default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1607, "assert(" "false" ") failed", "wrong type"); ::breakpoint
(); } } while (0)
;
1608 }
1609 break;
1610 case Op_MulReductionVL: vpmullq(dst, src1, src2, vector_len); break;
1611 default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1611, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint
(); } } while (0)
;
1612 }
1613}
1614
1615void C2_MacroAssembler::reduce_fp(int opcode, int vlen,
1616 XMMRegister dst, XMMRegister src,
1617 XMMRegister vtmp1, XMMRegister vtmp2) {
1618 switch (opcode) {
1619 case Op_AddReductionVF:
1620 case Op_MulReductionVF:
1621 reduceF(opcode, vlen, dst, src, vtmp1, vtmp2);
1622 break;
1623
1624 case Op_AddReductionVD:
1625 case Op_MulReductionVD:
1626 reduceD(opcode, vlen, dst, src, vtmp1, vtmp2);
1627 break;
1628
1629 default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1629, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint
(); } } while (0)
;
1630 }
1631}
1632
1633void C2_MacroAssembler::reduceB(int opcode, int vlen,
1634 Register dst, Register src1, XMMRegister src2,
1635 XMMRegister vtmp1, XMMRegister vtmp2) {
1636 switch (vlen) {
1637 case 8: reduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1638 case 16: reduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1639 case 32: reduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1640 case 64: reduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1641
1642 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1642, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1643 }
1644}
1645
1646void C2_MacroAssembler::mulreduceB(int opcode, int vlen,
1647 Register dst, Register src1, XMMRegister src2,
1648 XMMRegister vtmp1, XMMRegister vtmp2) {
1649 switch (vlen) {
1650 case 8: mulreduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1651 case 16: mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1652 case 32: mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1653 case 64: mulreduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1654
1655 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1655, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1656 }
1657}
1658
1659void C2_MacroAssembler::reduceS(int opcode, int vlen,
1660 Register dst, Register src1, XMMRegister src2,
1661 XMMRegister vtmp1, XMMRegister vtmp2) {
1662 switch (vlen) {
1663 case 4: reduce4S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1664 case 8: reduce8S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1665 case 16: reduce16S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1666 case 32: reduce32S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1667
1668 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1668, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1669 }
1670}
1671
1672void C2_MacroAssembler::reduceI(int opcode, int vlen,
1673 Register dst, Register src1, XMMRegister src2,
1674 XMMRegister vtmp1, XMMRegister vtmp2) {
1675 switch (vlen) {
1676 case 2: reduce2I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1677 case 4: reduce4I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1678 case 8: reduce8I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
1679 case 16: reduce16I(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1680
1681 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1681, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1682 }
1683}
1684
1685#ifdef _LP641
1686void C2_MacroAssembler::reduceL(int opcode, int vlen,
1687 Register dst, Register src1, XMMRegister src2,
1688 XMMRegister vtmp1, XMMRegister vtmp2) {
1689 switch (vlen) {
1690 case 2: reduce2L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1691 case 4: reduce4L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1692 case 8: reduce8L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
1693
1694 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1694, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1695 }
1696}
1697#endif // _LP64
1698
1699void C2_MacroAssembler::reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1700 switch (vlen) {
1701 case 2:
1702 assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1702, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
1703 reduce2F(opcode, dst, src, vtmp1);
1704 break;
1705 case 4:
1706 assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1706, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
1707 reduce4F(opcode, dst, src, vtmp1);
1708 break;
1709 case 8:
1710 reduce8F(opcode, dst, src, vtmp1, vtmp2);
1711 break;
1712 case 16:
1713 reduce16F(opcode, dst, src, vtmp1, vtmp2);
1714 break;
1715 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1715, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1716 }
1717}
1718
1719void C2_MacroAssembler::reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1720 switch (vlen) {
1721 case 2:
1722 assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1722, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint
(); } } while (0)
;
1723 reduce2D(opcode, dst, src, vtmp1);
1724 break;
1725 case 4:
1726 reduce4D(opcode, dst, src, vtmp1, vtmp2);
1727 break;
1728 case 8:
1729 reduce8D(opcode, dst, src, vtmp1, vtmp2);
1730 break;
1731 default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1731, "assert(" "false" ") failed", "wrong vector length");
::breakpoint(); } } while (0)
;
1732 }
1733}
1734
1735void C2_MacroAssembler::reduce2I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1736 if (opcode == Op_AddReductionVI) {
1737 if (vtmp1 != src2) {
1738 movdqu(vtmp1, src2);
1739 }
1740 phaddd(vtmp1, vtmp1);
1741 } else {
1742 pshufd(vtmp1, src2, 0x1);
1743 reduce_operation_128(T_INT, opcode, vtmp1, src2);
1744 }
1745 movdl(vtmp2, src1);
1746 reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
1747 movdl(dst, vtmp1);
1748}
1749
1750void C2_MacroAssembler::reduce4I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1751 if (opcode == Op_AddReductionVI) {
1752 if (vtmp1 != src2) {
1753 movdqu(vtmp1, src2);
1754 }
1755 phaddd(vtmp1, src2);
1756 reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1757 } else {
1758 pshufd(vtmp2, src2, 0xE);
1759 reduce_operation_128(T_INT, opcode, vtmp2, src2);
1760 reduce2I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1761 }
1762}
1763
1764void C2_MacroAssembler::reduce8I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1765 if (opcode == Op_AddReductionVI) {
1766 vphaddd(vtmp1, src2, src2, Assembler::AVX_256bit);
1767 vextracti128_high(vtmp2, vtmp1);
1768 vpaddd(vtmp1, vtmp1, vtmp2, Assembler::AVX_128bit);
1769 reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1770 } else {
1771 vextracti128_high(vtmp1, src2);
1772 reduce_operation_128(T_INT, opcode, vtmp1, src2);
1773 reduce4I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1774 }
1775}
1776
// Reduces sixteen ints from src2 together with the scalar in src1 into dst:
// the part obtained with vextracti64x4_high is combined with src2 through
// reduce_operation_256, and the remaining work is delegated to reduce8I.
// vtmp1 and vtmp2 are used as scratch registers.
1777void C2_MacroAssembler::reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1778 vextracti64x4_high(vtmp2, src2);
1779 reduce_operation_256(T_INT, opcode, vtmp2, vtmp2, src2);
1780 reduce8I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1781}
1782
// Reduces eight bytes from src2 together with the scalar in src1 into dst.
// The byte lanes are folded in three steps (pshufd, then byte shifts of 2 and 1
// via psrldq), each combined with reduce_operation_128(T_BYTE, ...).
// vtmp1 and vtmp2 are used as scratch registers.
1783void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1784 pshufd(vtmp2, src2, 0x1);
1785 reduce_operation_128(T_BYTE, opcode, vtmp2, src2);
1786 movdqu(vtmp1, vtmp2);
1787 psrldq(vtmp1, 2);
1788 reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
1789 movdqu(vtmp2, vtmp1);
1790 psrldq(vtmp2, 1);
1791 reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
// Fold in the scalar input: the partial result is widened with pmovsxbd and
// combined with src1 as T_INT.
1792 movdl(vtmp2, src1);
1793 pmovsxbd(vtmp1, vtmp1);
1794 reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
// Extract byte 0 of the result and sign-extend it in dst.
1795 pextrb(dst, vtmp1, 0x0);
1796 movsbl(dst, dst);
1797}
1798
// Reduces sixteen bytes from src2 together with the scalar in src1 into dst:
// one pshufd + reduce_operation_128(T_BYTE, ...) step, then delegation to reduce8B.
// vtmp1 and vtmp2 are used as scratch registers.
1799void C2_MacroAssembler::reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1800 pshufd(vtmp1, src2, 0xE);
1801 reduce_operation_128(T_BYTE, opcode, vtmp1, src2);
1802 reduce8B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1803}
1804
// Reduces thirty-two bytes from src2 together with the scalar in src1 into dst:
// the part obtained with vextracti128_high is combined with src2 through
// reduce_operation_128(T_BYTE, ...), then the rest is delegated to reduce16B.
// vtmp1 and vtmp2 are used as scratch registers.
1805void C2_MacroAssembler::reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1806 vextracti128_high(vtmp2, src2);
1807 reduce_operation_128(T_BYTE, opcode, vtmp2, src2);
1808 reduce16B(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1809}
1810
// Reduces sixty-four bytes from src2 together with the scalar in src1 into dst:
// the part obtained with vextracti64x4_high is combined with src2 through
// reduce_operation_256(T_BYTE, ...), then the rest is delegated to reduce32B.
// vtmp1 and vtmp2 are used as scratch registers.
1811void C2_MacroAssembler::reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1812 vextracti64x4_high(vtmp1, src2);
1813 reduce_operation_256(T_BYTE, opcode, vtmp1, vtmp1, src2);
1814 reduce32B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1815}
1816
// Multiply-reduces eight bytes from src2 together with the scalar in src1 into dst.
// The bytes are first widened to shorts with pmovsxbw (into vtmp2), then the
// short reduction reduce8S does the actual work.
1817void C2_MacroAssembler::mulreduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1818 pmovsxbw(vtmp2, src2);
1819 reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1820}
1821
1822void C2_MacroAssembler::mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1823 if (UseAVX > 1) {
1824 int vector_len = Assembler::AVX_256bit;
1825 vpmovsxbw(vtmp1, src2, vector_len);
1826 reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1827 } else {
1828 pmovsxbw(vtmp2, src2);
1829 reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1830 pshufd(vtmp2, src2, 0x1);
1831 pmovsxbw(vtmp2, src2);
1832 reduce8S(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
1833 }
1834}
1835
1836void C2_MacroAssembler::mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1837 if (UseAVX > 2 && VM_Version::supports_avx512bw()) {
1838 int vector_len = Assembler::AVX_512bit;
1839 vpmovsxbw(vtmp1, src2, vector_len);
1840 reduce32S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1841 } else {
1842 assert(UseAVX >= 2,"Should not reach here.")do { if (!(UseAVX >= 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1842, "assert(" "UseAVX >= 2" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
1843 mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2);
1844 vextracti128_high(vtmp2, src2);
1845 mulreduce16B(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
1846 }
1847}
1848
// Multiply-reduces sixty-four bytes from src2 together with the scalar in src1
// into dst using two mulreduce32B calls: first on src2 itself, then on the part
// obtained with vextracti64x4_high, with dst passed as the scalar input so the
// second call folds into the first result.
1849void C2_MacroAssembler::mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1850 mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2);
1851 vextracti64x4_high(vtmp2, src2);
1852 mulreduce32B(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
1853}
1854
1855void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1856 if (opcode == Op_AddReductionVI) {
1857 if (vtmp1 != src2) {
1858 movdqu(vtmp1, src2);
1859 }
1860 phaddw(vtmp1, vtmp1);
1861 phaddw(vtmp1, vtmp1);
1862 } else {
1863 pshufd(vtmp2, src2, 0x1);
1864 reduce_operation_128(T_SHORT, opcode, vtmp2, src2);
1865 movdqu(vtmp1, vtmp2);
1866 psrldq(vtmp1, 2);
1867 reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2);
1868 }
1869 movdl(vtmp2, src1);
1870 pmovsxwd(vtmp1, vtmp1);
1871 reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
1872 pextrw(dst, vtmp1, 0x0);
1873 movswl(dst, dst);
1874}
1875
1876void C2_MacroAssembler::reduce8S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1877 if (opcode == Op_AddReductionVI) {
1878 if (vtmp1 != src2) {
1879 movdqu(vtmp1, src2);
1880 }
1881 phaddw(vtmp1, src2);
1882 } else {
1883 pshufd(vtmp1, src2, 0xE);
1884 reduce_operation_128(T_SHORT, opcode, vtmp1, src2);
1885 }
1886 reduce4S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1887}
1888
1889void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1890 if (opcode == Op_AddReductionVI) {
1891 int vector_len = Assembler::AVX_256bit;
1892 vphaddw(vtmp2, src2, src2, vector_len);
1893 vpermq(vtmp2, vtmp2, 0xD8, vector_len);
1894 } else {
1895 vextracti128_high(vtmp2, src2);
1896 reduce_operation_128(T_SHORT, opcode, vtmp2, src2);
1897 }
1898 reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1899}
1900
1901void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1902 int vector_len = Assembler::AVX_256bit;
1903 vextracti64x4_high(vtmp1, src2);
1904 reduce_operation_256(T_SHORT, opcode, vtmp1, vtmp1, src2);
1905 reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1906}
1907
1908#ifdef _LP641
// Reduces two longs from src2 together with the scalar in src1 into dst:
// pshufd + reduce_operation_128(T_LONG, ...) folds the vector into vtmp2,
// src1 is moved into vtmp1 and combined with it, and the result is moved to dst.
// vtmp1 and vtmp2 are used as scratch registers.
1909void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1910 pshufd(vtmp2, src2, 0xE);
1911 reduce_operation_128(T_LONG, opcode, vtmp2, src2);
1912 movdq(vtmp1, src1);
1913 reduce_operation_128(T_LONG, opcode, vtmp1, vtmp2);
1914 movdq(dst, vtmp1);
1915}
1916
// Reduces four longs from src2 together with the scalar in src1 into dst:
// the part obtained with vextracti128_high is combined with src2 through
// reduce_operation_128(T_LONG, ...), then the rest is delegated to reduce2L.
1917void C2_MacroAssembler::reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1918 vextracti128_high(vtmp1, src2);
1919 reduce_operation_128(T_LONG, opcode, vtmp1, src2);
1920 reduce2L(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
1921}
1922
// Reduces eight longs from src2 together with the scalar in src1 into dst:
// the part obtained with vextracti64x4_high is combined with src2 through
// reduce_operation_256(T_LONG, ...), then the rest is delegated to reduce4L.
1923void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
1924 vextracti64x4_high(vtmp2, src2);
1925 reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2);
1926 reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
1927}
1928
// Builds an opmask in 'dst' from the length held in 'len': 'temp' is loaded with
// all ones, bzhiq is applied with 'len' as the index operand, and the result is
// moved into the mask register. 'temp' is clobbered.
// Asserts that ArrayOperationPartialInlineSize is in (0, 64].
// NOTE(review): presumably yields a mask with the low 'len' bits set - confirm
// against the BZHI instruction definition.
1929void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
1930 assert(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64, "invalid")do { if (!(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize
<= 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 1930, "assert(" "ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64"
") failed", "invalid"); ::breakpoint(); } } while (0)
;
1931 mov64(temp, -1L);
1932 bzhiq(temp, temp, len);
1933 kmovql(dst, temp);
1934}
1935#endif // _LP64
1936
// Folds two floats from src into the accumulator dst, one element at a time:
// first src as-is, then the element brought down by pshufd(vtmp, src, 0x1).
// vtmp is used as a scratch register.
1937void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
1938 reduce_operation_128(T_FLOAT, opcode, dst, src);
1939 pshufd(vtmp, src, 0x1);
1940 reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
1941}
1942
// Folds four floats from src into the accumulator dst: the first two through
// reduce2F, then the elements selected by pshufd with immediates 0x2 and 0x3,
// in that order. vtmp is used as a scratch register.
1943void C2_MacroAssembler::reduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
1944 reduce2F(opcode, dst, src, vtmp);
1945 pshufd(vtmp, src, 0x2);
1946 reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
1947 pshufd(vtmp, src, 0x3);
1948 reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
1949}
1950
// Folds eight floats from src into the accumulator dst with two reduce4F calls:
// first on src, then on the part obtained with vextractf128_high (held in vtmp2).
// vtmp1 and vtmp2 are used as scratch registers.
1951void C2_MacroAssembler::reduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1952 reduce4F(opcode, dst, src, vtmp2);
1953 vextractf128_high(vtmp2, src);
1954 reduce4F(opcode, dst, vtmp2, vtmp1);
1955}
1956
// Folds sixteen floats from src into the accumulator dst with two reduce8F calls:
// first on src, then on the part obtained with vextracti64x4_high (held in vtmp1).
// vtmp1 and vtmp2 are used as scratch registers.
1957void C2_MacroAssembler::reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1958 reduce8F(opcode, dst, src, vtmp1, vtmp2);
1959 vextracti64x4_high(vtmp1, src);
1960 reduce8F(opcode, dst, vtmp1, vtmp1, vtmp2);
1961}
1962
// Folds two doubles from src into the accumulator dst: first src as-is, then
// the element brought down by pshufd(vtmp, src, 0xE).
// vtmp is used as a scratch register.
1963void C2_MacroAssembler::reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
1964 reduce_operation_128(T_DOUBLE, opcode, dst, src);
1965 pshufd(vtmp, src, 0xE);
1966 reduce_operation_128(T_DOUBLE, opcode, dst, vtmp);
1967}
1968
// Folds four doubles from src into the accumulator dst with two reduce2D calls:
// first on src, then on the part obtained with vextractf128_high (held in vtmp2).
// vtmp1 and vtmp2 are used as scratch registers.
1969void C2_MacroAssembler::reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1970 reduce2D(opcode, dst, src, vtmp2);
1971 vextractf128_high(vtmp2, src);
1972 reduce2D(opcode, dst, vtmp2, vtmp1);
1973}
1974
// Folds eight doubles from src into the accumulator dst with two reduce4D calls:
// first on src, then on the part obtained with vextracti64x4_high (held in vtmp1).
// vtmp1 and vtmp2 are used as scratch registers.
1975void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
1976 reduce4D(opcode, dst, src, vtmp1, vtmp2);
1977 vextracti64x4_high(vtmp1, src);
1978 reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2);
1979}
1980
// Masked vector move with an Address source and XMM destination. Forwards
// unchanged to the MacroAssembler implementation.
1981void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) {
1982 MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len);
1983}
1984
// Masked vector move with an XMM source and Address destination. Forwards
// unchanged to the MacroAssembler implementation.
1985void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) {
1986 MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len);
1987}
1988
1989
1990void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
1991 XMMRegister dst, XMMRegister src,
1992 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
1993 XMMRegister xmm_0, XMMRegister xmm_1) {
1994 int permconst[] = {1, 14};
1995 XMMRegister wsrc = src;
1996 XMMRegister wdst = xmm_0;
1997 XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1;
1998
1999 int vlen_enc = Assembler::AVX_128bit;
2000 if (vlen == 16) {
2001 vlen_enc = Assembler::AVX_256bit;
2002 }
2003
2004 for (int i = log2(vlen) - 1; i >=0; i--) {
2005 if (i == 0 && !is_dst_valid) {
2006 wdst = dst;
2007 }
2008 if (i == 3) {
2009 vextracti64x4_high(wtmp, wsrc);
2010 } else if (i == 2) {
2011 vextracti128_high(wtmp, wsrc);
2012 } else { // i = [0,1]
2013 vpermilps(wtmp, wsrc, permconst[i], vlen_enc);
2014 }
2015 vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
2016 wsrc = wdst;
2017 vlen_enc = Assembler::AVX_128bit;
2018 }
2019 if (is_dst_valid) {
2020 vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2021 }
2022}
2023
2024void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, XMMRegister dst, XMMRegister src,
2025 XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
2026 XMMRegister xmm_0, XMMRegister xmm_1) {
2027 XMMRegister wsrc = src;
2028 XMMRegister wdst = xmm_0;
2029 XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1;
2030 int vlen_enc = Assembler::AVX_128bit;
2031 if (vlen == 8) {
2032 vlen_enc = Assembler::AVX_256bit;
2033 }
2034 for (int i = log2(vlen) - 1; i >=0; i--) {
2035 if (i == 0 && !is_dst_valid) {
2036 wdst = dst;
2037 }
2038 if (i == 1) {
2039 vextracti128_high(wtmp, wsrc);
2040 } else if (i == 2) {
2041 vextracti64x4_high(wtmp, wsrc);
2042 } else {
2043 assert(i == 0, "%d", i)do { if (!(i == 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2043, "assert(" "i == 0" ") failed", "%d", i); ::breakpoint
(); } } while (0)
;
2044 vpermilpd(wtmp, wsrc, 1, vlen_enc);
2045 }
2046 vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
2047 wsrc = wdst;
2048 vlen_enc = Assembler::AVX_128bit;
2049 }
2050 if (is_dst_valid) {
2051 vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2052 }
2053}
2054
2055void C2_MacroAssembler::extract(BasicType bt, Register dst, XMMRegister src, int idx) {
2056 switch (bt) {
2057 case T_BYTE: pextrb(dst, src, idx); break;
2058 case T_SHORT: pextrw(dst, src, idx); break;
2059 case T_INT: pextrd(dst, src, idx); break;
2060 case T_LONG: pextrq(dst, src, idx); break;
2061
2062 default:
2063 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2063, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2064 break;
2065 }
2066}
2067
2068XMMRegister C2_MacroAssembler::get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex) {
2069 int esize = type2aelembytes(typ);
2070 int elem_per_lane = 16/esize;
2071 int lane = elemindex / elem_per_lane;
2072 int eindex = elemindex % elem_per_lane;
Value stored to 'eindex' during its initialization is never read
2073
2074 if (lane >= 2) {
2075 assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2075, "assert(" "UseAVX > 2" ") failed", "required"); ::
breakpoint(); } } while (0)
;
2076 vextractf32x4(dst, src, lane & 3);
2077 return dst;
2078 } else if (lane > 0) {
2079 assert(UseAVX > 0, "required")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2079, "assert(" "UseAVX > 0" ") failed", "required"); ::
breakpoint(); } } while (0)
;
2080 vextractf128(dst, src, lane);
2081 return dst;
2082 } else {
2083 return src;
2084 }
2085}
2086
2087void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex) {
2088 int esize = type2aelembytes(typ);
2089 int elem_per_lane = 16/esize;
2090 int eindex = elemindex % elem_per_lane;
2091 assert(is_integral_type(typ),"required")do { if (!(is_integral_type(typ))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2091, "assert(" "is_integral_type(typ)" ") failed", "required"
); ::breakpoint(); } } while (0)
;
2092
2093 if (eindex == 0) {
2094 if (typ == T_LONG) {
2095 movq(dst, src);
2096 } else {
2097 movdl(dst, src);
2098 if (typ == T_BYTE)
2099 movsbl(dst, dst);
2100 else if (typ == T_SHORT)
2101 movswl(dst, dst);
2102 }
2103 } else {
2104 extract(typ, dst, src, eindex);
2105 }
2106}
2107
2108void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) {
2109 int esize = type2aelembytes(typ);
2110 int elem_per_lane = 16/esize;
2111 int eindex = elemindex % elem_per_lane;
2112 assert((typ == T_FLOAT || typ == T_DOUBLE),"required")do { if (!((typ == T_FLOAT || typ == T_DOUBLE))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2112, "assert(" "(typ == T_FLOAT || typ == T_DOUBLE)" ") failed"
, "required"); ::breakpoint(); } } while (0)
;
2113
2114 if (eindex == 0) {
2115 movq(dst, src);
2116 } else {
2117 if (typ == T_FLOAT) {
2118 if (UseAVX == 0) {
2119 movdqu(dst, src);
2120 pshufps(dst, dst, eindex);
2121 } else {
2122 vpshufps(dst, src, src, eindex, Assembler::AVX_128bit);
2123 }
2124 } else {
2125 if (UseAVX == 0) {
2126 movdqu(dst, src);
2127 psrldq(dst, eindex*esize);
2128 } else {
2129 vpsrldq(dst, src, eindex*esize, Assembler::AVX_128bit);
2130 }
2131 movq(dst, dst);
2132 }
2133 }
2134 // Zero upper bits
2135 if (typ == T_FLOAT) {
2136 if (UseAVX == 0) {
2137 assert((vtmp != xnoreg) && (tmp != noreg), "required.")do { if (!((vtmp != xnoreg) && (tmp != noreg))) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2137, "assert(" "(vtmp != xnoreg) && (tmp != noreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
2138 movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp);
2139 pand(dst, vtmp);
2140 } else {
2141 assert((tmp != noreg), "required.")do { if (!((tmp != noreg))) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2141, "assert(" "(tmp != noreg)" ") failed", "required."); ::
breakpoint(); } } while (0)
;
2142 vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp);
2143 }
2144 }
2145}
2146
2147void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len) {
2148 switch(typ) {
2149 case T_BYTE:
2150 case T_BOOLEAN:
2151 evpcmpb(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2152 break;
2153 case T_SHORT:
2154 case T_CHAR:
2155 evpcmpw(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2156 break;
2157 case T_INT:
2158 case T_FLOAT:
2159 evpcmpd(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2160 break;
2161 case T_LONG:
2162 case T_DOUBLE:
2163 evpcmpq(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len);
2164 break;
2165 default:
2166 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2166, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2167 break;
2168 }
2169}
2170
// Register/memory form of evpcmp: same type dispatch as the register/register
// overload, but the second operand is the AddressLiteral `adr`, and `scratch`
// is forwarded to the selected emitter along with it.
//   typ                - element type; T_BOOLEAN/T_BYTE use the byte form,
//                        T_CHAR/T_SHORT the word form, T_INT/T_FLOAT the dword
//                        form, T_LONG/T_DOUBLE the qword form.
//   kdmask, ksmask     - mask registers handed straight to the selected emitter.
//   src1               - vector register operand.
//   adr                - literal address of the second operand.
//   comparison         - predicate code, passed through as-is.
//   vector_len         - vector length encoding, passed through as-is.
//   scratch            - general-purpose register passed through to the emitter.
// Every form is requested with the "signed" flag set to true.
// Any other element type trips the debug assert in the default branch.
2171void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
2172 switch(typ) {
// Byte-sized elements.
2173 case T_BOOLEAN:
2174 case T_BYTE:
2175 evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2176 break;
// Word-sized elements.
2177 case T_CHAR:
2178 case T_SHORT:
2179 evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2180 break;
// Doubleword-sized elements.
2181 case T_INT:
2182 case T_FLOAT:
2183 evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2184 break;
// Quadword-sized elements.
2185 case T_LONG:
2186 case T_DOUBLE:
2187 evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch);
2188 break;
// Unsupported element type: debug-build assertion only.
2189 default:
2190 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2190, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2191 break;
2192 }
2193}
2194
// Compares src1 and src2 element-wise after widening each element to the next
// larger integer size, and leaves the narrowed comparison result in dst.
// Sequence per element type:
//   T_BYTE  : vpmovzxbw both inputs, compare as words,  narrow with vpacksswb.
//   T_SHORT : vpmovzxwd both inputs, compare as dwords, narrow with vpackssdw.
//   T_INT   : vpmovzxdq both inputs, compare as qwords, narrow with vpermilps.
// The vpmovzx* moves are the zero-extending forms, so the wide compare sees the
// original lane values as non-negative numbers.
//   comparison      - predicate handed to vpcmpCCW.
//   vlen_in_bytes   - size of the input vectors; the encoding used for all
//                     instructions is derived from twice this value because the
//                     widened data occupies double the space.
//   vtmp1, vtmp2    - temporaries that receive the widened src1 / src2.
//   scratch         - passed through to vpcmpCCW.
// Other element types trip the debug assert.
2195void C2_MacroAssembler::vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison,
2196 int vlen_in_bytes, XMMRegister vtmp1, XMMRegister vtmp2, Register scratch) {
2197 int vlen_enc = vector_length_encoding(vlen_in_bytes*2);
2198 switch (typ) {
2199 case T_BYTE:
2200 vpmovzxbw(vtmp1, src1, vlen_enc);
2201 vpmovzxbw(vtmp2, src2, vlen_enc);
2202 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// Pack the word-sized result lanes back down to bytes.
2203 vpacksswb(dst, dst, dst, vlen_enc);
2204 break;
2205 case T_SHORT:
2206 vpmovzxwd(vtmp1, src1, vlen_enc);
2207 vpmovzxwd(vtmp2, src2, vlen_enc);
2208 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
// Pack the dword-sized result lanes back down to words.
2209 vpackssdw(dst, dst, dst, vlen_enc);
2210 break;
2211 case T_INT:
2212 vpmovzxdq(vtmp1, src1, vlen_enc);
2213 vpmovzxdq(vtmp2, src2, vlen_enc);
2214 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
// Immediate 8 selects dwords 0 and 2, i.e. the low half of each qword result.
2215 vpermilps(dst, dst, 8, vlen_enc);
2216 break;
2217 default:
2218 assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2218, "assert(" "false" ") failed", "Should not reach here"
); ::breakpoint(); } } while (0)
;
2219 }
// For 16-byte inputs the widened work was done at double width; this extra
// qword permute (selector 0x8) runs only in that case.
// NOTE(review): presumably gathers the two per-lane results into the low
// 128 bits of dst - confirm against callers.
2220 if (vlen_in_bytes == 16) {
2221 vpermpd(dst, dst, 0x8, vlen_enc);
2222 }
2223}
2224
// Variant of vpcmpu that processes the inputs in two halves: the low half of
// src1/src2 is widened and compared into dst, then the upper 128 bits are
// pulled out with vextracti128(.., 1), widened and compared into vtmp3, and
// finally both partial results are narrowed and merged into dst.
// Unlike vpcmpu, the instruction encoding is derived from vlen_in_bytes itself.
// NOTE(review): the name suggests this is meant for 32-byte vectors - confirm
// against callers; nothing in this body checks vlen_in_bytes.
//   typ             - T_BYTE, T_SHORT or T_INT; anything else trips the assert.
//   comparison      - predicate handed to vpcmpCCW.
//   vtmp1, vtmp2    - temporaries for the widened halves of src1 / src2.
//   vtmp3           - temporary holding the comparison result of the upper half.
//   scratch         - passed through to vpcmpCCW.
2225void C2_MacroAssembler::vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes,
2226 XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch) {
2227 int vlen_enc = vector_length_encoding(vlen_in_bytes);
2228 switch (typ) {
2229 case T_BYTE:
// Low half: widen bytes to words and compare.
2230 vpmovzxbw(vtmp1, src1, vlen_enc);
2231 vpmovzxbw(vtmp2, src2, vlen_enc);
2232 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// Upper half: extract, widen and compare into vtmp3.
2233 vextracti128(vtmp1, src1, 1);
2234 vextracti128(vtmp2, src2, 1);
2235 vpmovzxbw(vtmp1, vtmp1, vlen_enc);
2236 vpmovzxbw(vtmp2, vtmp2, vlen_enc);
2237 vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch);
// Narrow both results to bytes, then reorder qwords (0xd8 = 0,2,1,3) because
// the pack instruction works within each 128-bit lane.
2238 vpacksswb(dst, dst, vtmp3, vlen_enc);
2239 vpermpd(dst, dst, 0xd8, vlen_enc);
2240 break;
2241 case T_SHORT:
// Low half: widen words to dwords and compare.
2242 vpmovzxwd(vtmp1, src1, vlen_enc);
2243 vpmovzxwd(vtmp2, src2, vlen_enc);
2244 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
// Upper half: extract, widen and compare into vtmp3.
2245 vextracti128(vtmp1, src1, 1);
2246 vextracti128(vtmp2, src2, 1);
2247 vpmovzxwd(vtmp1, vtmp1, vlen_enc);
2248 vpmovzxwd(vtmp2, vtmp2, vlen_enc);
2249 vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch);
// Narrow both results to words, then apply the same 0xd8 qword reorder.
2250 vpackssdw(dst, dst, vtmp3, vlen_enc);
2251 vpermpd(dst, dst, 0xd8, vlen_enc);
2252 break;
2253 case T_INT:
// Low half: widen dwords to qwords, compare, then compact the result with a
// dword shuffle followed by a qword permute.
2254 vpmovzxdq(vtmp1, src1, vlen_enc);
2255 vpmovzxdq(vtmp2, src2, vlen_enc);
2256 vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
2257 vpshufd(dst, dst, 8, vlen_enc);
2258 vpermq(dst, dst, 8, vlen_enc);
// Upper half: same steps into vtmp3, but with qword selector 0x80 instead of 8.
2259 vextracti128(vtmp1, src1, 1);
2260 vextracti128(vtmp2, src2, 1);
2261 vpmovzxdq(vtmp1, vtmp1, vlen_enc);
2262 vpmovzxdq(vtmp2, vtmp2, vlen_enc);
2263 vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch);
2264 vpshufd(vtmp3, vtmp3, 8, vlen_enc);
2265 vpermq(vtmp3, vtmp3, 0x80, vlen_enc);
// Mask 0xf0 takes the upper four dwords from vtmp3 and keeps the lower four of dst.
2266 vpblendd(dst, dst, vtmp3, 0xf0, vlen_enc);
2267 break;
2268 default:
2269 assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2269, "assert(" "false" ") failed", "Should not reach here"
); ::breakpoint(); } } while (0)
;
2270 }
2271}
2272
// Selects the evpblendm{b,w,d,q} emitter that matches the element type `typ`
// and forwards dst, kmask, src1, src2, merge and vector_len to it unchanged.
//   T_BYTE            -> evpblendmb
//   T_SHORT           -> evpblendmw
//   T_INT / T_FLOAT   -> evpblendmd
//   T_LONG / T_DOUBLE -> evpblendmq
// Any other element type trips the debug assert in the default branch.
// NOTE(review): unlike evpcmp in this file, T_BOOLEAN and T_CHAR are not
// accepted here and fall into the default branch - confirm this is intended.
2273void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
2274 switch(typ) {
2275 case T_BYTE:
2276 evpblendmb(dst, kmask, src1, src2, merge, vector_len);
2277 break;
2278 case T_SHORT:
2279 evpblendmw(dst, kmask, src1, src2, merge, vector_len);
2280 break;
2281 case T_INT:
2282 case T_FLOAT:
2283 evpblendmd(dst, kmask, src1, src2, merge, vector_len);
2284 break;
2285 case T_LONG:
2286 case T_DOUBLE:
2287 evpblendmq(dst, kmask, src1, src2, merge, vector_len);
2288 break;
// Unsupported element type: debug-build assertion only.
2289 default:
2290 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2290, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2291 break;
2292 }
2293}
2294
// Emits a flag-setting test of src1 against src2, with the instruction sequence
// chosen by the vector size `vlen` (4, 8, 16, 32 or 64; the in-body comments
// equate 4 with 32 bits, so the unit is bytes).
//   bt            - test kind; the code distinguishes BoolTest::overflow from
//                   everything else (and, for 64 bytes, BoolTest::ne).
//   src1, src2    - vectors to test.
//   vtmp1, vtmp2  - temporaries. The asserts document the contract:
//                     vlen 4/8   : vtmp1 required; vtmp2 required only when
//                                  bt == BoolTest::overflow, otherwise it must
//                                  be xnoreg.
//                     vlen 16/32/64 : both must be xnoreg.
//   mask          - mask register, used only by the 64-byte path.
// Sizes below 16 bytes are first spread over a full 128-bit register so that
// ptest does not look at bits outside the vector.
// Any other vlen trips the debug assert in the default branch.
2295void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
2296 XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
2297 switch(vlen) {
2298 case 4:
2299 assert(vtmp1 != xnoreg, "required.")do { if (!(vtmp1 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2299, "assert(" "vtmp1 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
2300 // Broadcast lower 32 bits to 128 bits before ptest
2301 pshufd(vtmp1, src1, 0x0);
2302 if (bt == BoolTest::overflow) {
2303 assert(vtmp2 != xnoreg, "required.")do { if (!(vtmp2 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2303, "assert(" "vtmp2 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
// For the overflow test the second operand is shuffled the same way.
2304 pshufd(vtmp2, src2, 0x0);
2305 } else {
2306 assert(vtmp2 == xnoreg, "required.")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2306, "assert(" "vtmp2 == xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
// No temporary supplied: rebind the local so ptest reads src2 directly.
2307 vtmp2 = src2;
2308 }
2309 ptest(vtmp1, vtmp2);
2310 break;
2311 case 8:
2312 assert(vtmp1 != xnoreg, "required.")do { if (!(vtmp1 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2312, "assert(" "vtmp1 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
2313 // Broadcast lower 64 bits to 128 bits before ptest
// NOTE(review): immediate 0x4 selects source dwords {0,1,0,0}; a pure
// repetition of the low 64 bits would be 0x44 - confirm 0x4 is intended.
2314 pshufd(vtmp1, src1, 0x4);
2315 if (bt == BoolTest::overflow) {
2316 assert(vtmp2 != xnoreg, "required.")do { if (!(vtmp2 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2316, "assert(" "vtmp2 != xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
// For the overflow test the second operand is shuffled the same way.
2317 pshufd(vtmp2, src2, 0x4);
2318 } else {
2319 assert(vtmp2 == xnoreg, "required.")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2319, "assert(" "vtmp2 == xnoreg" ") failed", "required.");
::breakpoint(); } } while (0)
;
// No temporary supplied: rebind the local so ptest reads src2 directly.
2320 vtmp2 = src2;
2321 }
2322 ptest(vtmp1, vtmp2);
2323 break;
// 16 bytes fill a whole 128-bit register, so no temporaries are needed.
2324 case 16:
2325 assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2325, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
2326 ptest(src1, src2);
2327 break;
// 32 bytes: 256-bit form of the same test.
2328 case 32:
2329 assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2329, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
2330 vptest(src1, src2, Assembler::AVX_256bit);
2331 break;
// 64 bytes: compare bytes for equality into `mask`, then set flags from the
// mask with ktestql (bt == ne) or kortestql (bt == overflow).
2332 case 64:
2333 {
2334 assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) {
(*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2334, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)"
") failed", "required."); ::breakpoint(); } } while (0)
;
2335 evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
2336 if (bt == BoolTest::ne) {
2337 ktestql(mask, mask);
2338 } else {
2339 assert(bt == BoolTest::overflow, "required")do { if (!(bt == BoolTest::overflow)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2339, "assert(" "bt == BoolTest::overflow" ") failed", "required"
); ::breakpoint(); } } while (0)
;
2340 kortestql(mask, mask);
2341 }
2342 }
2343 break;
// Unsupported vector size: debug-build assertion only.
2344 default:
2345 assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2345, "assert(" "false" ") failed", "Should not reach here."
); ::breakpoint(); } } while (0)
;
2346 break;
2347 }
2348}
2349
2350//-------------------------------------------------------------------------------------------
2351
2352// IndexOf for constant substrings with size >= 8 chars
2353// which don't need to be loaded through stack.
//
// Emits code that scans the string at str1 for the constant substring at str2
// using pcmpestri, 16 bytes at a time.
//   str1, cnt1 - scanned string address and its length in elements.
//   str2, cnt2 - substring address; cnt2 is loaded with int_cnt2 by this code.
//   int_cnt2   - compile-time substring length in elements; asserted to be at
//                least one stride (16 elements for LL, 8 for UU/UL).
//   result     - on exit holds the element index of the match, or -1.
//   vec        - holds the current 16-byte piece of the substring.
//   tmp        - receives the index produced by pcmpestri.
//   ae         - StrIntrinsicNode encoding pair; LU is rejected by assert.
//                LL uses byte elements, UU uses 2-byte elements, and UL
//                widens the substring bytes with pmovzxbw on every load.
// Register binding is fixed by pcmpestri and asserted below:
// cnt1 == rdx, cnt2 == rax, tmp == rcx.
2354void C2_MacroAssembler::string_indexofC8(Register str1, Register str2,
2355 Register cnt1, Register cnt2,
2356 int int_cnt2, Register result,
2357 XMMRegister vec, Register tmp,
2358 int ae) {
2359 ShortBranchVerifier sbv(this);
2360 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2360, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2361 assert(ae != StrIntrinsicNode::LU, "Invalid encoding")do { if (!(ae != StrIntrinsicNode::LU)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2361, "assert(" "ae != StrIntrinsicNode::LU" ") failed", "Invalid encoding"
); ::breakpoint(); } } while (0)
;
2362
2363 // This method uses the pcmpestri instruction with bound registers
2364 // inputs:
2365 // xmm - substring
2366 // rax - substring length (elements count)
2367 // mem - scanned string
2368 // rdx - string length (elements count)
2369 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
2370 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
2371 // outputs:
2372 // rcx - matched index in string
2373 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp ==
rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2373, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
2374 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
2375 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
// scale1 addresses elements of the scanned string, scale2 elements of the
// substring; they differ only for UL, where the substring is byte-sized.
2376 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
2377 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
2378
2379 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
2380 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
2381 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
2382
2383 // Note, inline_string_indexOf() generates checks:
2384 // if (substr.count > string.count) return -1;
2385 // if (substr.count == 0) return 0;
2386 assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars")do { if (!(int_cnt2 >= stride)) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2386, "assert(" "int_cnt2 >= stride" ") failed", "this code is used only for cnt2 >= 8 chars"
); ::breakpoint(); } } while (0)
;
2387
2388 // Load substring.
2389 if (ae == StrIntrinsicNode::UL) {
2390 pmovzxbw(vec, Address(str2, 0));
2391 } else {
2392 movdqu(vec, Address(str2, 0));
2393 }
2394 movl(cnt2, int_cnt2);
2395 movptr(result, str1); // string addr
2396
// The reload paths below are emitted only when the substring is longer than
// one vector; the initial entry jumps over them.
2397 if (int_cnt2 > stride) {
2398 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2398)
;
2399
2400 // Reload substr for rescan, this code
2401 // is executed only for large substrings (> 8 chars)
2402 bind(RELOAD_SUBSTR);
2403 if (ae == StrIntrinsicNode::UL) {
2404 pmovzxbw(vec, Address(str2, 0));
2405 } else {
2406 movdqu(vec, Address(str2, 0));
2407 }
2408 negptr(cnt2); // Jumped here with negative cnt2, convert to positive
2409
2410 bind(RELOAD_STR);
2411 // We came here after the beginning of the substring was
2412 // matched but the rest of it was not so we need to search
2413 // again. Start from the next element after the previous match.
2414
2415 // cnt2 is number of substring remaining elements and
2416 // cnt1 is number of string remaining elements when cmp failed.
2417 // Restored cnt1 = cnt1 - cnt2 + int_cnt2
2418 subl(cnt1, cnt2);
2419 addl(cnt1, int_cnt2);
2420 movl(cnt2, int_cnt2); // Now restore cnt2
2421
2422 decrementl(cnt1); // Shift to next element
2423 cmpl(cnt1, cnt2);
2424 jcc(Assembler::negative, RET_NOT_FOUND); // Left less than substring
2425
// Step the scan position forward by one element of the scanned string.
2426 addptr(result, (1<<scale1));
2427
2428 } // (int_cnt2 > 8)
2429
2430 // Scan string for start of substr in 16-byte vectors
2431 bind(SCAN_TO_SUBSTR);
2432 pcmpestri(vec, Address(result, 0), mode);
2433 jccb(Assembler::below, FOUND_CANDIDATE)jccb_0(Assembler::below, FOUND_CANDIDATE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2433)
; // CF == 1
2434 subl(cnt1, stride);
2435 jccb(Assembler::lessEqual, RET_NOT_FOUND)jccb_0(Assembler::lessEqual, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2435)
; // Scanned full string
2436 cmpl(cnt1, cnt2);
2437 jccb(Assembler::negative, RET_NOT_FOUND)jccb_0(Assembler::negative, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2437)
; // Left less than substring
// Nothing in this vector: move on to the next 16 bytes of the string.
2438 addptr(result, 16);
2439 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2439)
;
2440
2441 // Found a potential substr
2442 bind(FOUND_CANDIDATE);
2443 // Matched whole vector if first element matched (tmp(rcx) == 0).
2444 if (int_cnt2 == stride) {
2445 jccb(Assembler::overflow, RET_FOUND)jccb_0(Assembler::overflow, RET_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2445)
; // OF == 1
2446 } else { // int_cnt2 > 8
2447 jccb(Assembler::overflow, FOUND_SUBSTR)jccb_0(Assembler::overflow, FOUND_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2447)
;
2448 }
2449 // After pcmpestri tmp(rcx) contains matched element index
2450 // Compute start addr of substr
2451 lea(result, Address(result, tmp, scale1));
2452
2453 // Make sure string is still long enough
2454 subl(cnt1, tmp);
2455 cmpl(cnt1, cnt2);
2456 if (int_cnt2 == stride) {
2457 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR)jccb_0(Assembler::greaterEqual, SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2457)
;
2458 } else { // int_cnt2 > 8
2459 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD)jccb_0(Assembler::greaterEqual, MATCH_SUBSTR_HEAD, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2459)
;
2460 }
2461 // Left less than substring.
2462
2463 bind(RET_NOT_FOUND);
2464 movl(result, -1);
2465 jmp(EXIT);
2466
2467 if (int_cnt2 > stride) {
2468 // This code is optimized for the case when whole substring
2469 // is matched if its head is matched.
2470 bind(MATCH_SUBSTR_HEAD);
2471 pcmpestri(vec, Address(result, 0), mode);
2472 // Reload only string if does not match
2473 jcc(Assembler::noOverflow, RELOAD_STR); // OF == 0
2474
2475 Label CONT_SCAN_SUBSTR;
2476 // Compare the rest of substring (> 8 chars).
2477 bind(FOUND_SUBSTR);
2478 // First 8 chars are already matched.
// From here on cnt2 is kept as a negative count running up towards zero.
2479 negptr(cnt2);
2480 addptr(cnt2, stride);
2481
2482 bind(SCAN_SUBSTR);
2483 subl(cnt1, stride);
2484 cmpl(cnt2, -stride); // Do not read beyond substring
2485 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR)jccb_0(Assembler::lessEqual, CONT_SCAN_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2485)
;
2486 // Back-up strings to avoid reading beyond substring:
2487 // cnt1 = cnt1 - cnt2 + 8
2488 addl(cnt1, cnt2); // cnt2 is negative
2489 addl(cnt1, stride);
2490 movl(cnt2, stride); negptr(cnt2);
2491 bind(CONT_SCAN_SUBSTR);
// Small lengths fold the tail offset into the address displacement; large
// ones compute the index in tmp instead (see the comment in the else branch).
2492 if (int_cnt2 < (int)G) {
2493 int tail_off1 = int_cnt2<<scale1;
2494 int tail_off2 = int_cnt2<<scale2;
2495 if (ae == StrIntrinsicNode::UL) {
2496 pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2));
2497 } else {
2498 movdqu(vec, Address(str2, cnt2, scale2, tail_off2));
2499 }
2500 pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode);
2501 } else {
2502 // calculate index in register to avoid integer overflow (int_cnt2*2)
2503 movl(tmp, int_cnt2);
2504 addptr(tmp, cnt2);
2505 if (ae == StrIntrinsicNode::UL) {
2506 pmovzxbw(vec, Address(str2, tmp, scale2, 0));
2507 } else {
2508 movdqu(vec, Address(str2, tmp, scale2, 0));
2509 }
2510 pcmpestri(vec, Address(result, tmp, scale1, 0), mode);
2511 }
2512 // Need to reload strings pointers if not matched whole vector
2513 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
2514 addptr(cnt2, stride);
2515 jcc(Assembler::negative, SCAN_SUBSTR);
2516 // Fall through if found full substring
2517
2518 } // (int_cnt2 > 8)
2519
2520 bind(RET_FOUND);
2521 // Found result if we matched full small substring.
2522 // Compute substr offset
2523 subptr(result, str1);
// Two-byte elements: convert the byte offset into an element index.
2524 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
2525 shrl(result, 1); // index
2526 }
2527 bind(EXIT);
2528
2529} // string_indexofC8
2530
2531// Small strings are loaded through stack if they cross page boundary.
2532void C2_MacroAssembler::string_indexof(Register str1, Register str2,
2533 Register cnt1, Register cnt2,
2534 int int_cnt2, Register result,
2535 XMMRegister vec, Register tmp,
2536 int ae) {
2537 ShortBranchVerifier sbv(this);
2538 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2538, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2539 assert(ae != StrIntrinsicNode::LU, "Invalid encoding")do { if (!(ae != StrIntrinsicNode::LU)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2539, "assert(" "ae != StrIntrinsicNode::LU" ") failed", "Invalid encoding"
); ::breakpoint(); } } while (0)
;
2540
2541 //
2542 // int_cnt2 is length of small (< 8 chars) constant substring
2543 // or (-1) for non constant substring in which case its length
2544 // is in cnt2 register.
2545 //
2546 // Note, inline_string_indexOf() generates checks:
2547 // if (substr.count > string.count) return -1;
2548 // if (substr.count == 0) return 0;
2549 //
2550 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
2551 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0")do { if (!(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2
< stride))) { (*g_assert_poison) = 'X';; report_vm_error(
"/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2551, "assert(" "int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride)"
") failed", "should be != 0"); ::breakpoint(); } } while (0)
;
2552 // This method uses the pcmpestri instruction with bound registers
2553 // inputs:
2554 // xmm - substring
2555 // rax - substring length (elements count)
2556 // mem - scanned string
2557 // rdx - string length (elements count)
2558 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
2559 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
2560 // outputs:
2561 // rcx - matched index in string
2562 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp ==
rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2562, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
2563 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
2564 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
2565 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
2566
2567 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
2568 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
2569 FOUND_CANDIDATE;
2570
2571 { //========================================================
2572 // We don't know where these strings are located
2573 // and we can't read beyond them. Load them through stack.
2574 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
2575
2576 movptr(tmp, rsp); // save old SP
2577
2578 if (int_cnt2 > 0) { // small (< 8 chars) constant substring
2579 if (int_cnt2 == (1>>scale2)) { // One byte
2580 assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding")do { if (!((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode
::UL))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2580, "assert(" "(ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL)"
") failed", "Only possible for latin1 encoding"); ::breakpoint
(); } } while (0)
;
2581 load_unsigned_byte(result, Address(str2, 0));
2582 movdl(vec, result); // move 32 bits
2583 } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) { // Three bytes
2584 // Not enough header space in 32-bit VM: 12+3 = 15.
2585 movl(result, Address(str2, -1));
2586 shrl(result, 8);
2587 movdl(vec, result); // move 32 bits
2588 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) { // One char
2589 load_unsigned_short(result, Address(str2, 0));
2590 movdl(vec, result); // move 32 bits
2591 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars
2592 movdl(vec, Address(str2, 0)); // move 32 bits
2593 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars
2594 movq(vec, Address(str2, 0)); // move 64 bits
2595 } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7})
2596 // Array header size is 12 bytes in 32-bit VM
2597 // + 6 bytes for 3 chars == 18 bytes,
2598 // enough space to load vec and shift.
2599 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity")do { if (!(HeapWordSize*TypeArrayKlass::header_size() >= 12
)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2599, "assert(" "HeapWordSize*TypeArrayKlass::header_size() >= 12"
") failed", "sanity"); ::breakpoint(); } } while (0)
;
2600 if (ae == StrIntrinsicNode::UL) {
2601 int tail_off = int_cnt2-8;
2602 pmovzxbw(vec, Address(str2, tail_off));
2603 psrldq(vec, -2*tail_off);
2604 }
2605 else {
2606 int tail_off = int_cnt2*(1<<scale2);
2607 movdqu(vec, Address(str2, tail_off-16));
2608 psrldq(vec, 16-tail_off);
2609 }
2610 }
2611 } else { // not constant substring
2612 cmpl(cnt2, stride);
2613 jccb(Assembler::aboveEqual, BIG_STRINGS)jccb_0(Assembler::aboveEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2613)
; // Both strings are big enough
2614
2615 // We can read beyond string if srt+16 does not cross page boundary
2616 // since heaps are aligned and mapped by pages.
2617 assert(os::vm_page_size() < (int)G, "default page should be small")do { if (!(os::vm_page_size() < (int)G)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2617, "assert(" "os::vm_page_size() < (int)G" ") failed"
, "default page should be small"); ::breakpoint(); } } while (
0)
;
2618 movl(result, str2); // We need only low 32 bits
2619 andl(result, (os::vm_page_size()-1));
2620 cmpl(result, (os::vm_page_size()-16));
2621 jccb(Assembler::belowEqual, CHECK_STR)jccb_0(Assembler::belowEqual, CHECK_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2621)
;
2622
2623 // Move small strings to stack to allow load 16 bytes into vec.
2624 subptr(rsp, 16);
2625 int stk_offset = wordSize-(1<<scale2);
2626 push(cnt2);
2627
2628 bind(COPY_SUBSTR);
2629 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) {
2630 load_unsigned_byte(result, Address(str2, cnt2, scale2, -1));
2631 movb(Address(rsp, cnt2, scale2, stk_offset), result);
2632 } else if (ae == StrIntrinsicNode::UU) {
2633 load_unsigned_short(result, Address(str2, cnt2, scale2, -2));
2634 movw(Address(rsp, cnt2, scale2, stk_offset), result);
2635 }
2636 decrement(cnt2);
2637 jccb(Assembler::notZero, COPY_SUBSTR)jccb_0(Assembler::notZero, COPY_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2637)
;
2638
2639 pop(cnt2);
2640 movptr(str2, rsp); // New substring address
2641 } // non constant
2642
2643 bind(CHECK_STR);
2644 cmpl(cnt1, stride);
2645 jccb(Assembler::aboveEqual, BIG_STRINGS)jccb_0(Assembler::aboveEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2645)
;
2646
2647 // Check cross page boundary.
2648 movl(result, str1); // We need only low 32 bits
2649 andl(result, (os::vm_page_size()-1));
2650 cmpl(result, (os::vm_page_size()-16));
2651 jccb(Assembler::belowEqual, BIG_STRINGS)jccb_0(Assembler::belowEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2651)
;
2652
2653 subptr(rsp, 16);
2654 int stk_offset = -(1<<scale1);
2655 if (int_cnt2 < 0) { // not constant
2656 push(cnt2);
2657 stk_offset += wordSize;
2658 }
2659 movl(cnt2, cnt1);
2660
2661 bind(COPY_STR);
2662 if (ae == StrIntrinsicNode::LL) {
2663 load_unsigned_byte(result, Address(str1, cnt2, scale1, -1));
2664 movb(Address(rsp, cnt2, scale1, stk_offset), result);
2665 } else {
2666 load_unsigned_short(result, Address(str1, cnt2, scale1, -2));
2667 movw(Address(rsp, cnt2, scale1, stk_offset), result);
2668 }
2669 decrement(cnt2);
2670 jccb(Assembler::notZero, COPY_STR)jccb_0(Assembler::notZero, COPY_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2670)
;
2671
2672 if (int_cnt2 < 0) { // not constant
2673 pop(cnt2);
2674 }
2675 movptr(str1, rsp); // New string address
2676
2677 bind(BIG_STRINGS);
2678 // Load substring.
2679 if (int_cnt2 < 0) { // -1
2680 if (ae == StrIntrinsicNode::UL) {
2681 pmovzxbw(vec, Address(str2, 0));
2682 } else {
2683 movdqu(vec, Address(str2, 0));
2684 }
2685 push(cnt2); // substr count
2686 push(str2); // substr addr
2687 push(str1); // string addr
2688 } else {
2689 // Small (< 8 chars) constant substrings are loaded already.
2690 movl(cnt2, int_cnt2);
2691 }
2692 push(tmp); // original SP
2693
2694 } // Finished loading
2695
2696 //========================================================
2697 // Start search
2698 //
2699
2700 movptr(result, str1); // string addr
2701
2702 if (int_cnt2 < 0) { // Only for non constant substring
2703 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2703)
;
2704
2705 // SP saved at sp+0
2706 // String saved at sp+1*wordSize
2707 // Substr saved at sp+2*wordSize
2708 // Substr count saved at sp+3*wordSize
2709
2710 // Reload substr for rescan, this code
2711 // is executed only for large substrings (> 8 chars)
2712 bind(RELOAD_SUBSTR);
2713 movptr(str2, Address(rsp, 2*wordSize));
2714 movl(cnt2, Address(rsp, 3*wordSize));
2715 if (ae == StrIntrinsicNode::UL) {
2716 pmovzxbw(vec, Address(str2, 0));
2717 } else {
2718 movdqu(vec, Address(str2, 0));
2719 }
2720 // We came here after the beginning of the substring was
2721 // matched but the rest of it was not so we need to search
2722 // again. Start from the next element after the previous match.
2723 subptr(str1, result); // Restore counter
2724 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
2725 shrl(str1, 1);
2726 }
2727 addl(cnt1, str1);
2728 decrementl(cnt1); // Shift to next element
2729 cmpl(cnt1, cnt2);
2730 jcc(Assembler::negative, RET_NOT_FOUND); // Left less then substring
2731
2732 addptr(result, (1<<scale1));
2733 } // non constant
2734
2735 // Scan string for start of substr in 16-byte vectors
2736 bind(SCAN_TO_SUBSTR);
2737 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp ==
rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2737, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
2738 pcmpestri(vec, Address(result, 0), mode);
2739 jccb(Assembler::below, FOUND_CANDIDATE)jccb_0(Assembler::below, FOUND_CANDIDATE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2739)
; // CF == 1
2740 subl(cnt1, stride);
2741 jccb(Assembler::lessEqual, RET_NOT_FOUND)jccb_0(Assembler::lessEqual, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2741)
; // Scanned full string
2742 cmpl(cnt1, cnt2);
2743 jccb(Assembler::negative, RET_NOT_FOUND)jccb_0(Assembler::negative, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2743)
; // Left less then substring
2744 addptr(result, 16);
2745
2746 bind(ADJUST_STR);
2747 cmpl(cnt1, stride); // Do not read beyond string
2748 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR)jccb_0(Assembler::greaterEqual, SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2748)
;
2749 // Back-up string to avoid reading beyond string.
2750 lea(result, Address(result, cnt1, scale1, -16));
2751 movl(cnt1, stride);
2752 jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2752)
;
2753
2754 // Found a potential substr
2755 bind(FOUND_CANDIDATE);
2756 // After pcmpestri tmp(rcx) contains matched element index
2757
2758 // Make sure string is still long enough
2759 subl(cnt1, tmp);
2760 cmpl(cnt1, cnt2);
2761 jccb(Assembler::greaterEqual, FOUND_SUBSTR)jccb_0(Assembler::greaterEqual, FOUND_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2761)
;
2762 // Left less then substring.
2763
2764 bind(RET_NOT_FOUND);
2765 movl(result, -1);
2766 jmp(CLEANUP);
2767
2768 bind(FOUND_SUBSTR);
2769 // Compute start addr of substr
2770 lea(result, Address(result, tmp, scale1));
2771 if (int_cnt2 > 0) { // Constant substring
2772 // Repeat search for small substring (< 8 chars)
2773 // from new point without reloading substring.
2774 // Have to check that we don't read beyond string.
2775 cmpl(tmp, stride-int_cnt2);
2776 jccb(Assembler::greater, ADJUST_STR)jccb_0(Assembler::greater, ADJUST_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2776)
;
2777 // Fall through if matched whole substring.
2778 } else { // non constant
2779 assert(int_cnt2 == -1, "should be != 0")do { if (!(int_cnt2 == -1)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2779, "assert(" "int_cnt2 == -1" ") failed", "should be != 0"
); ::breakpoint(); } } while (0)
;
2780
2781 addl(tmp, cnt2);
2782 // Found result if we matched whole substring.
2783 cmpl(tmp, stride);
2784 jcc(Assembler::lessEqual, RET_FOUND);
2785
2786 // Repeat search for small substring (<= 8 chars)
2787 // from new point 'str1' without reloading substring.
2788 cmpl(cnt2, stride);
2789 // Have to check that we don't read beyond string.
2790 jccb(Assembler::lessEqual, ADJUST_STR)jccb_0(Assembler::lessEqual, ADJUST_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2790)
;
2791
2792 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
2793 // Compare the rest of substring (> 8 chars).
2794 movptr(str1, result);
2795
2796 cmpl(tmp, cnt2);
2797 // First 8 chars are already matched.
2798 jccb(Assembler::equal, CHECK_NEXT)jccb_0(Assembler::equal, CHECK_NEXT, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2798)
;
2799
2800 bind(SCAN_SUBSTR);
2801 pcmpestri(vec, Address(str1, 0), mode);
2802 // Need to reload strings pointers if not matched whole vector
2803 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
2804
2805 bind(CHECK_NEXT);
2806 subl(cnt2, stride);
2807 jccb(Assembler::lessEqual, RET_FOUND_LONG)jccb_0(Assembler::lessEqual, RET_FOUND_LONG, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2807)
; // Found full substring
2808 addptr(str1, 16);
2809 if (ae == StrIntrinsicNode::UL) {
2810 addptr(str2, 8);
2811 } else {
2812 addptr(str2, 16);
2813 }
2814 subl(cnt1, stride);
2815 cmpl(cnt2, stride); // Do not read beyond substring
2816 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR)jccb_0(Assembler::greaterEqual, CONT_SCAN_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2816)
;
2817 // Back-up strings to avoid reading beyond substring.
2818
2819 if (ae == StrIntrinsicNode::UL) {
2820 lea(str2, Address(str2, cnt2, scale2, -8));
2821 lea(str1, Address(str1, cnt2, scale1, -16));
2822 } else {
2823 lea(str2, Address(str2, cnt2, scale2, -16));
2824 lea(str1, Address(str1, cnt2, scale1, -16));
2825 }
2826 subl(cnt1, cnt2);
2827 movl(cnt2, stride);
2828 addl(cnt1, stride);
2829 bind(CONT_SCAN_SUBSTR);
2830 if (ae == StrIntrinsicNode::UL) {
2831 pmovzxbw(vec, Address(str2, 0));
2832 } else {
2833 movdqu(vec, Address(str2, 0));
2834 }
2835 jmp(SCAN_SUBSTR);
2836
2837 bind(RET_FOUND_LONG);
2838 movptr(str1, Address(rsp, wordSize));
2839 } // non constant
2840
2841 bind(RET_FOUND);
2842 // Compute substr offset
2843 subptr(result, str1);
2844 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
2845 shrl(result, 1); // index
2846 }
2847 bind(CLEANUP);
2848 pop(rsp); // restore SP
2849
2850} // string_indexof
2851
// Emits code that searches the cnt1 16-bit elements starting at str1 for the value in ch.
// On exit result holds the element index of the first match (the byte offset from str1
// shifted right by one), or -1 when no element matches.
// result doubles as the running scan pointer; cnt1, tmp, vec1, vec2 and vec3 are scratch,
// and ch is overwritten by bsfl on the vector-hit path.
// Asserts that UseSSE42Intrinsics is set.
2852void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
2853 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
2854 ShortBranchVerifier sbv(this);
2855 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2855, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2856
     // Number of 16-bit elements covered by one 16-byte vector step.
2857 int stride = 8;
2858
2859 Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
2860 SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
2861 RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
2862 FOUND_SEQ_CHAR, DONE_LABEL;
2863
     // result walks the string; the start address stays in str1 for the final index computation.
2864 movptr(result, str1);
     // AVX2 dispatch on length: fewer than 8 elements go straight to the scalar loop, fewer
     // than 16 use the 128-bit loop, otherwise ch is broadcast to every 16-bit lane of a
     // 256-bit register and 16 elements are scanned per iteration.
2865 if (UseAVX >= 2) {
2866 cmpl(cnt1, stride);
2867 jcc(Assembler::less, SCAN_TO_CHAR);
2868 cmpl(cnt1, 2*stride);
2869 jcc(Assembler::less, SCAN_TO_8_CHAR_INIT);
2870 movdl(vec1, ch);
2871 vpbroadcastw(vec1, vec1, Assembler::AVX_256bit);
2872 vpxor(vec2, vec2);
2873 movl(tmp, cnt1);
2874 andl(tmp, 0xFFFFFFF0); //vector count (in chars)
2875 andl(cnt1,0x0000000F); //tail count (in chars)
2876
2877 bind(SCAN_TO_16_CHAR_LOOP);
2878 vmovdqu(vec3, Address(result, 0));
     // NOTE(review): the literal 1 is presumably Assembler::AVX_256bit, as spelled out in
     // stringL_indexof_char - confirm.
2879 vpcmpeqw(vec3, vec3, vec1, 1);
     // vec2 is zero, so CF is set only when vec3 is all zero; carryClear means a lane matched.
2880 vptest(vec2, vec3);
2881 jcc(Assembler::carryClear, FOUND_CHAR);
2882 addptr(result, 32);
2883 subl(tmp, 2*stride);
2884 jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
2885 jmp(SCAN_TO_8_CHAR);
     // 8..15 elements: broadcast ch across the 128-bit register and zero vec2 for the
     // 8-element loop below.
2886 bind(SCAN_TO_8_CHAR_INIT);
2887 movdl(vec1, ch);
2888 pshuflw(vec1, vec1, 0x00);
2889 pshufd(vec1, vec1, 0);
2890 pxor(vec2, vec2);
2891 }
2892 bind(SCAN_TO_8_CHAR);
2893 cmpl(cnt1, stride);
2894 jcc(Assembler::less, SCAN_TO_CHAR);
     // Without AVX2 nothing above has prepared vec1/vec2, so set them up here.
2895 if (UseAVX < 2) {
2896 movdl(vec1, ch);
2897 pshuflw(vec1, vec1, 0x00);
2898 pshufd(vec1, vec1, 0);
2899 pxor(vec2, vec2);
2900 }
2901 movl(tmp, cnt1);
2902 andl(tmp, 0xFFFFFFF8); //vector count (in chars)
2903 andl(cnt1,0x00000007); //tail count (in chars)
2904
2905 bind(SCAN_TO_8_CHAR_LOOP);
2906 movdqu(vec3, Address(result, 0));
2907 pcmpeqw(vec3, vec1);
     // Same zero-test trick as the 256-bit loop: carryClear means vec3 has a matching lane.
2908 ptest(vec2, vec3);
2909 jcc(Assembler::carryClear, FOUND_CHAR);
2910 addptr(result, 16);
2911 subl(tmp, stride);
2912 jcc(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
     // Scalar tail: compare the remaining cnt1 elements one at a time.
2913 bind(SCAN_TO_CHAR);
2914 testl(cnt1, cnt1);
2915 jcc(Assembler::zero, RET_NOT_FOUND);
2916 bind(SCAN_TO_CHAR_LOOP);
2917 load_unsigned_short(tmp, Address(result, 0));
2918 cmpl(ch, tmp);
2919 jccb(Assembler::equal, FOUND_SEQ_CHAR)jccb_0(Assembler::equal, FOUND_SEQ_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2919)
;
2920 addptr(result, 2);
2921 subl(cnt1, 1);
2922 jccb(Assembler::zero, RET_NOT_FOUND)jccb_0(Assembler::zero, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2922)
;
2923 jmp(SCAN_TO_CHAR_LOOP);
2924
2925 bind(RET_NOT_FOUND);
2926 movl(result, -1);
2927 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2927)
;
2928
     // vec3 holds the per-lane compare result. The lowest set bit of its byte mask is the
     // byte offset of the first match inside the vector; add it to the vector's address.
2929 bind(FOUND_CHAR);
2930 if (UseAVX >= 2) {
2931 vpmovmskb(tmp, vec3);
2932 } else {
2933 pmovmskb(tmp, vec3);
2934 }
2935 bsfl(ch, tmp);
2936 addptr(result, ch);
2937
     // Turn the match address into an element index: byte distance from str1, halved.
2938 bind(FOUND_SEQ_CHAR);
2939 subptr(result, str1);
2940 shrl(result, 1);
2941
2942 bind(DONE_LABEL);
2943} // string_indexof_char
2944
// Byte-element counterpart of string_indexof_char: emits code that searches the cnt1 bytes
// starting at str1 for the byte value in ch.
// On exit result holds the byte index of the first match (match address minus str1), or -1
// when no byte matches.
// result doubles as the running scan pointer; cnt1, tmp, vec1, vec2 and vec3 are scratch,
// and ch is overwritten by bsfl on the vector-hit path.
// Asserts that UseSSE42Intrinsics is set.
2945void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
2946 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
2947 ShortBranchVerifier sbv(this);
2948 assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 2948, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required"
); ::breakpoint(); } } while (0)
;
2949
     // Number of byte elements covered by one 16-byte vector step.
2950 int stride = 16;
2951
2952 Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP,
2953 SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP,
2954 RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT,
2955 FOUND_SEQ_CHAR, DONE_LABEL;
2956
     // result walks the array; the start address stays in str1 for the final index computation.
2957 movptr(result, str1);
     // AVX2 dispatch on length: fewer than 16 bytes go straight to the scalar loop, fewer than
     // 32 use the 128-bit loop, otherwise ch is broadcast to every byte lane of a 256-bit
     // register and 32 bytes are scanned per iteration.
2958 if (UseAVX >= 2) {
2959 cmpl(cnt1, stride);
2960 jcc(Assembler::less, SCAN_TO_CHAR_INIT);
2961 cmpl(cnt1, stride*2);
2962 jcc(Assembler::less, SCAN_TO_16_CHAR_INIT);
2963 movdl(vec1, ch);
2964 vpbroadcastb(vec1, vec1, Assembler::AVX_256bit);
2965 vpxor(vec2, vec2);
2966 movl(tmp, cnt1);
2967 andl(tmp, 0xFFFFFFE0); //vector count (in chars)
2968 andl(cnt1,0x0000001F); //tail count (in chars)
2969
2970 bind(SCAN_TO_32_CHAR_LOOP);
2971 vmovdqu(vec3, Address(result, 0));
2972 vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit);
     // vec2 is zero, so CF is set only when vec3 is all zero; carryClear means a lane matched.
2973 vptest(vec2, vec3);
2974 jcc(Assembler::carryClear, FOUND_CHAR);
2975 addptr(result, 32);
2976 subl(tmp, stride*2);
2977 jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP);
2978 jmp(SCAN_TO_16_CHAR);
2979
     // 16..31 bytes: zero vec2 and use it as the pshufb control so byte 0 of vec1 (ch) is
     // replicated into every byte lane.
2980 bind(SCAN_TO_16_CHAR_INIT);
2981 movdl(vec1, ch);
2982 pxor(vec2, vec2);
2983 pshufb(vec1, vec2);
2984 }
2985
2986 bind(SCAN_TO_16_CHAR);
2987 cmpl(cnt1, stride);
2988 jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entries left
     // Without AVX2 nothing above has prepared vec1/vec2, so set them up here.
2989 if (UseAVX < 2) {
2990 movdl(vec1, ch);
2991 pxor(vec2, vec2);
2992 pshufb(vec1, vec2);
2993 }
2994 movl(tmp, cnt1);
2995 andl(tmp, 0xFFFFFFF0); //vector count (in bytes)
2996 andl(cnt1,0x0000000F); //tail count (in bytes)
2997
2998 bind(SCAN_TO_16_CHAR_LOOP);
2999 movdqu(vec3, Address(result, 0));
3000 pcmpeqb(vec3, vec1);
     // Same zero-test trick as the 256-bit loop: carryClear means vec3 has a matching lane.
3001 ptest(vec2, vec3);
3002 jcc(Assembler::carryClear, FOUND_CHAR);
3003 addptr(result, 16);
3004 subl(tmp, stride);
3005 jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items...
3006
     // Scalar tail: compare the remaining cnt1 bytes one at a time.
3007 bind(SCAN_TO_CHAR_INIT);
3008 testl(cnt1, cnt1);
3009 jcc(Assembler::zero, RET_NOT_FOUND);
3010 bind(SCAN_TO_CHAR_LOOP);
3011 load_unsigned_byte(tmp, Address(result, 0));
3012 cmpl(ch, tmp);
3013 jccb(Assembler::equal, FOUND_SEQ_CHAR)jccb_0(Assembler::equal, FOUND_SEQ_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3013)
;
3014 addptr(result, 1);
3015 subl(cnt1, 1);
3016 jccb(Assembler::zero, RET_NOT_FOUND)jccb_0(Assembler::zero, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3016)
;
3017 jmp(SCAN_TO_CHAR_LOOP);
3018
3019 bind(RET_NOT_FOUND);
3020 movl(result, -1);
3021 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3021)
;
3022
     // vec3 holds the per-lane compare result. The lowest set bit of its byte mask is the
     // byte offset of the first match inside the vector; add it to the vector's address.
3023 bind(FOUND_CHAR);
3024 if (UseAVX >= 2) {
3025 vpmovmskb(tmp, vec3);
3026 } else {
3027 pmovmskb(tmp, vec3);
3028 }
3029 bsfl(ch, tmp);
3030 addptr(result, ch);
3031
     // Elements are single bytes, so the byte distance from str1 is already the index.
3032 bind(FOUND_SEQ_CHAR);
3033 subptr(result, str1);
3034
3035 bind(DONE_LABEL);
3036} // stringL_indexof_char
3037
3038// helper function for string_compare
3039void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
3040 Address::ScaleFactor scale, Address::ScaleFactor scale1,
3041 Address::ScaleFactor scale2, Register index, int ae) {
3042 if (ae == StrIntrinsicNode::LL) {
3043 load_unsigned_byte(elem1, Address(str1, index, scale, 0));
3044 load_unsigned_byte(elem2, Address(str2, index, scale, 0));
3045 } else if (ae == StrIntrinsicNode::UU) {
3046 load_unsigned_short(elem1, Address(str1, index, scale, 0));
3047 load_unsigned_short(elem2, Address(str2, index, scale, 0));
3048 } else {
3049 load_unsigned_byte(elem1, Address(str1, index, scale1, 0));
3050 load_unsigned_short(elem2, Address(str2, index, scale2, 0));
3051 }
3052}
3053
3054// Compare strings, used for char[] and byte[].
// Emits code that compares the elements of str1 and str2.
//   ae   - StrIntrinsicNode encoding selector. LL and UU read both strings with the same
//          element width (bytes / 16-bit values); every other value reads str1 as bytes and
//          str2 as 16-bit values.
//   mask - opmask register; only written by the 64-byte loop inside the _LP64 conditional.
// On exit result holds (str1 element - str2 element) for the first mismatching position, or,
// when the strings agree over the shorter length, the length difference cnt1 - cnt2 that was
// pushed on the stack at entry (arithmetically halved for UU). For UL the final value is
// negated. cnt1, cnt2, str1, str2 and vec1 are used as scratch.
// The vector paths assert result == rax, cnt2 == rdx and cnt1 == rcx because pcmpestri uses
// those registers implicitly.
3055void C2_MacroAssembler::string_compare(Register str1, Register str2,
3056 Register cnt1, Register cnt2, Register result,
3057 XMMRegister vec1, int ae, KRegister mask) {
3058 ShortBranchVerifier sbv(this);
3059 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
3060 Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
3061 int stride, stride2, adr_stride, adr_stride1, adr_stride2;
     // Step of the 64-byte loop, in the units of result/cnt2: 0x40 for LL, 0x20 otherwise.
3062 int stride2x2 = 0x40;
3063 Address::ScaleFactor scale = Address::no_scale;
3064 Address::ScaleFactor scale1 = Address::no_scale;
3065 Address::ScaleFactor scale2 = Address::no_scale;
3066
3067 if (ae != StrIntrinsicNode::LL) {
3068 stride2x2 = 0x20;
3069 }
3070
     // Mixed encodings: cnt2 is halved before the minimum is computed.
     // NOTE(review): presumably cnt2 arrives as a byte count in this case - confirm with callers.
3071 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
3072 shrl(cnt2, 1);
3073 }
3074 // Compute the minimum of the string lengths and the
3075 // difference of the string lengths (stack).
3076 // Do the conditional move stuff
3077 movl(result, cnt1);
3078 subl(cnt1, cnt2);
3079 push(cnt1);
3080 cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
3081
3082 // Is the minimum length zero?
3083 testl(cnt2, cnt2);
3084 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3085 if (ae == StrIntrinsicNode::LL) {
3086 // Load first bytes
3087 load_unsigned_byte(result, Address(str1, 0)); // result = str1[0]
3088 load_unsigned_byte(cnt1, Address(str2, 0)); // cnt1 = str2[0]
3089 } else if (ae == StrIntrinsicNode::UU) {
3090 // Load first characters
3091 load_unsigned_short(result, Address(str1, 0));
3092 load_unsigned_short(cnt1, Address(str2, 0));
3093 } else {
3094 load_unsigned_byte(result, Address(str1, 0));
3095 load_unsigned_short(cnt1, Address(str2, 0));
3096 }
     // If the first elements differ, result already holds the answer; POP_LABEL discards the
     // pushed length difference.
3097 subl(result, cnt1);
3098 jcc(Assembler::notZero, POP_LABEL);
3099
3100 if (ae == StrIntrinsicNode::UU) {
3101 // Divide length by 2 to get number of chars
3102 shrl(cnt2, 1);
3103 }
     // Exactly one element to compare and it matched: only the length difference remains.
3104 cmpl(cnt2, 1);
3105 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
3106
3107 // Check if the strings start at the same location and setup scale and stride
3108 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3109 cmpptr(str1, str2);
3110 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
3111 if (ae == StrIntrinsicNode::LL) {
3112 scale = Address::times_1;
3113 stride = 16;
3114 } else {
3115 scale = Address::times_2;
3116 stride = 8;
3117 }
3118 } else {
3119 scale1 = Address::times_1;
3120 scale2 = Address::times_2;
3121 // scale not used
3122 stride = 8;
3123 }
3124
     // Three code shapes follow: AVX2 + SSE4.2 (32-byte vectors, optional 64-byte loop),
     // SSE4.2 only (16-byte pcmpestri loop), and the element-by-element loop at
     // WHILE_HEAD_LABEL that both vector paths fall back to for short inputs.
3125 if (UseAVX >= 2 && UseSSE42Intrinsics) {
3126 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
3127 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
3128 Label COMPARE_WIDE_VECTORS_LOOP_AVX2;
3129 Label COMPARE_TAIL_LONG;
3130 Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only _LP64 && AVX3
3131
     // pcmpestri control byte: 0x19 = string compare with negated result on unsigned shorts;
     // clearing bit 0 switches to unsigned bytes for LL (see the mode description in the
     // SSE4.2-only branch below).
3132 int pcmpmask = 0x19;
3133 if (ae == StrIntrinsicNode::LL) {
3134 pcmpmask &= ~0x01;
3135 }
3136
3137 // Setup to compare 16-chars (32-bytes) vectors,
3138 // start from first character again because it has aligned address.
3139 if (ae == StrIntrinsicNode::LL) {
3140 stride2 = 32;
3141 } else {
3142 stride2 = 16;
3143 }
3144 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3145 adr_stride = stride << scale;
3146 } else {
3147 adr_stride1 = 8; //stride << scale1;
3148 adr_stride2 = 16; //stride << scale2;
3149 }
3150
3151 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri")do { if (!(result == rax && cnt2 == rdx && cnt1
== rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3151, "assert(" "result == rax && cnt2 == rdx && cnt1 == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
3152 // rax and rdx are used by pcmpestri as elements counters
3153 movl(result, cnt2);
3154 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
3155 jcc(Assembler::zero, COMPARE_TAIL_LONG);
3156
3157 // fast path : compare first 2 8-char vectors.
3158 bind(COMPARE_16_CHARS);
3159 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3160 movdqu(vec1, Address(str1, 0));
3161 } else {
3162 pmovzxbw(vec1, Address(str1, 0));
3163 }
3164 pcmpestri(vec1, Address(str2, 0), pcmpmask);
3165 jccb(Assembler::below, COMPARE_INDEX_CHAR)jccb_0(Assembler::below, COMPARE_INDEX_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3165)
;
3166
3167 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3168 movdqu(vec1, Address(str1, adr_stride));
3169 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
3170 } else {
3171 pmovzxbw(vec1, Address(str1, adr_stride1));
3172 pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask);
3173 }
3174 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS)jccb_0(Assembler::aboveEqual, COMPARE_WIDE_VECTORS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3174)
;
     // Mismatch in the second vector: bias the reported index by one vector of elements.
3175 addl(cnt1, stride);
3176
3177 // Compare the characters at index in cnt1
3178 bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
3179 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
3180 subl(result, cnt2);
3181 jmp(POP_LABEL);
3182
3183 // Setup the registers to start vector comparison loop
     // str1/str2 are advanced by the full minimum length and result becomes a negative
     // element offset from there. The first stride2 elements were already compared by the
     // two pcmpestri above, so both result and cnt2 are reduced by stride2.
3184 bind(COMPARE_WIDE_VECTORS);
3185 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3186 lea(str1, Address(str1, result, scale));
3187 lea(str2, Address(str2, result, scale));
3188 } else {
3189 lea(str1, Address(str1, result, scale1));
3190 lea(str2, Address(str2, result, scale2));
3191 }
3192 subl(result, stride2);
3193 subl(cnt2, stride2);
3194 jcc(Assembler::zero, COMPARE_WIDE_TAIL);
3195 negptr(result);
3196
3197 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
3198 bind(COMPARE_WIDE_VECTORS_LOOP);
3199
     // NOTE(review): `_LP641` looks like `_LP64` followed by its rendered expansion `1` in this
     // annotated listing (the matching #endif comment says _LP64) - confirm against the source.
3200#ifdef _LP641
3201 if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
3202 cmpl(cnt2, stride2x2);
3203 jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2)jccb_0(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3203)
;
3204 testl(cnt2, stride2x2-1); // cnt2 holds the vector count
3205 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3205)
; // means we cannot subtract by 0x40
3206
     // NOTE(review): the trailing comments below mention k7, but the register written by
     // evpcmpeqb here is the `mask` parameter.
3207 bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
3208 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3209 evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
3210 evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
3211 } else {
3212 vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
3213 evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
3214 }
3215 kortestql(mask, mask);
3216 jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
3217 addptr(result, stride2x2); // update since we already compared at this addr
3218 subl(cnt2, stride2x2); // and sub the size too
3219 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3219)
;
3220
3221 vpxor(vec1, vec1);
3222 jmpb(COMPARE_WIDE_TAIL)jmpb_0(COMPARE_WIDE_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3222)
;
3223 }//if (VM_Version::supports_avx512vlbw())
3224#endif // _LP64
3225
3226
     // 32-byte loop: XOR the two inputs and test for any non-zero bit.
3227 bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
3228 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3229 vmovdqu(vec1, Address(str1, result, scale));
3230 vpxor(vec1, Address(str2, result, scale));
3231 } else {
3232 vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit);
3233 vpxor(vec1, Address(str2, result, scale2));
3234 }
3235 vptest(vec1, vec1);
3236 jcc(Assembler::notZero, VECTOR_NOT_EQUAL);
3237 addptr(result, stride2);
3238 subl(cnt2, stride2);
3239 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
3240 // clean upper bits of YMM registers
3241 vpxor(vec1, vec1);
3242
3243 // compare wide vectors tail
     // result == 0 means the loops consumed everything. Otherwise run one more 32-byte
     // iteration over the last stride2 elements (result = -stride2, cnt2 = stride2).
3244 bind(COMPARE_WIDE_TAIL);
3245 testptr(result, result);
3246 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3247
3248 movl(result, stride2);
3249 movl(cnt2, result);
3250 negptr(result);
3251 jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
3252
3253 // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     // Rebase str1/str2 onto the failing vector and reuse the pcmpestri fast path to find
     // the exact mismatching element.
3254 bind(VECTOR_NOT_EQUAL);
3255 // clean upper bits of YMM registers
3256 vpxor(vec1, vec1);
3257 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3258 lea(str1, Address(str1, result, scale));
3259 lea(str2, Address(str2, result, scale));
3260 } else {
3261 lea(str1, Address(str1, result, scale1));
3262 lea(str2, Address(str2, result, scale2));
3263 }
3264 jmp(COMPARE_16_CHARS);
3265
3266 // Compare tail chars, length between 1 to 15 chars
3267 bind(COMPARE_TAIL_LONG);
3268 movl(cnt2, result);
3269 cmpl(cnt2, stride);
3270 jcc(Assembler::less, COMPARE_SMALL_STR);
3271
3272 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3273 movdqu(vec1, Address(str1, 0));
3274 } else {
3275 pmovzxbw(vec1, Address(str1, 0));
3276 }
3277 pcmpestri(vec1, Address(str2, 0), pcmpmask);
3278 jcc(Assembler::below, COMPARE_INDEX_CHAR);
3279 subptr(cnt2, stride);
3280 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3281 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3282 lea(str1, Address(str1, result, scale));
3283 lea(str2, Address(str2, result, scale));
3284 } else {
3285 lea(str1, Address(str1, result, scale1));
3286 lea(str2, Address(str2, result, scale2));
3287 }
     // Hand the remaining elements to the scalar loop as a negative index from the end.
3288 negptr(cnt2);
3289 jmpb(WHILE_HEAD_LABEL)jmpb_0(WHILE_HEAD_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3289)
;
3290
3291 bind(COMPARE_SMALL_STR);
3292 } else if (UseSSE42Intrinsics) {
3293 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
3294 int pcmpmask = 0x19;
3295 // Setup to compare 8-char (16-byte) vectors,
3296 // start from first character again because it has aligned address.
3297 movl(result, cnt2);
3298 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
     // This C++ branch only adjusts the immediate; it emits no code, so the flags set by
     // the andl above are still what the following jcc tests.
3299 if (ae == StrIntrinsicNode::LL) {
3300 pcmpmask &= ~0x01;
3301 }
3302 jcc(Assembler::zero, COMPARE_TAIL);
3303 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3304 lea(str1, Address(str1, result, scale));
3305 lea(str2, Address(str2, result, scale));
3306 } else {
3307 lea(str1, Address(str1, result, scale1));
3308 lea(str2, Address(str2, result, scale2));
3309 }
3310 negptr(result);
3311
3312 // pcmpestri
3313 // inputs:
3314 // vec1- substring
3315 // rax - negative string length (elements count)
3316 // mem - scanned string
3317 // rdx - string length (elements count)
3318 // pcmpmask - cmp mode: 11000 (string compare with negated result)
3319 // + 00 (unsigned bytes) or + 01 (unsigned shorts)
3320 // outputs:
3321 // rcx - first mismatched element index
3322 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri")do { if (!(result == rax && cnt2 == rdx && cnt1
== rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3322, "assert(" "result == rax && cnt2 == rdx && cnt1 == rcx"
") failed", "pcmpestri"); ::breakpoint(); } } while (0)
;
3323
3324 bind(COMPARE_WIDE_VECTORS);
3325 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3326 movdqu(vec1, Address(str1, result, scale));
3327 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
3328 } else {
3329 pmovzxbw(vec1, Address(str1, result, scale1));
3330 pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
3331 }
3332 // After pcmpestri cnt1(rcx) contains mismatched element index
3333
3334 jccb(Assembler::below, VECTOR_NOT_EQUAL)jccb_0(Assembler::below, VECTOR_NOT_EQUAL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3334)
; // CF==1
3335 addptr(result, stride);
3336 subptr(cnt2, stride);
3337 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3337)
;
3338
3339 // compare wide vectors tail
3340 testptr(result, result);
3341 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3342
     // Leftover elements: compare the last `stride` elements once more, overlapping the
     // part that was already checked.
3343 movl(cnt2, stride);
3344 movl(result, stride);
3345 negptr(result);
3346 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3347 movdqu(vec1, Address(str1, result, scale));
3348 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
3349 } else {
3350 pmovzxbw(vec1, Address(str1, result, scale1));
3351 pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
3352 }
3353 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL)jccb_0(Assembler::aboveEqual, LENGTH_DIFF_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3353)
;
3354
3355 // Mismatched characters in the vectors
     // cnt1 (index within the vector) + result (negative vector offset) gives the element
     // index relative to the advanced str1/str2.
3356 bind(VECTOR_NOT_EQUAL);
3357 addptr(cnt1, result);
3358 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
3359 subl(result, cnt2);
3360 jmpb(POP_LABEL)jmpb_0(POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3360)
;
3361
3362 bind(COMPARE_TAIL); // limit is zero
3363 movl(cnt2, result);
3364 // Fallthru to tail compare
3365 }
3366 // Shift str2 and str1 to the end of the arrays, negate min
3367 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
3368 lea(str1, Address(str1, cnt2, scale));
3369 lea(str2, Address(str2, cnt2, scale));
3370 } else {
3371 lea(str1, Address(str1, cnt2, scale1));
3372 lea(str2, Address(str2, cnt2, scale2));
3373 }
3374 decrementl(cnt2); // first character was compared already
3375 negptr(cnt2);
3376
3377 // Compare the rest of the elements
     // cnt2 is a negative index counting up to zero; the loop ends when it reaches zero.
3378 bind(WHILE_HEAD_LABEL);
3379 load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae);
3380 subl(result, cnt1);
3381 jccb(Assembler::notZero, POP_LABEL)jccb_0(Assembler::notZero, POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3381)
;
3382 increment(cnt2);
3383 jccb(Assembler::notZero, WHILE_HEAD_LABEL)jccb_0(Assembler::notZero, WHILE_HEAD_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3383)
;
3384
3385 // Strings are equal up to min length. Return the length difference.
3386 bind(LENGTH_DIFF_LABEL);
3387 pop(result);
3388 if (ae == StrIntrinsicNode::UU) {
3389 // Divide diff by 2 to get number of chars
3390 sarl(result, 1);
3391 }
3392 jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3392)
;
3393
     // NOTE(review): `_LP641` again looks like `_LP64` plus its rendered expansion - confirm.
3394#ifdef _LP641
3395 if (VM_Version::supports_avx512vlbw()) {
3396
     // Reached from the 64-byte loop on a miscompare: invert the equality mask, take the
     // lowest set bit as the offset of the first differing byte lane, halve it for non-LL
     // encodings, and add it to the running index in result before loading both elements.
3397 bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
3398
3399 kmovql(cnt1, mask);
3400 notq(cnt1);
3401 bsfq(cnt2, cnt1);
3402 if (ae != StrIntrinsicNode::LL) {
3403 // Divide diff by 2 to get number of chars
3404 sarl(cnt2, 1);
3405 }
3406 addq(result, cnt2);
     // The str2 element is loaded first in each case because the str1 load overwrites result,
     // which is the index register of both addresses.
3407 if (ae == StrIntrinsicNode::LL) {
3408 load_unsigned_byte(cnt1, Address(str2, result));
3409 load_unsigned_byte(result, Address(str1, result));
3410 } else if (ae == StrIntrinsicNode::UU) {
3411 load_unsigned_short(cnt1, Address(str2, result, scale));
3412 load_unsigned_short(result, Address(str1, result, scale));
3413 } else {
3414 load_unsigned_short(cnt1, Address(str2, result, scale2));
3415 load_unsigned_byte(result, Address(str1, result, scale1));
3416 }
3417 subl(result, cnt1);
3418 jmpb(POP_LABEL)jmpb_0(POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3418)
;
3419 }//if (VM_Version::supports_avx512vlbw())
3420#endif // _LP64
3421
3422 // Discard the stored length difference
3423 bind(POP_LABEL);
3424 pop(cnt1);
3425
3426 // That's it
3427 bind(DONE_LABEL);
     // For UL the sign of the final result is flipped.
3428 if(ae == StrIntrinsicNode::UL) {
3429 negl(result);
3430 }
3431
3432}
3433
3434// Search for Non-ASCII character (Negative byte value) in a byte array,
3435// return true if it has any and false otherwise.
3436// ..\jdk\src\java.base\share\classes\java\lang\StringCoding.java
3437// @IntrinsicCandidate
3438// private static boolean hasNegatives(byte[] ba, int off, int len) {
3439// for (int i = off; i < off + len; i++) {
3440// if (ba[i] < 0) {
3441// return true;
3442// }
3443// }
3444// return false;
3445// }
3446void C2_MacroAssembler::has_negatives(Register ary1, Register len,
3447 Register result, Register tmp1,
3448 XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
3449 // rsi: byte array
3450 // rcx: len
3451 // rax: result
3452 ShortBranchVerifier sbv(this);
3453 assert_different_registers(ary1, len, result, tmp1);
3454 assert_different_registers(vec1, vec2);
3455 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
3456
3457 // len == 0
3458 testl(len, len);
3459 jcc(Assembler::zero, FALSE_LABEL);
3460
3461 if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512
3462 VM_Version::supports_avx512vlbw() &&
3463 VM_Version::supports_bmi2()) {
3464
3465 Label test_64_loop, test_tail;
3466 Register tmp3_aliased = len;
3467
3468 movl(tmp1, len);
3469 vpxor(vec2, vec2, vec2, Assembler::AVX_512bit);
3470
3471 andl(tmp1, 64 - 1); // tail count (in chars) 0x3F
3472 andl(len, ~(64 - 1)); // vector count (in chars)
3473 jccb(Assembler::zero, test_tail)jccb_0(Assembler::zero, test_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3473)
;
3474
3475 lea(ary1, Address(ary1, len, Address::times_1));
3476 negptr(len);
3477
3478 bind(test_64_loop);
3479 // Check whether our 64 elements of size byte contain negatives
3480 evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
3481 kortestql(mask1, mask1);
3482 jcc(Assembler::notZero, TRUE_LABEL);
3483
3484 addptr(len, 64);
3485 jccb(Assembler::notZero, test_64_loop)jccb_0(Assembler::notZero, test_64_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3485)
;
3486
3487
3488 bind(test_tail);
3489 // bail out when there is nothing to be done
3490 testl(tmp1, -1);
3491 jcc(Assembler::zero, FALSE_LABEL);
3492
3493 // ~(~0 << len) applied up to two times (for 32-bit scenario)
3494#ifdef _LP641
3495 mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
3496 shlxq(tmp3_aliased, tmp3_aliased, tmp1);
3497 notq(tmp3_aliased);
3498 kmovql(mask2, tmp3_aliased);
3499#else
3500 Label k_init;
3501 jmp(k_init);
3502
3503 // We could not read 64-bits from a general purpose register thus we move
3504 // data required to compose 64 1's to the instruction stream
3505 // We emit 64 byte wide series of elements from 0..63 which later on would
3506 // be used as a compare targets with tail count contained in tmp1 register.
3507 // Result would be a k register having tmp1 consecutive number or 1
3508 // counting from least significant bit.
3509 address tmp = pc();
3510 emit_int64(0x0706050403020100);
3511 emit_int64(0x0F0E0D0C0B0A0908);
3512 emit_int64(0x1716151413121110);
3513 emit_int64(0x1F1E1D1C1B1A1918);
3514 emit_int64(0x2726252423222120);
3515 emit_int64(0x2F2E2D2C2B2A2928);
3516 emit_int64(0x3736353433323130);
3517 emit_int64(0x3F3E3D3C3B3A3938);
3518
3519 bind(k_init);
3520 lea(len, InternalAddress(tmp));
3521 // create mask to test for negative byte inside a vector
3522 evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
3523 evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);
3524
3525#endif
3526 evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
3527 ktestq(mask1, mask2);
3528 jcc(Assembler::notZero, TRUE_LABEL);
3529
3530 jmp(FALSE_LABEL);
3531 } else {
3532 movl(result, len); // copy
3533
3534 if (UseAVX >= 2 && UseSSE >= 2) {
3535 // With AVX2, use 32-byte vector compare
3536 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3537
3538 // Compare 32-byte vectors
3539 andl(result, 0x0000001f); // tail count (in bytes)
3540 andl(len, 0xffffffe0); // vector count (in bytes)
3541 jccb(Assembler::zero, COMPARE_TAIL)jccb_0(Assembler::zero, COMPARE_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3541)
;
3542
3543 lea(ary1, Address(ary1, len, Address::times_1));
3544 negptr(len);
3545
3546 movl(tmp1, 0x80808080); // create mask to test for Unicode chars in vector
3547 movdl(vec2, tmp1);
3548 vpbroadcastd(vec2, vec2, Assembler::AVX_256bit);
3549
3550 bind(COMPARE_WIDE_VECTORS);
3551 vmovdqu(vec1, Address(ary1, len, Address::times_1));
3552 vptest(vec1, vec2);
3553 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3553)
;
3554 addptr(len, 32);
3555 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3556
3557 testl(result, result);
3558 jccb(Assembler::zero, FALSE_LABEL)jccb_0(Assembler::zero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3558)
;
3559
3560 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
3561 vptest(vec1, vec2);
3562 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3562)
;
3563 jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3563)
;
3564
3565 bind(COMPARE_TAIL); // len is zero
3566 movl(len, result);
3567 // Fallthru to tail compare
3568 } else if (UseSSE42Intrinsics) {
3569 // With SSE4.2, use double quad vector compare
3570 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3571
3572 // Compare 16-byte vectors
3573 andl(result, 0x0000000f); // tail count (in bytes)
3574 andl(len, 0xfffffff0); // vector count (in bytes)
3575 jcc(Assembler::zero, COMPARE_TAIL);
3576
3577 lea(ary1, Address(ary1, len, Address::times_1));
3578 negptr(len);
3579
3580 movl(tmp1, 0x80808080);
3581 movdl(vec2, tmp1);
3582 pshufd(vec2, vec2, 0);
3583
3584 bind(COMPARE_WIDE_VECTORS);
3585 movdqu(vec1, Address(ary1, len, Address::times_1));
3586 ptest(vec1, vec2);
3587 jcc(Assembler::notZero, TRUE_LABEL);
3588 addptr(len, 16);
3589 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3590
3591 testl(result, result);
3592 jcc(Assembler::zero, FALSE_LABEL);
3593
3594 movdqu(vec1, Address(ary1, result, Address::times_1, -16));
3595 ptest(vec1, vec2);
3596 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3596)
;
3597 jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3597)
;
3598
3599 bind(COMPARE_TAIL); // len is zero
3600 movl(len, result);
3601 // Fallthru to tail compare
3602 }
3603 }
3604 // Compare 4-byte vectors
3605 andl(len, 0xfffffffc); // vector count (in bytes)
3606 jccb(Assembler::zero, COMPARE_CHAR)jccb_0(Assembler::zero, COMPARE_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3606)
;
3607
3608 lea(ary1, Address(ary1, len, Address::times_1));
3609 negptr(len);
3610
3611 bind(COMPARE_VECTORS);
3612 movl(tmp1, Address(ary1, len, Address::times_1));
3613 andl(tmp1, 0x80808080);
3614 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3614)
;
3615 addptr(len, 4);
3616 jcc(Assembler::notZero, COMPARE_VECTORS);
3617
3618 // Compare trailing char (final 2 bytes), if any
3619 bind(COMPARE_CHAR);
3620 testl(result, 0x2); // tail char
3621 jccb(Assembler::zero, COMPARE_BYTE)jccb_0(Assembler::zero, COMPARE_BYTE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3621)
;
3622 load_unsigned_short(tmp1, Address(ary1, 0));
3623 andl(tmp1, 0x00008080);
3624 jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3624)
;
3625 subptr(result, 2);
3626 lea(ary1, Address(ary1, 2));
3627
3628 bind(COMPARE_BYTE);
3629 testl(result, 0x1); // tail byte
3630 jccb(Assembler::zero, FALSE_LABEL)jccb_0(Assembler::zero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3630)
;
3631 load_unsigned_byte(tmp1, Address(ary1, 0));
3632 andl(tmp1, 0x00000080);
3633 jccb(Assembler::notEqual, TRUE_LABEL)jccb_0(Assembler::notEqual, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3633)
;
3634 jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3634)
;
3635
3636 bind(TRUE_LABEL);
3637 movl(result, 1); // return true
3638 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3638)
;
3639
3640 bind(FALSE_LABEL);
3641 xorl(result, result); // return false
3642
3643 // That's it
3644 bind(DONE);
3645 if (UseAVX >= 2 && UseSSE >= 2) {
3646 // clean upper bits of YMM registers
3647 vpxor(vec1, vec1);
3648 vpxor(vec2, vec2);
3649 }
3650}
3651// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
//
// Emits code that compares the data at ary1 and ary2 and leaves 1 in `result`
// when the contents match and 0 when they differ.
//
//   is_array_equ - when true, ary1/ary2 are handled as array oops: identity,
//                  null and length checks are emitted, `limit` is loaded from
//                  the array length field and both registers are advanced by
//                  base_offset. When false, ary1/ary2 and `limit` are used as
//                  supplied by the caller.
//   ary1, ary2   - inputs; both registers are advanced while comparing.
//   limit        - count register; masked, negated and finally reused as a
//                  scalar scratch register, so its value is lost.
//   result       - output flag; holds the tail byte count while comparing.
//   chr          - scratch register for the 4/2/1-byte scalar compares.
//   vec1, vec2   - scratch vector registers; cleared at DONE when UseAVX >= 2.
//   is_char      - together with is_array_equ selects the T_CHAR base offset
//                  and doubles `limit` to turn a char count into a byte count.
//   mask         - opmask register used only by the 64-byte AVX-512 loop.
//
// NOTE(review): this listing comes from an analyzer report. The text that
// follows each jccb()/jmpb() call (jccb_0(...)/jmpb_0(...) plus file and line)
// looks like the rendered macro expansion of that call rather than a second
// statement - confirm against the real source file.
3652void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
3653 Register limit, Register result, Register chr,
3654 XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
3655 ShortBranchVerifier sbv(this);
3656 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
3657
3658 int length_offset = arrayOopDesc::length_offset_in_bytes();
3659 int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
3660
3661 if (is_array_equ) {
3662 // Check the input args
// Same oop on both sides: equal without looking at the contents.
3663 cmpoop(ary1, ary2);
3664 jcc(Assembler::equal, TRUE_LABEL);
3665
3666 // Need additional checks for arrays_equals.
// The oops differ here, so a null on either side means "not equal".
3667 testptr(ary1, ary1);
3668 jcc(Assembler::zero, FALSE_LABEL);
3669 testptr(ary2, ary2);
3670 jcc(Assembler::zero, FALSE_LABEL);
3671
3672 // Check the lengths
3673 movl(limit, Address(ary1, length_offset));
3674 cmpl(limit, Address(ary2, length_offset));
3675 jcc(Assembler::notEqual, FALSE_LABEL);
3676 }
3677
3678 // count == 0
3679 testl(limit, limit);
3680 jcc(Assembler::zero, TRUE_LABEL);
3681
3682 if (is_array_equ) {
3683 // Load array address
3684 lea(ary1, Address(ary1, base_offset));
3685 lea(ary2, Address(ary2, base_offset));
3686 }
3687
3688 if (is_array_equ && is_char) {
3689 // arrays_equals when used for char[].
3690 shll(limit, 1); // byte count != 0
3691 }
// From here on `result` keeps the byte count (later only its tail bits) while
// `limit` is turned into the vector-loop counter.
3692 movl(result, limit); // copy
3693
3694 if (UseAVX >= 2) {
3695 // With AVX2, use 32-byte vector compare
3696 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3697
3698 // Compare 32-byte vectors
3699 andl(result, 0x0000001f); // tail count (in bytes)
3700 andl(limit, 0xffffffe0); // vector count (in bytes)
3701 jcc(Assembler::zero, COMPARE_TAIL);
3702
// Point ary1/ary2 just past the vectorized region and negate limit, so the
// loops below index with a negative offset that counts up to zero.
3703 lea(ary1, Address(ary1, limit, Address::times_1));
3704 lea(ary2, Address(ary2, limit, Address::times_1));
3705 negptr(limit);
3706
// NOTE(review): `_LP641` is presumably `_LP64` followed by its rendered
// expansion `1` (the matching #endif below names _LP64) - confirm.
3707#ifdef _LP641
3708 if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
3709 Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3;
3710
// Fewer than 64 bytes in the vectorized region: use the 32-byte loop instead.
3711 cmpl(limit, -64);
3712 jcc(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2);
3713
3714 bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
3715
3716 evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
3717 evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
3718 kortestql(mask, mask);
3719 jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
3720 addptr(limit, 64); // update since we already compared at this addr
3721 cmpl(limit, -64);
3722 jccb(Assembler::lessEqual, COMPARE_WIDE_VECTORS_LOOP_AVX3)jccb_0(Assembler::lessEqual, COMPARE_WIDE_VECTORS_LOOP_AVX3, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3722)
;
3723
3724 // At this point we may still need to compare -limit+result bytes.
3725 // We could execute the next two instruction and just continue via non-wide path:
3726 // cmpl(limit, 0);
3727 // jcc(Assembler::equal, COMPARE_TAIL); // true
3728 // But since we stopped at the points ary{1,2}+limit which are
3729 // not farther than 64 bytes from the ends of arrays ary{1,2}+result
3730 // (|limit| <= 32 and result < 32),
3731 // we may just compare the last 64 bytes.
3732 //
3733 addptr(result, -64); // it is safe, bc we just came from this area
3734 evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
3735 evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
3736 kortestql(mask, mask);
3737 jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
3738
3739 jmp(TRUE_LABEL);
3740
3741 bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
3742
3743 }//if (VM_Version::supports_avx512vlbw())
3744#endif //_LP64
// 32-byte loop: XOR the two vectors and test the result; any set bit is a
// mismatch. The loop ends when the negative offset in limit reaches zero.
3745 bind(COMPARE_WIDE_VECTORS);
3746 vmovdqu(vec1, Address(ary1, limit, Address::times_1));
3747 vmovdqu(vec2, Address(ary2, limit, Address::times_1));
3748 vpxor(vec1, vec2);
3749
3750 vptest(vec1, vec1);
3751 jcc(Assembler::notZero, FALSE_LABEL);
3752 addptr(limit, 32);
3753 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3754
// No tail bytes left: everything compared equal.
3755 testl(result, result);
3756 jcc(Assembler::zero, TRUE_LABEL);
3757
// Tail: compare the 32 bytes that end at ary + result (the end of the data).
// This overlaps bytes already checked, which replaces the scalar tail code.
3758 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
3759 vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
3760 vpxor(vec1, vec2);
3761
3762 vptest(vec1, vec1);
3763 jccb(Assembler::notZero, FALSE_LABEL)jccb_0(Assembler::notZero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3763)
;
3764 jmpb(TRUE_LABEL)jmpb_0(TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3764)
;
3765
// Reached only when the data is shorter than one 32-byte vector; restore the
// byte count into limit for the scalar code below.
3766 bind(COMPARE_TAIL); // limit is zero
3767 movl(limit, result);
3768 // Fallthru to tail compare
3769 } else if (UseSSE42Intrinsics) {
3770 // With SSE4.2, use double quad vector compare
3771 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
3772
3773 // Compare 16-byte vectors
3774 andl(result, 0x0000000f); // tail count (in bytes)
3775 andl(limit, 0xfffffff0); // vector count (in bytes)
3776 jcc(Assembler::zero, COMPARE_TAIL);
3777
// Same negative-offset scheme as the AVX2 path, with 16-byte steps.
3778 lea(ary1, Address(ary1, limit, Address::times_1));
3779 lea(ary2, Address(ary2, limit, Address::times_1));
3780 negptr(limit);
3781
3782 bind(COMPARE_WIDE_VECTORS);
3783 movdqu(vec1, Address(ary1, limit, Address::times_1));
3784 movdqu(vec2, Address(ary2, limit, Address::times_1));
3785 pxor(vec1, vec2);
3786
3787 ptest(vec1, vec1);
3788 jcc(Assembler::notZero, FALSE_LABEL);
3789 addptr(limit, 16);
3790 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
3791
3792 testl(result, result);
3793 jcc(Assembler::zero, TRUE_LABEL);
3794
// Tail: overlapping compare of the last 16 bytes of the data.
3795 movdqu(vec1, Address(ary1, result, Address::times_1, -16));
3796 movdqu(vec2, Address(ary2, result, Address::times_1, -16));
3797 pxor(vec1, vec2);
3798
3799 ptest(vec1, vec1);
3800 jccb(Assembler::notZero, FALSE_LABEL)jccb_0(Assembler::notZero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3800)
;
3801 jmpb(TRUE_LABEL)jmpb_0(TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3801)
;
3802
3803 bind(COMPARE_TAIL); // limit is zero
3804 movl(limit, result);
3805 // Fallthru to tail compare
3806 }
3807
// Scalar path: used when no vector path was generated, or when the data was
// too short for a single vector.
3808 // Compare 4-byte vectors
3809 andl(limit, 0xfffffffc); // vector count (in bytes)
3810 jccb(Assembler::zero, COMPARE_CHAR)jccb_0(Assembler::zero, COMPARE_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3810)
;
3811
3812 lea(ary1, Address(ary1, limit, Address::times_1));
3813 lea(ary2, Address(ary2, limit, Address::times_1));
3814 negptr(limit);
3815
3816 bind(COMPARE_VECTORS);
3817 movl(chr, Address(ary1, limit, Address::times_1));
3818 cmpl(chr, Address(ary2, limit, Address::times_1));
3819 jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3819)
;
3820 addptr(limit, 4);
3821 jcc(Assembler::notZero, COMPARE_VECTORS);
3822
3823 // Compare trailing char (final 2 bytes), if any
3824 bind(COMPARE_CHAR);
3825 testl(result, 0x2); // tail char
3826 jccb(Assembler::zero, COMPARE_BYTE)jccb_0(Assembler::zero, COMPARE_BYTE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3826)
;
// limit is no longer needed as a counter here and is reused as scratch.
3827 load_unsigned_short(chr, Address(ary1, 0));
3828 load_unsigned_short(limit, Address(ary2, 0));
3829 cmpl(chr, limit);
3830 jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3830)
;
3831
// For char[] array equality the byte count was produced by doubling, so it is
// always even and no single-byte tail compare is emitted.
3832 if (is_array_equ && is_char) {
3833 bind(COMPARE_BYTE);
3834 } else {
3835 lea(ary1, Address(ary1, 2));
3836 lea(ary2, Address(ary2, 2));
3837
3838 bind(COMPARE_BYTE);
3839 testl(result, 0x1); // tail byte
3840 jccb(Assembler::zero, TRUE_LABEL)jccb_0(Assembler::zero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3840)
;
3841 load_unsigned_byte(chr, Address(ary1, 0));
3842 load_unsigned_byte(limit, Address(ary2, 0));
3843 cmpl(chr, limit);
3844 jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3844)
;
3845 }
3846 bind(TRUE_LABEL);
3847 movl(result, 1); // return true
3848 jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3848)
;
3849
3850 bind(FALSE_LABEL);
3851 xorl(result, result); // return false
3852
3853 // That's it
3854 bind(DONE);
3855 if (UseAVX >= 2) {
3856 // clean upper bits of YMM registers
3857 vpxor(vec1, vec1);
3858 vpxor(vec2, vec2);
3859 }
3860}
3861
// Masked vector operation with an immediate operand: picks the emitter that
// matches ideal_opc and forwards dst, mask, src1, imm8, merge and vlen_enc.
//
//   ideal_opc - operation selector; only the shift and rotate opcodes listed
//               in the switch are accepted.
//   eType     - forwarded only to the rotate helpers (evrord/evrold); the
//               shift emitters are already chosen per element size by opcode.
//   mask      - opmask register passed through to the emitter.
//   merge     - passed through unchanged to the emitter.
//
// Any other opcode ends in fatal("Unsupported masked operation").
// NOTE(review): the text after the fatal(...) call below looks like the
// analyzer's rendering of the macro expansion, not extra statements.
3862void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
3863 XMMRegister src1, int imm8, bool merge, int vlen_enc) {
3864 switch(ideal_opc) {
// Shifts are qualified with Assembler:: explicitly; the rotates below go
// through the unqualified evrord/evrold helpers that take eType.
3865 case Op_LShiftVS:
3866 Assembler::evpsllw(dst, mask, src1, imm8, merge, vlen_enc); break;
3867 case Op_LShiftVI:
3868 Assembler::evpslld(dst, mask, src1, imm8, merge, vlen_enc); break;
3869 case Op_LShiftVL:
3870 Assembler::evpsllq(dst, mask, src1, imm8, merge, vlen_enc); break;
3871 case Op_RShiftVS:
3872 Assembler::evpsraw(dst, mask, src1, imm8, merge, vlen_enc); break;
3873 case Op_RShiftVI:
3874 Assembler::evpsrad(dst, mask, src1, imm8, merge, vlen_enc); break;
3875 case Op_RShiftVL:
3876 Assembler::evpsraq(dst, mask, src1, imm8, merge, vlen_enc); break;
3877 case Op_URShiftVS:
3878 Assembler::evpsrlw(dst, mask, src1, imm8, merge, vlen_enc); break;
3879 case Op_URShiftVI:
3880 Assembler::evpsrld(dst, mask, src1, imm8, merge, vlen_enc); break;
3881 case Op_URShiftVL:
3882 Assembler::evpsrlq(dst, mask, src1, imm8, merge, vlen_enc); break;
3883 case Op_RotateRightV:
3884 evrord(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
3885 case Op_RotateLeftV:
3886 evrold(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
3887 default:
3888 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3888, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
3889 }
3890}
3891
// Masked vector operation with two register sources: picks the emitter that
// matches ideal_opc and forwards the operands to it.
//
//   ideal_opc   - operation selector (arithmetic, sqrt, abs, fma, rearrange,
//                 shifts, rotates, min/max and bitwise logic; see the switch).
//   eType       - forwarded only to the helpers that take an element type
//                 (evperm, evrold/evrord, evpmaxs/evpmins, evxor/evor/evand).
//   mask, merge - passed through unchanged to the emitter.
//   is_varshift - forwarded only to the shift emitters.
//
// Operand usage is not uniform: the Abs cases pass only src2, and
// Op_VectorRearrange passes the sources in the order (src2, src1).
// Any other opcode ends in fatal("Unsupported masked operation").
3892void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
3893 XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc,
3894 bool is_varshift) {
3895 switch (ideal_opc) {
3896 case Op_AddVB:
3897 evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
3898 case Op_AddVS:
3899 evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
3900 case Op_AddVI:
3901 evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
3902 case Op_AddVL:
3903 evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
3904 case Op_AddVF:
3905 evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
3906 case Op_AddVD:
3907 evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
3908 case Op_SubVB:
3909 evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
3910 case Op_SubVS:
3911 evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
3912 case Op_SubVI:
3913 evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
3914 case Op_SubVL:
3915 evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
3916 case Op_SubVF:
3917 evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
3918 case Op_SubVD:
3919 evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
3920 case Op_MulVS:
3921 evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
3922 case Op_MulVI:
3923 evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
3924 case Op_MulVL:
3925 evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
3926 case Op_MulVF:
3927 evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
3928 case Op_MulVD:
3929 evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
3930 case Op_DivVF:
3931 evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
3932 case Op_DivVD:
3933 evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
3934 case Op_SqrtVF:
3935 evsqrtps(dst, mask, src1, src2, merge, vlen_enc); break;
3936 case Op_SqrtVD:
3937 evsqrtpd(dst, mask, src1, src2, merge, vlen_enc); break;
// Abs takes a single source: src2 is the value operand and src1 is not passed.
3938 case Op_AbsVB:
3939 evpabsb(dst, mask, src2, merge, vlen_enc); break;
3940 case Op_AbsVS:
3941 evpabsw(dst, mask, src2, merge, vlen_enc); break;
3942 case Op_AbsVI:
3943 evpabsd(dst, mask, src2, merge, vlen_enc); break;
3944 case Op_AbsVL:
3945 evpabsq(dst, mask, src2, merge, vlen_enc); break;
3946 case Op_FmaVF:
3947 evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
3948 case Op_FmaVD:
3949 evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
// Note the swapped source order (src2 before src1) for the permute helper.
3950 case Op_VectorRearrange:
3951 evperm(eType, dst, mask, src2, src1, merge, vlen_enc); break;
3952 case Op_LShiftVS:
3953 evpsllw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3954 case Op_LShiftVI:
3955 evpslld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3956 case Op_LShiftVL:
3957 evpsllq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3958 case Op_RShiftVS:
3959 evpsraw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3960 case Op_RShiftVI:
3961 evpsrad(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3962 case Op_RShiftVL:
3963 evpsraq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3964 case Op_URShiftVS:
3965 evpsrlw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3966 case Op_URShiftVI:
3967 evpsrld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3968 case Op_URShiftVL:
3969 evpsrlq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
3970 case Op_RotateLeftV:
3971 evrold(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3972 case Op_RotateRightV:
3973 evrord(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3974 case Op_MaxV:
3975 evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3976 case Op_MinV:
3977 evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3978 case Op_XorV:
3979 evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3980 case Op_OrV:
3981 evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3982 case Op_AndV:
3983 evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
3984 default:
3985 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 3985, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
3986 }
3987}
3988
// Masked vector operation whose second source is a memory operand: picks the
// emitter that matches ideal_opc and forwards dst, mask, src1, src2, merge and
// vlen_enc.
//
//   ideal_opc - operation selector. This overload handles add, sub, mul, div,
//               fma, min/max and bitwise logic only; it has no cases for sqrt,
//               abs, rearrange, shifts or rotates.
//   eType     - forwarded only to evpmaxs/evpmins and evxor/evor/evand.
//   src2      - Address operand passed through to the emitter.
//
// Any other opcode ends in fatal("Unsupported masked operation").
3989void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
3990 XMMRegister src1, Address src2, bool merge, int vlen_enc) {
3991 switch (ideal_opc) {
3992 case Op_AddVB:
3993 evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
3994 case Op_AddVS:
3995 evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
3996 case Op_AddVI:
3997 evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
3998 case Op_AddVL:
3999 evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
4000 case Op_AddVF:
4001 evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
4002 case Op_AddVD:
4003 evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
4004 case Op_SubVB:
4005 evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
4006 case Op_SubVS:
4007 evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
4008 case Op_SubVI:
4009 evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
4010 case Op_SubVL:
4011 evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
4012 case Op_SubVF:
4013 evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
4014 case Op_SubVD:
4015 evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
4016 case Op_MulVS:
4017 evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
4018 case Op_MulVI:
4019 evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
4020 case Op_MulVL:
4021 evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
4022 case Op_MulVF:
4023 evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
4024 case Op_MulVD:
4025 evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
4026 case Op_DivVF:
4027 evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
4028 case Op_DivVD:
4029 evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
4030 case Op_FmaVF:
4031 evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
4032 case Op_FmaVD:
4033 evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
4034 case Op_MaxV:
4035 evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4036 case Op_MinV:
4037 evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4038 case Op_XorV:
4039 evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4040 case Op_OrV:
4041 evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4042 case Op_AndV:
4043 evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
4044 default:
4045 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4045, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
4046 }
4047}
4048
// Logical operation between two opmask registers: dst = src1 <op> src2.
//
//   ideal_opc - Op_AndVMask, Op_OrVMask or Op_XorVMask; anything else ends in
//               fatal("Unsupported masked operation").
//   mask_len  - number of mask bits; used only to pick the element type that
//               is handed to kand/kor/kxor:
//                 2, 4, 8 -> T_BYTE, 16 -> T_SHORT, 32 -> T_INT, 64 -> T_LONG.
//               Any other length ends in fatal("Unsupported type").
4049void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
4050 KRegister src1, KRegister src2) {
4051 BasicType etype = T_ILLEGAL;
4052 switch(mask_len) {
// Lengths 2 and 4 fall through to the 8-bit case.
4053 case 2:
4054 case 4:
4055 case 8: etype = T_BYTE; break;
4056 case 16: etype = T_SHORT; break;
4057 case 32: etype = T_INT; break;
4058 case 64: etype = T_LONG; break;
4059 default: fatal("Unsupported type")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4059, "Unsupported type"); ::breakpoint(); } while (0)
; break;
4060 }
4061 assert(etype != T_ILLEGAL, "")do { if (!(etype != T_ILLEGAL)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4061, "assert(" "etype != T_ILLEGAL" ") failed", ""); ::breakpoint
(); } } while (0)
;
4062 switch(ideal_opc) {
4063 case Op_AndVMask:
4064 kand(etype, dst, src1, src2); break;
4065 case Op_OrVMask:
4066 kor(etype, dst, src1, src2); break;
4067 case Op_XorVMask:
4068 kxor(etype, dst, src1, src2); break;
4069 default:
4070 fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4070, "Unsupported masked operation"); ::breakpoint(); } while
(0)
; break;
4071 }
4072}
4073
4074/*
4075 * Algorithm for vector D2L and F2I conversions:
4076 * a) Perform the vector D2L/F2I cast.
4077 * b) Take the fast path if no lane of the result vector contains the 0x80000000 value.
4078 * A lane holding that value signifies that the source value could be one of the
4079 * special floating point values (NaN, -Inf, Inf, Max, -Min).
4080 * c) Set the destination lane to zero if the source is a NaN value.
4081 * d) Replace 0x80000000 with MaxInt if the source lane contains a +ve value.
4082 */
4083
// Emits a vector double -> long conversion with a fix-up path for lanes whose
// converted value equals the pattern loaded from double_sign_flip.
//
//   dst              - receives the converted lanes.
//   src              - source lanes; only read.
//   xtmp1, xtmp2     - scratch vectors (sentinel pattern / zero, then inverted
//                      sentinel).
//   ktmp1, ktmp2     - scratch opmask registers.
//   double_sign_flip - address of the sentinel pattern compared against dst.
//   scratch          - register handed to evmovdqul for the literal load.
//   vec_enc          - vector length encoding passed to every instruction.
4084void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
4085 KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
4086 Register scratch, int vec_enc) {
4087 Label done;
4088 evcvttpd2qq(dst, src, vec_enc);
// ktmp1 marks the lanes whose result equals the sentinel; if there are none
// the conversion is already final.
4089 evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch);
4090 evpcmpeqq(ktmp1, xtmp1, dst, vec_enc);
4091 kortestwl(ktmp1, ktmp1);
4092 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4092)
;
4093
// NaN lanes (src unordered with itself) are overwritten with zero.
4094 vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
4095 evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
4096 evmovdquq(dst, ktmp2, xtmp2, true, vec_enc);
4097
// Drop the NaN lanes from ktmp1, then keep only the remaining lanes where src
// is not less than zero (xtmp2 is still all zero here).
4098 kxorwl(ktmp1, ktmp1, ktmp2);
4099 evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
// Truth table 0x11 with both sources equal to xtmp1 yields NOT xtmp1, i.e.
// the bitwise inverse of the sentinel, which is stored into those lanes.
4100 vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
4101 evmovdquq(dst, ktmp1, xtmp2, true, vec_enc);
4102 bind(done);
4103}
4104
// Emits a vector float -> int conversion for AVX (no opmask registers) with a
// fix-up path for lanes whose converted value equals the pattern loaded from
// float_sign_flip. Lane selection is done with vector masks and vblendvps.
//
//   dst             - receives the converted lanes.
//   src             - source lanes; only read.
//   xtmp1..xtmp4    - scratch vectors.
//   float_sign_flip - address of the sentinel pattern compared against dst.
//   scratch         - register handed to vmovdqu for the literal load.
//   vec_enc         - vector length encoding passed to every instruction.
4105void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
4106 XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4,
4107 AddressLiteral float_sign_flip, Register scratch, int vec_enc) {
4108 Label done;
4109 vcvttps2dq(dst, src, vec_enc);
// xtmp2 = lanes whose result equals the sentinel; skip the fix-up when the
// compare mask is all zero.
4110 vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc);
4111 vpcmpeqd(xtmp2, dst, xtmp1, vec_enc);
4112 vptest(xtmp2, xtmp2, vec_enc);
4113 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4113)
;
4114
// xtmp4 = all ones, so xtmp1 becomes the bitwise inverse of the sentinel.
4115 vpcmpeqd(xtmp4, xtmp4, xtmp4, vec_enc);
4116 vpxor(xtmp1, xtmp1, xtmp4, vec_enc);
4117
// xtmp4 = zero; xtmp3 = NaN lanes (src unordered with itself); those lanes of
// dst are replaced with zero.
4118 vpxor(xtmp4, xtmp4, xtmp4, vec_enc);
4119 vcmpps(xtmp3, src, src, Assembler::UNORD_Q, vec_enc);
4120 vblendvps(dst, dst, xtmp4, xtmp3, vec_enc);
4121
4122 // Recompute the mask for remaining special value.
4123 vpxor(xtmp2, xtmp2, xtmp3, vec_enc);
4124 // Extract SRC values corresponding to TRUE mask lanes.
4125 vpand(xtmp4, xtmp2, src, vec_enc);
4126 // Flip mask bits so that MSB bit of MASK lanes corresponding to +ve special
4127 // values are set.
4128 vpxor(xtmp3, xtmp2, xtmp4, vec_enc);
4129
// Lanes selected by xtmp3 receive the inverted sentinel from xtmp1.
4130 vblendvps(dst, dst, xtmp1, xtmp3, vec_enc);
4131 bind(done);
4132}
4133
// Emits a vector float -> int conversion using opmask registers, with a
// fix-up path for lanes whose converted value equals the pattern loaded from
// float_sign_flip. Same structure as the D2L EVEX variant, on 32-bit lanes.
//
//   dst             - receives the converted lanes.
//   src             - source lanes; only read.
//   xtmp1, xtmp2    - scratch vectors (sentinel pattern / zero, then inverted
//                     sentinel).
//   ktmp1, ktmp2    - scratch opmask registers.
//   float_sign_flip - address of the sentinel pattern compared against dst.
//   scratch         - register handed to evmovdqul for the literal load.
//   vec_enc         - vector length encoding passed to every instruction.
4134void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
4135 KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip,
4136 Register scratch, int vec_enc) {
4137 Label done;
4138 vcvttps2dq(dst, src, vec_enc);
// ktmp1 marks the lanes whose result equals the sentinel; none set means the
// conversion is already final.
4139 evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch);
4140 Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc);
4141 kortestwl(ktmp1, ktmp1);
4142 jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4142)
;
4143
// NaN lanes (src unordered with itself) are overwritten with zero.
4144 vpxor(xtmp2, xtmp2, xtmp2, vec_enc);
4145 evcmpps(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc);
4146 evmovdqul(dst, ktmp2, xtmp2, true, vec_enc);
4147
// Drop the NaN lanes from ktmp1, then keep only the remaining lanes where src
// is not less than zero (xtmp2 is still all zero here).
4148 kxorwl(ktmp1, ktmp1, ktmp2);
4149 evcmpps(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc);
// Truth table 0x11 with both sources equal to xtmp1 yields NOT xtmp1, i.e.
// the bitwise inverse of the sentinel, which is stored into those lanes.
4150 vpternlogd(xtmp2, 0x11, xtmp1, xtmp1, vec_enc);
4151 evmovdqul(dst, ktmp1, xtmp2, true, vec_enc);
4152 bind(done);
4153}
4154
4155#ifdef _LP641
// Expands the low mask_len bits of `src` into a byte vector in `dst`, one byte
// per mask bit, each byte holding 0 or 1.
//
//   dst          - destination vector.
//   src          - register holding the mask bits; only read.
//   rtmp1, rtmp2 - scratch general registers (rtmp2 holds the shifted copy of
//                  src when more than 8 bits are processed).
//   xtmp         - scratch vector used to assemble 16 bytes at a time.
//   mask_len     - number of mask bits; the loop asserts that what remains
//                  after the first 8 bits is a multiple of 8.
//   vec_enc      - vector length encoding, used here only for zeroing xtmp.
//
// `index` and `vindex` are code-generation-time counters: the while loop is
// unrolled into straight-line code when the stub is generated.
4156void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
4157 Register rtmp2, XMMRegister xtmp, int mask_len,
4158 int vec_enc) {
4159 int index = 0;
4160 int vindex = 0;
// pdep with the 0x01-per-byte pattern deposits mask bit i into the lowest bit
// of byte i, producing the first 8 bytes of the result.
4161 mov64(rtmp1, 0x0101010101010101L);
4162 pdep(rtmp1, src, rtmp1);
4163 if (mask_len > 8) {
4164 movq(rtmp2, src);
4165 vpxor(xtmp, xtmp, xtmp, vec_enc);
4166 movq(xtmp, rtmp1);
4167 }
4168 movq(dst, rtmp1);
4169
4170 mask_len -= 8;
4171 while (mask_len > 0) {
4172 assert ((mask_len & 0x7) == 0, "mask must be multiple of 8")do { if (!((mask_len & 0x7) == 0)) { (*g_assert_poison) =
'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4172, "assert(" "(mask_len & 0x7) == 0" ") failed", "mask must be multiple of 8"
); ::breakpoint(); } } while (0)
;
4173 index++;
// Starting a new 16-byte group (even index): clear the staging vector.
4174 if ((index % 2) == 0) {
4175 pxor(xtmp, xtmp);
4176 }
// Expand the next 8 mask bits and place them in the low or high 64-bit half
// of xtmp, selected by index % 2.
4177 mov64(rtmp1, 0x0101010101010101L);
4178 shrq(rtmp2, 8);
4179 pdep(rtmp1, rtmp2, rtmp1);
4180 pinsrq(xtmp, rtmp1, index % 2);
4181 vindex = index / 2;
4182 if (vindex) {
4183 // Write entire 16 byte vector when both 64 bit
4184 // lanes are update to save redundant instructions.
4185 if (index % 2) {
4186 vinsertf128(dst, dst, xtmp, vindex);
4187 }
4188 } else {
// First 16-byte group (index 1): copy the staging vector straight into dst.
4189 vmovdqu(dst, xtmp);
4190 }
4191 mask_len -= 8;
4192 }
4193}
4194
// Emits the scalar part of a vector-mask query. `tmp` already holds the mask
// as an integer bit set; the answer is left in `dst`.
//
//   opc     - Op_VectorMaskTrueCount, Op_VectorMaskLastTrue,
//             Op_VectorMaskFirstTrue or Op_VectorMaskToLong; anything else
//             trips assert(false, "Unhandled mask operation").
//   dst     - result register.
//   tmp     - mask bits; may be overwritten.
//   masklen - number of lanes; used only by Op_VectorMaskFirstTrue.
4195void C2_MacroAssembler::vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen) {
4196 switch(opc) {
4197 case Op_VectorMaskTrueCount:
4198 popcntq(dst, tmp);
4199 break;
4200 case Op_VectorMaskLastTrue:
4201 if (VM_Version::supports_lzcnt()) {
// dst = 63 - (number of leading zero bits in tmp).
4202 lzcntq(tmp, tmp);
4203 movl(dst, 63);
4204 subl(dst, tmp);
4205 } else {
// Without lzcnt: preload -1 and replace it with the bsr result only when the
// notZero condition holds after bsrq.
4206 movl(dst, -1);
4207 bsrq(tmp, tmp);
4208 cmov32(Assembler::notZero, dst, tmp);
4209 }
4210 break;
4211 case Op_VectorMaskFirstTrue:
4212 if (VM_Version::supports_bmi1()) {
4213 if (masklen < 32) {
// Set a guard bit at position masklen so that a mask with no lane set yields
// masklen from the trailing-zero count.
4214 orl(tmp, 1 << masklen);
4215 tzcntl(dst, tmp);
4216 } else if (masklen == 32) {
4217 tzcntl(dst, tmp);
4218 } else {
4219 assert(masklen == 64, "")do { if (!(masklen == 64)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4219, "assert(" "masklen == 64" ") failed", ""); ::breakpoint
(); } } while (0)
;
4220 tzcntq(dst, tmp);
4221 }
4222 } else {
4223 if (masklen < 32) {
// Same guard-bit trick as above, using bsf instead of tzcnt.
4224 orl(tmp, 1 << masklen);
4225 bsfl(dst, tmp);
4226 } else {
4227 assert(masklen == 32 || masklen == 64, "")do { if (!(masklen == 32 || masklen == 64)) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4227, "assert(" "masklen == 32 || masklen == 64" ") failed"
, ""); ::breakpoint(); } } while (0)
;
// No room for a guard bit: preload masklen and replace it with the bsf result
// only when the notZero condition holds after the scan.
4228 movl(dst, masklen);
4229 if (masklen == 32) {
4230 bsfl(tmp, tmp);
4231 } else {
4232 bsfq(tmp, tmp);
4233 }
4234 cmov32(Assembler::notZero, dst, tmp);
4235 }
4236 }
4237 break;
4238 case Op_VectorMaskToLong:
// Nothing is emitted: the mask bits in tmp are the result, which is why dst
// and tmp must be the same register.
4239 assert(dst == tmp, "Dst and tmp should be the same for toLong operations")do { if (!(dst == tmp)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4239, "assert(" "dst == tmp" ") failed", "Dst and tmp should be the same for toLong operations"
); ::breakpoint(); } } while (0)
;
4240 break;
4241 default: assert(false, "Unhandled mask operation")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4241, "assert(" "false" ") failed", "Unhandled mask operation"
); ::breakpoint(); } } while (0)
;
4242 }
4243}
4244
// Vector-mask query for a mask held in an opmask register: copies the mask
// bits into `tmp`, clips them when required, and delegates to
// vector_mask_operation_helper, which leaves the answer in `dst`.
//
//   opc      - mask operation, forwarded to the helper.
//   dst      - result register.
//   mask     - opmask register holding the mask; only read.
//   tmp      - scratch register receiving the mask bits.
//   masklen  - number of lanes; bits at and above this position are cleared
//              when clipping applies.
//   masksize - clipping is applied only when this is below 16
//              (presumably the vector size in bytes - confirm with callers).
//   vec_enc  - not used in this overload.
4245void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp,
4246 int masklen, int masksize, int vec_enc) {
4247 assert(VM_Version::supports_popcnt(), "")do { if (!(VM_Version::supports_popcnt())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4247, "assert(" "VM_Version::supports_popcnt()" ") failed",
""); ::breakpoint(); } } while (0)
;
4248
// The 64-bit opmask move is used when AVX512BW is available; otherwise the
// 16-bit move is used and masklen is asserted to fit in 16 bits.
4249 if(VM_Version::supports_avx512bw()) {
4250 kmovql(tmp, mask);
4251 } else {
4252 assert(masklen <= 16, "")do { if (!(masklen <= 16)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4252, "assert(" "masklen <= 16" ") failed", ""); ::breakpoint
(); } } while (0)
;
4253 kmovwl(tmp, mask);
4254 }
4255
4256 // Mask generated out of partial vector comparisons/replicate/mask manipulation
4257 // operations needs to be clipped.
// Op_VectorMaskFirstTrue is excluded from clipping here.
4258 if (masksize < 16 && opc != Op_VectorMaskFirstTrue) {
4259 andq(tmp, (1 << masklen) - 1);
4260 }
4261
4262 vector_mask_operation_helper(opc, dst, tmp, masklen);
4263}
4264
// Vector-mask query for a mask held in a vector register: extracts one bit per
// lane into `tmp` according to the element type, clips the bits when required,
// and delegates to vector_mask_operation_helper, which leaves the answer in
// `dst`.
//
//   opc     - mask operation, forwarded to the helper.
//   dst     - result register.
//   mask    - vector holding the mask lanes; only read.
//   xtmp    - scratch vector (used for T_BOOLEAN and T_SHORT).
//   tmp     - scratch register receiving the extracted mask bits.
//   masklen - number of lanes.
//   bt      - element type of the mask lanes; selects the extraction sequence.
//   vec_enc - vector length encoding; the assert below restricts it to 128-bit
//             with AVX, or 256-bit with AVX2 or elements of at least 4 bytes.
//
// NOTE(review): the two-line assert below appears twice interleaved with its
// rendered macro expansion - this is listing residue, not duplicated code.
4265void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
4266 Register tmp, int masklen, BasicType bt, int vec_enc) {
4267 assert(vec_enc == AVX_128bit && VM_Version::supports_avx() ||do { if (!(vec_enc == AVX_128bit && VM_Version::supports_avx
() || vec_enc == AVX_256bit && (VM_Version::supports_avx2
() || type2aelembytes(bt) >= 4))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4268, "assert(" "vec_enc == AVX_128bit && VM_Version::supports_avx() || vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4)"
") failed", ""); ::breakpoint(); } } while (0)
4268 vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4), "")do { if (!(vec_enc == AVX_128bit && VM_Version::supports_avx
() || vec_enc == AVX_256bit && (VM_Version::supports_avx2
() || type2aelembytes(bt) >= 4))) { (*g_assert_poison) = 'X'
;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4268, "assert(" "vec_enc == AVX_128bit && VM_Version::supports_avx() || vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4)"
") failed", ""); ::breakpoint(); } } while (0)
;
4269 assert(VM_Version::supports_popcnt(), "")do { if (!(VM_Version::supports_popcnt())) { (*g_assert_poison
) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4269, "assert(" "VM_Version::supports_popcnt()" ") failed",
""); ::breakpoint(); } } while (0)
;
4270
4271 bool need_clip = false;
4272 switch(bt) {
4273 case T_BOOLEAN:
4274 // While masks of other types contain 0, -1; boolean masks contain lane values of 0, 1
// xtmp = 0 - mask turns 0/1 lanes into 0/-1 so the byte sign bits can be
// collected by vpmovmskb.
4275 vpxor(xtmp, xtmp, xtmp, vec_enc);
4276 vpsubb(xtmp, xtmp, mask, vec_enc);
4277 vpmovmskb(tmp, xtmp, vec_enc);
4278 need_clip = masklen < 16;
4279 break;
4280 case T_BYTE:
4281 vpmovmskb(tmp, mask, vec_enc);
4282 need_clip = masklen < 16;
4283 break;
4284 case T_SHORT:
// Narrow the short lanes to bytes first. For 16 or more lanes the packed
// halves are brought together with vpermpd (selector 8) before the 128-bit
// byte-mask extraction - presumably because the pack works per 128-bit lane.
4285 vpacksswb(xtmp, mask, mask, vec_enc);
4286 if (masklen >= 16) {
4287 vpermpd(xtmp, xtmp, 8, vec_enc);
4288 }
4289 vpmovmskb(tmp, xtmp, Assembler::AVX_128bit);
4290 need_clip = masklen < 16;
4291 break;
4292 case T_INT:
4293 case T_FLOAT:
4294 vmovmskps(tmp, mask, vec_enc);
4295 need_clip = masklen < 4;
4296 break;
4297 case T_LONG:
4298 case T_DOUBLE:
4299 vmovmskpd(tmp, mask, vec_enc);
4300 need_clip = masklen < 2;
4301 break;
4302 default: assert(false, "Unhandled type, %s", type2name(bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error
("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp"
, 4302, "assert(" "false" ") failed", "Unhandled type, %s", type2name
(bt)); ::breakpoint(); } } while (0)
;
4303 }
4304
4305 // Mask generated out of partial vector comparisons/replicate/mask manipulation
4306 // operations needs to be clipped.
// Op_VectorMaskFirstTrue is excluded from clipping here.
4307 if (need_clip && opc != Op_VectorMaskFirstTrue) {
4308 // need_clip implies masklen < 32
4309 andq(tmp, (1 << masklen) - 1);
4310 }
4311
4312 vector_mask_operation_helper(opc, dst, tmp, masklen);
4313}
4314#endif