File: | jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp |
Warning: | line 2072, column 7 Value stored to 'eindex' during its initialization is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* |
2 | * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #include "precompiled.hpp" |
26 | #include "asm/assembler.hpp" |
27 | #include "asm/assembler.inline.hpp" |
28 | #include "oops/methodData.hpp" |
29 | #include "opto/c2_MacroAssembler.hpp" |
30 | #include "opto/intrinsicnode.hpp" |
31 | #include "opto/opcodes.hpp" |
32 | #include "opto/subnode.hpp" |
33 | #include "runtime/objectMonitor.hpp" |
34 | #include "runtime/stubRoutines.hpp" |
35 | |
// Maps a vector length given in bytes to the Assembler::AvxVectorLen encoding:
//   4, 8, 16 -> AVX_128bit,  32 -> AVX_256bit,  64 -> AVX_512bit.
// Any other length is treated as a programming error via ShouldNotReachHere().
36 | inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) { |
37 | switch (vlen_in_bytes) { |
38 | case 4: // fall-through |
39 | case 8: // fall-through |
40 | case 16: return Assembler::AVX_128bit; |
41 | case 32: return Assembler::AVX_256bit; |
42 | case 64: return Assembler::AVX_512bit; |
43 | |
// Unsupported length: report the error; the AVX_NoVec return below only gives
// this path a return value.
44 | default: { |
45 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 45); ::breakpoint(); } while (0); |
46 | return Assembler::AVX_NoVec; |
47 | } |
48 | } |
49 | } |
50 | |
// Emits code that builds the bit mask (1 << src) - 1 in dst and copies it into
// the opmask register `mask`; afterwards dst is overwritten with a copy of src.
// Guarded by guarantee(PostLoopMultiversioning), so it must only be called
// when that flag is set.
51 | void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) { |
52 | guarantee(PostLoopMultiversioning, "must be")do { if (!(PostLoopMultiversioning)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 52, "guarantee(" "PostLoopMultiversioning" ") failed", "must be" ); ::breakpoint(); } } while (0); |
// dst = 1; dst <<= src; dst -= 1  ==>  the low `src` bits of dst are set.
53 | Assembler::movl(dst, 1); |
54 | Assembler::shlxl(dst, dst, src); |
55 | Assembler::decl(dst); |
56 | Assembler::kmovdl(mask, dst); |
// dst was only scratch for the mask; it ends up holding src.
57 | Assembler::movl(dst, src); |
58 | } |
59 | |
// Emits a single knotwl that writes the bitwise NOT of k0 into `mask`.
// NOTE(review): presumably this resets `mask` to an all-ones mask; that depends
// on the contents of k0, which this file section does not show -- confirm.
// Guarded by guarantee(PostLoopMultiversioning), like setvectmask.
60 | void C2_MacroAssembler::restorevectmask(KRegister mask) { |
61 | guarantee(PostLoopMultiversioning, "must be")do { if (!(PostLoopMultiversioning)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 61, "guarantee(" "PostLoopMultiversioning" ") failed", "must be" ); ::breakpoint(); } } while (0); |
62 | Assembler::knotwl(mask, k0); |
63 | } |
64 | |
65 | #if INCLUDE_RTM_OPT1 |
66 | |
67 | // Update rtm_counters based on abort status |
68 | // input: abort_status |
69 | // rtm_counters (RTMLockingCounters*) |
70 | // flags are killed |
// Emits an unconditional atomic increment of the abort counter. When
// PrintPreciseRTMLockingStatistics is set it additionally emits, for every
// status bit i below ABORT_STATUS_LIMIT, a test of that bit and a conditional
// atomic increment of the matching per-bit counter.
71 | void C2_MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) { |
72 | |
// Every abort is counted, whatever its reason.
73 | atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset())); |
74 | if (PrintPreciseRTMLockingStatistics) { |
// This C++ loop runs at code-generation time, so one test/branch/increment
// sequence is emitted per status bit.
75 | for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { |
76 | Label check_abort; |
// Skip the increment when bit i of the abort status is clear.
77 | testl(abort_status, (1<<i)); |
78 | jccb(Assembler::equal, check_abort)jccb_0(Assembler::equal, check_abort, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 78); |
// The per-bit counters are addressed as consecutive uintx slots starting at
// abortX_count_offset().
79 | atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx)))); |
80 | bind(check_abort); |
81 | } |
82 | } |
83 | } |
84 | |
85 | // Branch if (random & (count-1) != 0), count is 2^n |
86 | // tmp, scr and flags are killed |
// The "random" value is the timestamp counter read by rdtsc. Because rdtsc
// writes EDX:EAX, the caller must pass rax as tmp and rdx as scr (asserted).
// Only tmp (the low half in rax) is masked and tested; scr is just clobbered.
87 | void C2_MacroAssembler::branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel) { |
88 | assert(tmp == rax, "")do { if (!(tmp == rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 88, "assert(" "tmp == rax" ") failed", ""); ::breakpoint(); } } while (0); |
89 | assert(scr == rdx, "")do { if (!(scr == rdx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 89, "assert(" "scr == rdx" ") failed", ""); ::breakpoint(); } } while (0); |
90 | rdtsc(); // modifies EDX:EAX |
// Keep the low bits selected by count-1; control falls through (no branch)
// only when all of them are zero.
91 | andptr(tmp, count-1); |
92 | jccb(Assembler::notZero, brLabel)jccb_0(Assembler::notZero, brLabel, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 92); |
93 | } |
94 | |
95 | // Perform abort ratio calculation, set no_rtm bit if high ratio |
96 | // input: rtm_counters_Reg (RTMLockingCounters* address) |
97 | // tmpReg, rtm_counters_Reg and flags are killed |
// rtm_counters is the same counters object as a C++ pointer; it is used to
// reload rtm_counters_Reg after that register has been reused as scratch.
// method_data may be NULL, in which case the emitted code updates no state
// bits and only performs the comparisons.
98 | void C2_MacroAssembler::rtm_abort_ratio_calculation(Register tmpReg, |
99 | Register rtm_counters_Reg, |
100 | RTMLockingCounters* rtm_counters, |
101 | Metadata* method_data) { |
102 | Label L_done, L_check_always_rtm1, L_check_always_rtm2; |
103 | |
104 | if (RTMLockingCalculationDelay > 0) { |
105 | // Delay calculation |
// Skip the whole calculation while the word at rtm_calculation_flag_addr()
// is still zero.
106 | movptr(tmpReg, ExternalAddress((address) RTMLockingCounters::rtm_calculation_flag_addr()), tmpReg); |
107 | testptr(tmpReg, tmpReg); |
108 | jccb(Assembler::equal, L_done)jccb_0(Assembler::equal, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 108); |
109 | } |
110 | // Abort ratio calculation only if abort_count >= RTMAbortThreshold |
111 | // Aborted transactions = abort_count * 100 |
112 | // All transactions = total_count * RTMTotalCountIncrRate |
113 | // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio) |
114 | |
115 | movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset())); |
116 | cmpptr(tmpReg, RTMAbortThreshold); |
// Too few aborts so far: go straight to the "always rtm" check. The counters
// register is still intact on this path, hence the ...rtm2 label.
117 | jccb(Assembler::below, L_check_always_rtm2)jccb_0(Assembler::below, L_check_always_rtm2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 117); |
118 | imulptr(tmpReg, tmpReg, 100); |
119 | |
// scrReg is an alias of rtm_counters_Reg: from here on the counters pointer
// is overwritten and has to be reloaded before it is used again.
120 | Register scrReg = rtm_counters_Reg; |
121 | movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); |
122 | imulptr(scrReg, scrReg, RTMTotalCountIncrRate); |
123 | imulptr(scrReg, scrReg, RTMAbortRatio); |
124 | cmpptr(tmpReg, scrReg); |
125 | jccb(Assembler::below, L_check_always_rtm1)jccb_0(Assembler::below, L_check_always_rtm1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 125); |
126 | if (method_data != NULL__null) { |
127 | // set rtm_state to "no rtm" in MDO |
// lock-prefixed OR of NoRTM into the rtm_state word.
128 | mov_metadata(tmpReg, method_data); |
129 | lock(); |
130 | orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM); |
131 | } |
132 | jmpb(L_done)jmpb_0(L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 132); |
133 | bind(L_check_always_rtm1); |
134 | // Reload RTMLockingCounters* address |
135 | lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); |
136 | bind(L_check_always_rtm2); |
// Leave the state untouched until total_count has reached
// RTMLockingThreshold / RTMTotalCountIncrRate.
137 | movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset())); |
138 | cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); |
139 | jccb(Assembler::below, L_done)jccb_0(Assembler::below, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 139); |
140 | if (method_data != NULL__null) { |
141 | // set rtm_state to "always rtm" in MDO |
142 | mov_metadata(tmpReg, method_data); |
143 | lock(); |
144 | orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM); |
145 | } |
146 | bind(L_done); |
147 | } |
148 | |
149 | // Update counters and perform abort ratio calculation |
150 | // input: abort_status_Reg |
151 | // rtm_counters_Reg, flags are killed |
// rtm_counters must be non-NULL (asserted). rtm_counters_Reg is loaded here
// with the address of *rtm_counters, so its incoming value is irrelevant.
// The abort ratio calculation is emitted only when profile_rtm is true.
152 | void C2_MacroAssembler::rtm_profiling(Register abort_status_Reg, |
153 | Register rtm_counters_Reg, |
154 | RTMLockingCounters* rtm_counters, |
155 | Metadata* method_data, |
156 | bool profile_rtm) { |
157 | |
158 | assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 158, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM" ); ::breakpoint(); } } while (0); |
159 | // update rtm counters based on rax value at abort |
160 | // reads abort_status_Reg, updates flags |
161 | lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters)); |
162 | rtm_counters_update(abort_status_Reg, rtm_counters_Reg); |
163 | if (profile_rtm) { |
164 | // Save abort status because abort_status_Reg is used by following code. |
// abort_status_Reg is handed to rtm_abort_ratio_calculation as its tmpReg and
// is clobbered there. It is only preserved when RTMRetryCount > 0.
165 | if (RTMRetryCount > 0) { |
166 | push(abort_status_Reg); |
167 | } |
// NOTE(review): this assert repeats the identical check at the top of the
// function.
168 | assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 168, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM" ); ::breakpoint(); } } while (0); |
169 | rtm_abort_ratio_calculation(abort_status_Reg, rtm_counters_Reg, rtm_counters, method_data); |
170 | // restore abort status |
171 | if (RTMRetryCount > 0) { |
172 | pop(abort_status_Reg); |
173 | } |
174 | } |
175 | } |
176 | |
177 | // Retry on abort if abort's status is 0x6: can retry (0x2) | memory conflict (0x4) |
178 | // inputs: retry_count_Reg |
179 | // : abort_status_Reg |
180 | // output: retry_count_Reg decremented by 1 |
181 | // flags are killed |
// abort_status_Reg must be rax (asserted) and is overwritten with
// (status & 0x6). Control reaches the end of the emitted sequence (doneRetry)
// when neither retryable bit is set or when retry_count_Reg is already zero;
// in both of those cases retry_count_Reg is left unchanged.
182 | void C2_MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, Label& retryLabel) { |
183 | Label doneRetry; |
184 | assert(abort_status_Reg == rax, "")do { if (!(abort_status_Reg == rax)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 184, "assert(" "abort_status_Reg == rax" ") failed", ""); :: breakpoint(); } } while (0); |
185 | // The abort reason bits are in eax (see all states in rtmLocking.hpp) |
186 | // 0x6 = conflict on which we can retry (0x2) | memory conflict (0x4) |
187 | // if reason is in 0x6 and retry count != 0 then retry |
188 | andptr(abort_status_Reg, 0x6); |
189 | jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 189); |
190 | testl(retry_count_Reg, retry_count_Reg); |
191 | jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 191); |
// Retry path: pause, consume one retry, jump back to the caller's retry label.
192 | pause(); |
193 | decrementl(retry_count_Reg); |
194 | jmp(retryLabel); |
195 | bind(doneRetry); |
196 | } |
197 | |
198 | // Spin and retry if lock is busy, |
199 | // inputs: box_Reg (monitor address) |
200 | // : retry_count_Reg |
201 | // output: retry_count_Reg decremented by 1 |
202 | // : clear z flag if retry count exceeded |
203 | // tmp_Reg, scr_Reg, flags are killed |
// The owner field is addressed as box_Reg + owner_offset, where owner_offset
// already subtracts markWord::monitor_value; box_Reg is therefore expected to
// hold the monitor address with that tag still added.
// When retries are exhausted (retry_count_Reg == 0 on entry) nothing is
// decremented: the final incrementl leaves the register at 1 and ZF clear.
204 | void C2_MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register box_Reg, |
205 | Register tmp_Reg, Register scr_Reg, Label& retryLabel) { |
206 | Label SpinLoop, SpinExit, doneRetry; |
207 | int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value ); |
208 | |
209 | testl(retry_count_Reg, retry_count_Reg); |
210 | jccb(Assembler::zero, doneRetry)jccb_0(Assembler::zero, doneRetry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 210); |
211 | decrementl(retry_count_Reg); |
// scr_Reg is the spin budget for the loop below.
212 | movptr(scr_Reg, RTMSpinLoopCount); |
213 | |
// Spin while the owner field is non-zero, for at most RTMSpinLoopCount
// iterations. Both ways of leaving the loop end in the jmp to retryLabel.
214 | bind(SpinLoop); |
215 | pause(); |
216 | decrementl(scr_Reg); |
217 | jccb(Assembler::lessEqual, SpinExit)jccb_0(Assembler::lessEqual, SpinExit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 217); |
218 | movptr(tmp_Reg, Address(box_Reg, owner_offset)); |
219 | testptr(tmp_Reg, tmp_Reg); |
220 | jccb(Assembler::notZero, SpinLoop)jccb_0(Assembler::notZero, SpinLoop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 220); |
221 | |
222 | bind(SpinExit); |
223 | jmp(retryLabel); |
224 | bind(doneRetry); |
225 | incrementl(retry_count_Reg); // clear z flag |
226 | } |
227 | |
228 | // Use RTM for normal stack locks |
229 | // Input: objReg (object to lock) |
// tmpReg must be rax and scrReg must be rdx (asserted); both are scratch.
// retry_on_abort_count_Reg is loaded with RTMRetryCount when that is > 0.
// Branches to IsInflated when the mark word has the monitor bit set, and to
// DONE_LABEL (from inside the transaction, no xend emitted) when the object's
// lock bits read as unlocked. Otherwise control falls out of the end of the
// emitted sequence once aborts/retries are exhausted.
230 | void C2_MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg, |
231 | Register retry_on_abort_count_Reg, |
232 | RTMLockingCounters* stack_rtm_counters, |
233 | Metadata* method_data, bool profile_rtm, |
234 | Label& DONE_LABEL, Label& IsInflated) { |
235 | assert(UseRTMForStackLocks, "why call this otherwise?")do { if (!(UseRTMForStackLocks)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 235, "assert(" "UseRTMForStackLocks" ") failed", "why call this otherwise?" ); ::breakpoint(); } } while (0); |
236 | assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 236, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint (); } } while (0); |
237 | assert(scrReg == rdx, "")do { if (!(scrReg == rdx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 237, "assert(" "scrReg == rdx" ") failed", ""); ::breakpoint (); } } while (0); |
238 | Label L_rtm_retry, L_decrement_retry, L_on_abort; |
239 | |
// L_rtm_retry is bound only when retries are enabled; it is also only
// referenced (via rtm_retry_lock_on_abort below) in that configuration.
240 | if (RTMRetryCount > 0) { |
241 | movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort |
242 | bind(L_rtm_retry); |
243 | } |
244 | movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); |
245 | testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral |
246 | jcc(Assembler::notZero, IsInflated); |
247 | |
// Count this attempt in total_count. With RTMTotalCountIncrRate > 1 the
// increment is skipped whenever the rdtsc-based test branches to L_noincrement.
248 | if (PrintPreciseRTMLockingStatistics || profile_rtm) { |
249 | Label L_noincrement; |
250 | if (RTMTotalCountIncrRate > 1) { |
251 | // tmpReg, scrReg and flags are killed |
252 | branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement); |
253 | } |
254 | assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(stack_rtm_counters != __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 254, "assert(" "stack_rtm_counters != __null" ") failed", "should not be NULL when profiling RTM" ); ::breakpoint(); } } while (0); |
255 | atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg); |
256 | bind(L_noincrement); |
257 | } |
// Start the transaction; L_on_abort is the abort handler target.
258 | xbegin(L_on_abort); |
259 | movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword |
260 | andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits |
261 | cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked |
262 | jcc(Assembler::equal, DONE_LABEL); // all done if unlocked |
263 | |
// The object is not unlocked: either end the transaction and fake a
// retryable status (skipping the profiling code), or abort it explicitly.
264 | Register abort_status_Reg = tmpReg; // status of abort is stored in RAX |
265 | if (UseRTMXendForLockBusy) { |
266 | xend(); |
267 | movptr(abort_status_Reg, 0x2); // Set the abort status to 2 (so we can retry) |
268 | jmp(L_decrement_retry); |
269 | } |
270 | else { |
271 | xabort(0); |
272 | } |
273 | bind(L_on_abort); |
274 | if (PrintPreciseRTMLockingStatistics || profile_rtm) { |
275 | rtm_profiling(abort_status_Reg, scrReg, stack_rtm_counters, method_data, profile_rtm); |
276 | } |
277 | bind(L_decrement_retry); |
278 | if (RTMRetryCount > 0) { |
279 | // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) |
280 | rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); |
281 | } |
282 | } |
283 | |
284 | // Use RTM for inflating locks |
285 | // inputs: objReg (object to lock) |
286 | // boxReg (on-stack box address (displaced header location) - KILLED) |
287 | // tmpReg (ObjectMonitor address + markWord::monitor_value) |
// tmpReg must be rax and scrReg must be rdx (asserted). Branches to DONE_LABEL
// from inside the transaction when the monitor's owner field reads zero, and
// (when RTMRetryCount > 0) after a successful CAS of the thread into the owner
// field. Otherwise control falls out of the end of the emitted sequence.
288 | void C2_MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg, |
289 | Register scrReg, Register retry_on_busy_count_Reg, |
290 | Register retry_on_abort_count_Reg, |
291 | RTMLockingCounters* rtm_counters, |
292 | Metadata* method_data, bool profile_rtm, |
293 | Label& DONE_LABEL) { |
294 | assert(UseRTMLocking, "why call this otherwise?")do { if (!(UseRTMLocking)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 294, "assert(" "UseRTMLocking" ") failed", "why call this otherwise?" ); ::breakpoint(); } } while (0); |
295 | assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 295, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint (); } } while (0); |
296 | assert(scrReg == rdx, "")do { if (!(scrReg == rdx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 296, "assert(" "scrReg == rdx" ") failed", ""); ::breakpoint (); } } while (0); |
297 | Label L_rtm_retry, L_decrement_retry, L_on_abort; |
298 | int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value ); |
299 | |
// Store unused_mark() into the word at boxReg+0, then reuse boxReg to hold the
// tagged monitor pointer for the rest of the sequence.
300 | // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
301 | movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); |
302 | movptr(boxReg, tmpReg); // Save ObjectMonitor address |
303 | |
304 | if (RTMRetryCount > 0) { |
305 | movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy |
306 | movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort |
307 | bind(L_rtm_retry); |
308 | } |
309 | if (PrintPreciseRTMLockingStatistics || profile_rtm) { |
310 | Label L_noincrement; |
311 | if (RTMTotalCountIncrRate > 1) { |
312 | // tmpReg, scrReg and flags are killed |
313 | branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement); |
314 | } |
315 | assert(rtm_counters != NULL, "should not be NULL when profiling RTM")do { if (!(rtm_counters != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 315, "assert(" "rtm_counters != __null" ") failed", "should not be NULL when profiling RTM" ); ::breakpoint(); } } while (0); |
316 | atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg); |
317 | bind(L_noincrement); |
318 | } |
// Inside the transaction: re-read the mark word from the object, follow it to
// the owner field, and finish at DONE_LABEL if the owner is zero.
319 | xbegin(L_on_abort); |
320 | movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); |
321 | movptr(tmpReg, Address(tmpReg, owner_offset)); |
322 | testptr(tmpReg, tmpReg); |
323 | jcc(Assembler::zero, DONE_LABEL); |
// Owner is non-zero: leave the transaction, either cleanly (xend, then go
// straight to the busy-retry code) or via an explicit abort.
324 | if (UseRTMXendForLockBusy) { |
325 | xend(); |
326 | jmp(L_decrement_retry); |
327 | } |
328 | else { |
329 | xabort(0); |
330 | } |
331 | bind(L_on_abort); |
332 | Register abort_status_Reg = tmpReg; // status of abort is stored in RAX |
333 | if (PrintPreciseRTMLockingStatistics || profile_rtm) { |
334 | rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm); |
335 | } |
336 | if (RTMRetryCount > 0) { |
337 | // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4) |
338 | rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); |
339 | } |
340 | |
// Non-transactional fallback: read the owner through the saved monitor
// pointer in boxReg; if it is non-zero the lock is busy.
341 | movptr(tmpReg, Address(boxReg, owner_offset)) ; |
342 | testptr(tmpReg, tmpReg) ; |
343 | jccb(Assembler::notZero, L_decrement_retry)jccb_0(Assembler::notZero, L_decrement_retry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 343) ; |
344 | |
345 | // Appears unlocked - try to swing _owner from null to non-null. |
346 | // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. |
// The value CASed into the owner field is the current thread: r15_thread in
// the first preprocessor branch, otherwise fetched into scrReg via get_thread.
347 | #ifdef _LP641 |
348 | Register threadReg = r15_thread; |
349 | #else |
350 | get_thread(scrReg); |
351 | Register threadReg = scrReg; |
352 | #endif |
353 | lock(); |
354 | cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg |
355 | |
356 | if (RTMRetryCount > 0) { |
357 | // success done else retry |
358 | jccb(Assembler::equal, DONE_LABEL)jccb_0(Assembler::equal, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 358) ; |
359 | bind(L_decrement_retry); |
360 | // Spin and retry if lock is busy. |
361 | rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry); |
362 | } |
// Without retries L_decrement_retry is just bound at the end, so control
// falls out of the sequence with the flags left by the preceding instruction.
363 | else { |
364 | bind(L_decrement_retry); |
365 | } |
366 | } |
367 | |
368 | #endif // INCLUDE_RTM_OPT |
369 | |
370 | // fast_lock and fast_unlock used by C2 |
371 | |
372 | // Because the transitions from emitted code to the runtime |
373 | // monitorenter/exit helper stubs are so slow it's critical that |
374 | // we inline both the stack-locking fast path and the inflated fast path. |
375 | // |
376 | // See also: cmpFastLock and cmpFastUnlock. |
377 | // |
378 | // What follows is a specialized inline transliteration of the code |
379 | // in enter() and exit(). If we're concerned about I$ bloat another |
380 | // option would be to emit TrySlowEnter and TrySlowExit methods |
381 | // at startup-time. These methods would accept arguments as |
382 | // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure |
383 | // indications in the icc.ZFlag. fast_lock and fast_unlock would simply |
384 | // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. |
385 | // In practice, however, the # of lock sites is bounded and is usually small. |
386 | // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer |
387 | // if the processor uses simple bimodal branch predictors keyed by EIP |
388 | // Since the helper routines would be called from multiple synchronization |
389 | // sites. |
390 | // |
391 | // An even better approach would be to write "MonitorEnter()" and "MonitorExit()" |
392 | // in java - using j.u.c and unsafe - and just bind the lock and unlock sites |
393 | // to those specialized methods. That'd give us a mostly platform-independent |
394 | // implementation that the JITs could optimize and inline at their pleasure. |
395 | // Done correctly, the only time we'd need to cross to native code would be |
396 | // to park() or unpark() threads. We'd also need a few more unsafe operators |
397 | // to (a) prevent compiler-JIT reordering of non-volatile accesses, and |
398 | // (b) explicit barriers or fence operations. |
399 | // |
400 | // TODO: |
401 | // |
402 | // * Arrange for C2 to pass "Self" into fast_lock and fast_unlock in one of the registers (scr). |
403 | // This avoids manifesting the Self pointer in the fast_lock and fast_unlock terminals. |
404 | // Given TLAB allocation, Self is usually manifested in a register, so passing it into |
405 | // the lock operators would typically be faster than reifying Self. |
406 | // |
407 | // * Ideally I'd define the primitives as: |
408 | // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. |
409 | // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED |
410 | // Unfortunately ADLC bugs prevent us from expressing the ideal form. |
411 | // Instead, we're stuck with the rather awkward and brittle register assignments below. |
412 | // Furthermore the register assignments are overconstrained, possibly resulting in |
413 | // sub-optimal code near the synchronization site. |
414 | // |
415 | // * Eliminate the sp-proximity tests and just use "== Self" tests instead. |
416 | // Alternately, use a better sp-proximity test. |
417 | // |
418 | // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. |
419 | // Either one is sufficient to uniquely identify a thread. |
420 | // TODO: eliminate use of sp in _owner and use get_thread(tr) instead. |
421 | // |
422 | // * Intrinsify notify() and notifyAll() for the common cases where the |
423 | // object is locked by the calling thread but the waitlist is empty. |
424 | // avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). |
425 | // |
426 | // * use jccb and jmpb instead of jcc and jmp to improve code density. |
427 | // But beware of excessive branch density on AMD Opterons. |
428 | // |
429 | // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success |
430 | // or failure of the fast path. If the fast path fails then we pass |
431 | // control to the slow path, typically in C. In fast_lock and |
432 | // fast_unlock we often branch to DONE_LABEL, just to find that C2 |
433 | // will emit a conditional branch immediately after the node. |
434 | // So we have branches to branches and lots of ICC.ZF games. |
435 | // Instead, it might be better to have C2 pass a "FailureLabel" |
436 | // into fast_lock and fast_unlock. In the case of success, control |
437 | // will drop through the node. ICC.ZF is undefined at exit. |
438 | // In the case of failure, the node will branch directly to the |
439 | // FailureLabel |
440 | |
441 | |
442 | // obj: object to lock |
443 | // box: on-stack box address (displaced header location) - KILLED |
444 | // rax,: tmp -- KILLED |
445 | // scr: tmp -- KILLED |
446 | void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, |
447 | Register scrReg, Register cx1Reg, Register cx2Reg, |
448 | RTMLockingCounters* rtm_counters, |
449 | RTMLockingCounters* stack_rtm_counters, |
450 | Metadata* method_data, |
451 | bool use_rtm, bool profile_rtm) { |
452 | // Ensure the register assignments are disjoint |
453 | assert(tmpReg == rax, "")do { if (!(tmpReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 453, "assert(" "tmpReg == rax" ") failed", ""); ::breakpoint (); } } while (0); |
454 | |
455 | if (use_rtm) { |
456 | assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg); |
457 | } else { |
458 | assert(cx2Reg == noreg, "")do { if (!(cx2Reg == noreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 458, "assert(" "cx2Reg == noreg" ") failed", ""); ::breakpoint (); } } while (0); |
459 | assert_different_registers(objReg, boxReg, tmpReg, scrReg); |
460 | } |
461 | |
462 | // Possible cases that we'll encounter in fast_lock |
463 | // ------------------------------------------------ |
464 | // * Inflated |
465 | // -- unlocked |
466 | // -- Locked |
467 | // = by self |
468 | // = by other |
469 | // * neutral |
470 | // * stack-locked |
471 | // -- by self |
472 | // = sp-proximity test hits |
473 | // = sp-proximity test generates false-negative |
474 | // -- by other |
475 | // |
476 | |
477 | Label IsInflated, DONE_LABEL; |
478 | |
479 | if (DiagnoseSyncOnValueBasedClasses != 0) { |
480 | load_klass(tmpReg, objReg, cx1Reg); |
481 | movl(tmpReg, Address(tmpReg, Klass::access_flags_offset())); |
482 | testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS); |
483 | jcc(Assembler::notZero, DONE_LABEL); |
484 | } |
485 | |
486 | #if INCLUDE_RTM_OPT1 |
487 | if (UseRTMForStackLocks && use_rtm) { |
488 | assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive")do { if (!(!UseHeavyMonitors)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 488, "assert(" "!UseHeavyMonitors" ") failed", "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive" ); ::breakpoint(); } } while (0); |
489 | rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, |
490 | stack_rtm_counters, method_data, profile_rtm, |
491 | DONE_LABEL, IsInflated); |
492 | } |
493 | #endif // INCLUDE_RTM_OPT |
494 | |
495 | movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH] |
496 | testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral |
497 | jccb(Assembler::notZero, IsInflated)jccb_0(Assembler::notZero, IsInflated, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 497); |
498 | |
499 | if (!UseHeavyMonitors) { |
500 | // Attempt stack-locking ... |
501 | orptr (tmpReg, markWord::unlocked_value); |
502 | movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS |
503 | lock(); |
504 | cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg |
505 | jcc(Assembler::equal, DONE_LABEL); // Success |
506 | |
507 | // Recursive locking. |
508 | // The object is stack-locked: markword contains stack pointer to BasicLock. |
509 | // Locked by current thread if difference with current SP is less than one page. |
510 | subptr(tmpReg, rsp); |
511 | // Next instruction set ZFlag == 1 (Success) if difference is less then one page. |
512 | andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())7 - os::vm_page_size()) ); |
513 | movptr(Address(boxReg, 0), tmpReg); |
514 | } else { |
515 | // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0. |
516 | testptr(objReg, objReg); |
517 | } |
518 | jmp(DONE_LABEL); |
519 | |
520 | bind(IsInflated); |
521 | // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value |
522 | |
523 | #if INCLUDE_RTM_OPT1 |
524 | // Use the same RTM locking code in 32- and 64-bit VM. |
525 | if (use_rtm) { |
526 | rtm_inflated_locking(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg, |
527 | rtm_counters, method_data, profile_rtm, DONE_LABEL); |
528 | } else { |
529 | #endif // INCLUDE_RTM_OPT |
530 | |
531 | #ifndef _LP641 |
532 | // The object is inflated. |
533 | |
534 | // boxReg refers to the on-stack BasicLock in the current frame. |
535 | // We'd like to write: |
536 | // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices. |
537 | // This is convenient but results a ST-before-CAS penalty. The following CAS suffers |
538 | // additional latency as we have another ST in the store buffer that must drain. |
539 | |
540 | // avoid ST-before-CAS |
541 | // register juggle because we need tmpReg for cmpxchgptr below |
542 | movptr(scrReg, boxReg); |
543 | movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] |
544 | |
545 | // Optimistic form: consider XORL tmpReg,tmpReg |
546 | movptr(tmpReg, NULL_WORD0L); |
547 | |
548 | // Appears unlocked - try to swing _owner from null to non-null. |
549 | // Ideally, I'd manifest "Self" with get_thread and then attempt |
550 | // to CAS the register containing Self into m->Owner. |
551 | // But we don't have enough registers, so instead we can either try to CAS |
552 | // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds |
553 | // we later store "Self" into m->Owner. Transiently storing a stack address |
554 | // (rsp or the address of the box) into m->owner is harmless. |
555 | // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. |
556 | lock(); |
557 | cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value ))); |
558 | movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3 |
559 | // If we weren't able to swing _owner from NULL to the BasicLock |
560 | // then take the slow path. |
561 | jccb (Assembler::notZero, DONE_LABEL)jccb_0(Assembler::notZero, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 561); |
562 | // update _owner from BasicLock to thread |
563 | get_thread (scrReg); // beware: clobbers ICCs |
564 | movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value )), scrReg); |
565 | xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success |
566 | |
567 | // If the CAS fails we can either retry or pass control to the slow path. |
568 | // We use the latter tactic. |
569 | // Pass the CAS result in the icc.ZFlag into DONE_LABEL |
570 | // If the CAS was successful ... |
571 | // Self has acquired the lock |
572 | // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. |
573 | // Intentional fall-through into DONE_LABEL ... |
574 | #else // _LP64 |
575 | // It's inflated and we use scrReg for ObjectMonitor* in this section. |
576 | movq(scrReg, tmpReg); |
577 | xorq(tmpReg, tmpReg); |
578 | lock(); |
579 | cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value ))); |
580 | // Unconditionally set box->_displaced_header = markWord::unused_mark(). |
581 | // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
582 | movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); |
583 | // Propagate ICC.ZF from CAS above into DONE_LABEL. |
584 | jcc(Assembler::equal, DONE_LABEL); // CAS above succeeded; propagate ZF = 1 (success) |
585 | |
586 | cmpptr(r15_thread, rax); // Check if we are already the owner (recursive lock) |
587 | jcc(Assembler::notEqual, DONE_LABEL); // If not recursive, ZF = 0 at this point (fail) |
588 | incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value ))); |
589 | xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success |
590 | #endif // _LP64 |
591 | #if INCLUDE_RTM_OPT1 |
592 | } // use_rtm() |
593 | #endif |
594 | // DONE_LABEL is a hot target - we'd really like to place it at the |
595 | // start of cache line by padding with NOPs. |
596 | // See the AMD and Intel software optimization manuals for the |
597 | // most efficient "long" NOP encodings. |
598 | // Unfortunately none of our alignment mechanisms suffice. |
599 | bind(DONE_LABEL); |
600 | |
601 | // At DONE_LABEL the icc ZFlag is set as follows ... |
602 | // fast_unlock uses the same protocol. |
603 | // ZFlag == 1 -> Success |
604 | // ZFlag == 0 -> Failure - force control through the slow path |
605 | } |
606 | |
607 | // obj: object to unlock |
608 | // box: box address (displaced header location), killed. Must be EAX. |
609 | // tmp: killed, cannot be obj nor box. |
610 | // |
611 | // Some commentary on balanced locking: |
612 | // |
613 | // fast_lock and fast_unlock are emitted only for provably balanced lock sites. |
614 | // Methods that don't have provably balanced locking are forced to run in the |
615 | // interpreter - such methods won't be compiled to use fast_lock and fast_unlock. |
616 | // The interpreter provides two properties: |
617 | // I1: At return-time the interpreter automatically and quietly unlocks any |
618 | // objects acquired by the current activation (frame). Recall that the |
619 | // interpreter maintains an on-stack list of locks currently held by |
620 | // a frame. |
621 | // I2: If a method attempts to unlock an object that is not held by the |
622 | // frame, the interpreter throws IllegalMonitorStateException (IMSX). |
623 | // |
624 | // Let's say A(), which has provably balanced locking, acquires O and then calls B(). |
625 | // B() doesn't have provably balanced locking so it runs in the interpreter. |
626 | // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O |
627 | // is still locked by A(). |
628 | // |
629 | // The only other source of unbalanced locking would be JNI. The "Java Native Interface: |
630 | // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter |
631 | // should not be unlocked by "normal" java-level locking and vice-versa. The specification |
632 | // doesn't specify what will occur if a program engages in such mixed-mode locking, however. |
633 | // Arguably given that the spec legislates the JNI case as undefined our implementation |
634 | // could reasonably *avoid* checking owner in fast_unlock(). |
635 | // In the interest of performance we elide m->Owner==Self check in unlock. |
636 | // A perfectly viable alternative is to elide the owner check except when |
637 | // Xcheck:jni is enabled. |
638 | |
639 | void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { |
640 | assert(boxReg == rax, "")do { if (!(boxReg == rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 640, "assert(" "boxReg == rax" ") failed", ""); ::breakpoint (); } } while (0); |
641 | assert_different_registers(objReg, boxReg, tmpReg); |
642 | |
643 | Label DONE_LABEL, Stacked, CheckSucc; |
644 | |
645 | #if INCLUDE_RTM_OPT1 |
646 | if (UseRTMForStackLocks && use_rtm) { |
647 | assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive")do { if (!(!UseHeavyMonitors)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 647, "assert(" "!UseHeavyMonitors" ") failed", "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive" ); ::breakpoint(); } } while (0); |
648 | Label L_regular_unlock; |
649 | movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword |
650 | andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits |
651 | cmpptr(tmpReg, markWord::unlocked_value); // bits = 01 unlocked |
652 | jccb(Assembler::notEqual, L_regular_unlock)jccb_0(Assembler::notEqual, L_regular_unlock, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 652); // if !HLE RegularLock |
653 | xend(); // otherwise end... |
654 | jmp(DONE_LABEL); // ... and we're done |
655 | bind(L_regular_unlock); |
656 | } |
657 | #endif |
658 | |
659 | if (!UseHeavyMonitors) { |
660 | cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD0L); // Examine the displaced header |
661 | jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock |
662 | } |
663 | movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword |
664 | if (!UseHeavyMonitors) { |
665 | testptr(tmpReg, markWord::monitor_value); // Inflated? |
666 | jccb (Assembler::zero, Stacked)jccb_0(Assembler::zero, Stacked, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 666); |
667 | } |
668 | |
669 | // It's inflated. |
670 | #if INCLUDE_RTM_OPT1 |
671 | if (use_rtm) { |
672 | Label L_regular_inflated_unlock; |
673 | int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value ); |
674 | movptr(boxReg, Address(tmpReg, owner_offset)); |
675 | testptr(boxReg, boxReg); |
676 | jccb(Assembler::notZero, L_regular_inflated_unlock)jccb_0(Assembler::notZero, L_regular_inflated_unlock, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 676); |
677 | xend(); |
678 | jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 678); |
679 | bind(L_regular_inflated_unlock); |
680 | } |
681 | #endif |
682 | |
683 | // Despite our balanced locking property we still check that m->_owner == Self |
684 | // as java routines or native JNI code called by this thread might |
685 | // have released the lock. |
686 | // Refer to the comments in synchronizer.cpp for how we might encode extra |
687 | // state in _succ so we can avoid fetching EntryList|cxq. |
688 | // |
689 | // If there's no contention try a 1-0 exit. That is, exit without |
690 | // a costly MEMBAR or CAS. See synchronizer.cpp for details on how |
691 | // we detect and recover from the race that the 1-0 exit admits. |
692 | // |
693 | // Conceptually fast_unlock() must execute a STST|LDST "release" barrier |
694 | // before it STs null into _owner, releasing the lock. Updates |
695 | // to data protected by the critical section must be visible before |
696 | // we drop the lock (and thus before any other thread could acquire |
697 | // the lock and observe the fields protected by the lock). |
698 | // IA32's memory-model is SPO, so STs are ordered with respect to |
699 | // each other and there's no need for an explicit barrier (fence). |
700 | // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. |
701 | #ifndef _LP641 |
702 | get_thread (boxReg); |
703 | |
704 | // Note that we could employ various encoding schemes to reduce |
705 | // the number of loads below (currently 4) to just 2 or 3. |
706 | // Refer to the comments in synchronizer.cpp. |
707 | // In practice the chain of fetches doesn't seem to impact performance, however. |
708 | xorptr(boxReg, boxReg); |
709 | orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value ))); |
710 | jccb (Assembler::notZero, DONE_LABEL)jccb_0(Assembler::notZero, DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 710); |
711 | movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)((ObjectMonitor::EntryList_offset_in_bytes()) - markWord::monitor_value ))); |
712 | orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)((ObjectMonitor::cxq_offset_in_bytes()) - markWord::monitor_value ))); |
713 | jccb (Assembler::notZero, CheckSucc)jccb_0(Assembler::notZero, CheckSucc, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 713); |
714 | movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value )), NULL_WORD0L); |
715 | jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 715); |
716 | |
717 | bind (Stacked); |
718 | // It's not inflated and it's not recursively stack-locked. |
719 | // It must be stack-locked. |
720 | // Try to reset the header to displaced header. |
721 | // The "box" value on the stack is stable, so we can reload |
722 | // and be assured we observe the same value as above. |
723 | movptr(tmpReg, Address(boxReg, 0)); |
724 | lock(); |
725 | cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box |
726 | // Intention fall-thru into DONE_LABEL |
727 | |
728 | // DONE_LABEL is a hot target - we'd really like to place it at the |
729 | // start of cache line by padding with NOPs. |
730 | // See the AMD and Intel software optimization manuals for the |
731 | // most efficient "long" NOP encodings. |
732 | // Unfortunately none of our alignment mechanisms suffice. |
733 | bind (CheckSucc); |
734 | #else // _LP64 |
735 | // It's inflated |
736 | Label LNotRecursive, LSuccess, LGoSlowPath; |
737 | |
738 | cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value )), 0); |
739 | jccb(Assembler::equal, LNotRecursive)jccb_0(Assembler::equal, LNotRecursive, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 739); |
740 | |
741 | // Recursive inflated unlock |
742 | decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)((ObjectMonitor::recursions_offset_in_bytes()) - markWord::monitor_value ))); |
743 | jmpb(LSuccess)jmpb_0(LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 743); |
744 | |
745 | bind(LNotRecursive); |
746 | movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)((ObjectMonitor::cxq_offset_in_bytes()) - markWord::monitor_value ))); |
747 | orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)((ObjectMonitor::EntryList_offset_in_bytes()) - markWord::monitor_value ))); |
748 | jccb (Assembler::notZero, CheckSucc)jccb_0(Assembler::notZero, CheckSucc, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 748); |
749 | // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
750 | movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value )), (int32_t)NULL_WORD0L); |
751 | jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 751); |
752 | |
753 | // Try to avoid passing control into the slow_path ... |
754 | bind (CheckSucc); |
755 | |
756 | // The following optional optimization can be elided if necessary |
757 | // Effectively: if (succ == null) goto slow path |
758 | // The code reduces the window for a race, however, |
759 | // and thus benefits performance. |
760 | cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)((ObjectMonitor::succ_offset_in_bytes()) - markWord::monitor_value )), (int32_t)NULL_WORD0L); |
761 | jccb (Assembler::zero, LGoSlowPath)jccb_0(Assembler::zero, LGoSlowPath, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 761); |
762 | |
763 | xorptr(boxReg, boxReg); |
764 | // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. |
765 | movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value )), (int32_t)NULL_WORD0L); |
766 | |
767 | // Memory barrier/fence |
768 | // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ |
769 | // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. |
770 | // This is faster on Nehalem and AMD Shanghai/Barcelona. |
771 | // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences |
772 | // We might also restructure (ST Owner=0;barrier;LD _Succ) to |
773 | // (mov box,0; xchgq box, &m->Owner; LD _succ) . |
774 | lock(); addl(Address(rsp, 0), 0); |
775 | |
776 | cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)((ObjectMonitor::succ_offset_in_bytes()) - markWord::monitor_value )), (int32_t)NULL_WORD0L); |
777 | jccb (Assembler::notZero, LSuccess)jccb_0(Assembler::notZero, LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 777); |
778 | |
779 | // Rare inopportune interleaving - race. |
780 | // The successor vanished in the small window above. |
781 | // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. |
782 | // We need to ensure progress and succession. |
783 | // Try to reacquire the lock. |
784 | // If that fails then the new owner is responsible for succession and this |
785 | // thread needs to take no further action and can exit via the fast path (success). |
786 | // If the re-acquire succeeds then pass control into the slow path. |
787 | // As implemented, this latter mode is horrible because we generated more |
788 | // coherence traffic on the lock *and* artifically extended the critical section |
789 | // length while by virtue of passing control into the slow path. |
790 | |
791 | // box is really RAX -- the following CMPXCHG depends on that binding |
792 | // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) |
793 | lock(); |
794 | cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)((ObjectMonitor::owner_offset_in_bytes()) - markWord::monitor_value ))); |
795 | // There's no successor so we tried to regrab the lock. |
796 | // If that didn't work, then another thread grabbed the |
797 | // lock so we're done (and exit was a success). |
798 | jccb (Assembler::notEqual, LSuccess)jccb_0(Assembler::notEqual, LSuccess, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 798); |
799 | // Intentional fall-through into slow path |
800 | |
801 | bind (LGoSlowPath); |
802 | orl (boxReg, 1); // set ICC.ZF=0 to indicate failure |
803 | jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 803); |
804 | |
805 | bind (LSuccess); |
806 | testl (boxReg, 0); // set ICC.ZF=1 to indicate success |
807 | jmpb (DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 807); |
808 | |
809 | if (!UseHeavyMonitors) { |
810 | bind (Stacked); |
811 | movptr(tmpReg, Address (boxReg, 0)); // re-fetch |
812 | lock(); |
813 | cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box |
814 | } |
815 | #endif |
816 | bind(DONE_LABEL); |
817 | } |
818 | |
819 | //------------------------------------------------------------------------------------------- |
820 | // Generic instructions support for use in .ad files C2 code generation |
821 | |
822 | void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, Register scr) { |
823 | if (dst != src) { |
824 | movdqu(dst, src); |
825 | } |
826 | if (opcode == Op_AbsVD) { |
827 | andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr); |
828 | } else { |
829 | assert((opcode == Op_NegVD),"opcode should be Op_NegD")do { if (!((opcode == Op_NegVD))) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 829, "assert(" "(opcode == Op_NegVD)" ") failed", "opcode should be Op_NegD" ); ::breakpoint(); } } while (0); |
830 | xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr); |
831 | } |
832 | } |
833 | |
834 | void C2_MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) { |
835 | if (opcode == Op_AbsVD) { |
836 | vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr); |
837 | } else { |
838 | assert((opcode == Op_NegVD),"opcode should be Op_NegD")do { if (!((opcode == Op_NegVD))) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 838, "assert(" "(opcode == Op_NegVD)" ") failed", "opcode should be Op_NegD" ); ::breakpoint(); } } while (0); |
839 | vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr); |
840 | } |
841 | } |
842 | |
843 | void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr) { |
844 | if (dst != src) { |
845 | movdqu(dst, src); |
846 | } |
847 | if (opcode == Op_AbsVF) { |
848 | andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr); |
849 | } else { |
850 | assert((opcode == Op_NegVF),"opcode should be Op_NegF")do { if (!((opcode == Op_NegVF))) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 850, "assert(" "(opcode == Op_NegVF)" ") failed", "opcode should be Op_NegF" ); ::breakpoint(); } } while (0); |
851 | xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr); |
852 | } |
853 | } |
854 | |
855 | void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) { |
856 | if (opcode == Op_AbsVF) { |
857 | vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr); |
858 | } else { |
859 | assert((opcode == Op_NegVF),"opcode should be Op_NegF")do { if (!((opcode == Op_NegVF))) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 859, "assert(" "(opcode == Op_NegVF)" ") failed", "opcode should be Op_NegF" ); ::breakpoint(); } } while (0); |
860 | vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr); |
861 | } |
862 | } |
863 | |
864 | void C2_MacroAssembler::pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister tmp) { |
865 | assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MaxV)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 865, "assert(" "opcode == Op_MinV || opcode == Op_MaxV" ") failed" , "sanity"); ::breakpoint(); } } while (0); |
866 | assert(tmp == xnoreg || elem_bt == T_LONG, "unused")do { if (!(tmp == xnoreg || elem_bt == T_LONG)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 866, "assert(" "tmp == xnoreg || elem_bt == T_LONG" ") failed" , "unused"); ::breakpoint(); } } while (0); |
867 | |
868 | if (opcode == Op_MinV) { |
869 | if (elem_bt == T_BYTE) { |
870 | pminsb(dst, src); |
871 | } else if (elem_bt == T_SHORT) { |
872 | pminsw(dst, src); |
873 | } else if (elem_bt == T_INT) { |
874 | pminsd(dst, src); |
875 | } else { |
876 | assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 876, "assert(" "elem_bt == T_LONG" ") failed", "required"); ::breakpoint(); } } while (0); |
877 | assert(tmp == xmm0, "required")do { if (!(tmp == xmm0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 877, "assert(" "tmp == xmm0" ") failed", "required"); ::breakpoint (); } } while (0); |
878 | assert_different_registers(dst, src, tmp); |
879 | movdqu(xmm0, dst); |
880 | pcmpgtq(xmm0, src); |
881 | blendvpd(dst, src); // xmm0 as mask |
882 | } |
883 | } else { // opcode == Op_MaxV |
884 | if (elem_bt == T_BYTE) { |
885 | pmaxsb(dst, src); |
886 | } else if (elem_bt == T_SHORT) { |
887 | pmaxsw(dst, src); |
888 | } else if (elem_bt == T_INT) { |
889 | pmaxsd(dst, src); |
890 | } else { |
891 | assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 891, "assert(" "elem_bt == T_LONG" ") failed", "required"); ::breakpoint(); } } while (0); |
892 | assert(tmp == xmm0, "required")do { if (!(tmp == xmm0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 892, "assert(" "tmp == xmm0" ") failed", "required"); ::breakpoint (); } } while (0); |
893 | assert_different_registers(dst, src, tmp); |
894 | movdqu(xmm0, src); |
895 | pcmpgtq(xmm0, dst); |
896 | blendvpd(dst, src); // xmm0 as mask |
897 | } |
898 | } |
899 | } |
900 | |
901 | void C2_MacroAssembler::vpminmax(int opcode, BasicType elem_bt, |
902 | XMMRegister dst, XMMRegister src1, XMMRegister src2, |
903 | int vlen_enc) { |
904 | assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MaxV)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 904, "assert(" "opcode == Op_MinV || opcode == Op_MaxV" ") failed" , "sanity"); ::breakpoint(); } } while (0); |
905 | |
906 | if (opcode == Op_MinV) { |
907 | if (elem_bt == T_BYTE) { |
908 | vpminsb(dst, src1, src2, vlen_enc); |
909 | } else if (elem_bt == T_SHORT) { |
910 | vpminsw(dst, src1, src2, vlen_enc); |
911 | } else if (elem_bt == T_INT) { |
912 | vpminsd(dst, src1, src2, vlen_enc); |
913 | } else { |
914 | assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 914, "assert(" "elem_bt == T_LONG" ") failed", "required"); ::breakpoint(); } } while (0); |
915 | if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { |
916 | vpminsq(dst, src1, src2, vlen_enc); |
917 | } else { |
918 | assert_different_registers(dst, src1, src2); |
919 | vpcmpgtq(dst, src1, src2, vlen_enc); |
920 | vblendvpd(dst, src1, src2, dst, vlen_enc); |
921 | } |
922 | } |
923 | } else { // opcode == Op_MaxV |
924 | if (elem_bt == T_BYTE) { |
925 | vpmaxsb(dst, src1, src2, vlen_enc); |
926 | } else if (elem_bt == T_SHORT) { |
927 | vpmaxsw(dst, src1, src2, vlen_enc); |
928 | } else if (elem_bt == T_INT) { |
929 | vpmaxsd(dst, src1, src2, vlen_enc); |
930 | } else { |
931 | assert(elem_bt == T_LONG, "required")do { if (!(elem_bt == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 931, "assert(" "elem_bt == T_LONG" ") failed", "required"); ::breakpoint(); } } while (0); |
932 | if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { |
933 | vpmaxsq(dst, src1, src2, vlen_enc); |
934 | } else { |
935 | assert_different_registers(dst, src1, src2); |
936 | vpcmpgtq(dst, src1, src2, vlen_enc); |
937 | vblendvpd(dst, src2, src1, dst, vlen_enc); |
938 | } |
939 | } |
940 | } |
941 | } |
942 | |
943 | // Float/Double min max |
944 | |
945 | void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt, |
946 | XMMRegister dst, XMMRegister a, XMMRegister b, |
947 | XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, |
948 | int vlen_enc) { |
949 | assert(UseAVX > 0, "required")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 949, "assert(" "UseAVX > 0" ") failed", "required"); ::breakpoint (); } } while (0); |
950 | assert(opcode == Op_MinV || opcode == Op_MinReductionV ||do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 951, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV" ") failed", "sanity"); ::breakpoint(); } } while (0) |
951 | opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 951, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV" ") failed", "sanity"); ::breakpoint(); } } while (0); |
952 | assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity")do { if (!(elem_bt == T_FLOAT || elem_bt == T_DOUBLE)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 952, "assert(" "elem_bt == T_FLOAT || elem_bt == T_DOUBLE" ") failed" , "sanity"); ::breakpoint(); } } while (0); |
953 | assert_different_registers(a, b, tmp, atmp, btmp); |
954 | |
955 | bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV); |
956 | bool is_double_word = is_double_word_type(elem_bt); |
957 | |
958 | if (!is_double_word && is_min) { |
959 | vblendvps(atmp, a, b, a, vlen_enc); |
960 | vblendvps(btmp, b, a, a, vlen_enc); |
961 | vminps(tmp, atmp, btmp, vlen_enc); |
962 | vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
963 | vblendvps(dst, tmp, atmp, btmp, vlen_enc); |
964 | } else if (!is_double_word && !is_min) { |
965 | vblendvps(btmp, b, a, b, vlen_enc); |
966 | vblendvps(atmp, a, b, b, vlen_enc); |
967 | vmaxps(tmp, atmp, btmp, vlen_enc); |
968 | vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
969 | vblendvps(dst, tmp, atmp, btmp, vlen_enc); |
970 | } else if (is_double_word && is_min) { |
971 | vblendvpd(atmp, a, b, a, vlen_enc); |
972 | vblendvpd(btmp, b, a, a, vlen_enc); |
973 | vminpd(tmp, atmp, btmp, vlen_enc); |
974 | vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
975 | vblendvpd(dst, tmp, atmp, btmp, vlen_enc); |
976 | } else { |
977 | assert(is_double_word && !is_min, "sanity")do { if (!(is_double_word && !is_min)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 977, "assert(" "is_double_word && !is_min" ") failed" , "sanity"); ::breakpoint(); } } while (0); |
978 | vblendvpd(btmp, b, a, b, vlen_enc); |
979 | vblendvpd(atmp, a, b, b, vlen_enc); |
980 | vmaxpd(tmp, atmp, btmp, vlen_enc); |
981 | vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
982 | vblendvpd(dst, tmp, atmp, btmp, vlen_enc); |
983 | } |
984 | } |
985 | |
986 | void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, |
987 | XMMRegister dst, XMMRegister a, XMMRegister b, |
988 | KRegister ktmp, XMMRegister atmp, XMMRegister btmp, |
989 | int vlen_enc) { |
990 | assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 990, "assert(" "UseAVX > 2" ") failed", "required"); ::breakpoint (); } } while (0); |
991 | assert(opcode == Op_MinV || opcode == Op_MinReductionV ||do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 992, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV" ") failed", "sanity"); ::breakpoint(); } } while (0) |
992 | opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity")do { if (!(opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 992, "assert(" "opcode == Op_MinV || opcode == Op_MinReductionV || opcode == Op_MaxV || opcode == Op_MaxReductionV" ") failed", "sanity"); ::breakpoint(); } } while (0); |
993 | assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity")do { if (!(elem_bt == T_FLOAT || elem_bt == T_DOUBLE)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 993, "assert(" "elem_bt == T_FLOAT || elem_bt == T_DOUBLE" ") failed" , "sanity"); ::breakpoint(); } } while (0); |
994 | assert_different_registers(dst, a, b, atmp, btmp); |
995 | |
996 | bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV); |
997 | bool is_double_word = is_double_word_type(elem_bt); |
998 | bool merge = true; |
999 | |
1000 | if (!is_double_word && is_min) { |
1001 | evpmovd2m(ktmp, a, vlen_enc); |
1002 | evblendmps(atmp, ktmp, a, b, merge, vlen_enc); |
1003 | evblendmps(btmp, ktmp, b, a, merge, vlen_enc); |
1004 | vminps(dst, atmp, btmp, vlen_enc); |
1005 | evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
1006 | evmovdqul(dst, ktmp, atmp, merge, vlen_enc); |
1007 | } else if (!is_double_word && !is_min) { |
1008 | evpmovd2m(ktmp, b, vlen_enc); |
1009 | evblendmps(atmp, ktmp, a, b, merge, vlen_enc); |
1010 | evblendmps(btmp, ktmp, b, a, merge, vlen_enc); |
1011 | vmaxps(dst, atmp, btmp, vlen_enc); |
1012 | evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
1013 | evmovdqul(dst, ktmp, atmp, merge, vlen_enc); |
1014 | } else if (is_double_word && is_min) { |
1015 | evpmovq2m(ktmp, a, vlen_enc); |
1016 | evblendmpd(atmp, ktmp, a, b, merge, vlen_enc); |
1017 | evblendmpd(btmp, ktmp, b, a, merge, vlen_enc); |
1018 | vminpd(dst, atmp, btmp, vlen_enc); |
1019 | evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
1020 | evmovdquq(dst, ktmp, atmp, merge, vlen_enc); |
1021 | } else { |
1022 | assert(is_double_word && !is_min, "sanity")do { if (!(is_double_word && !is_min)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1022, "assert(" "is_double_word && !is_min" ") failed" , "sanity"); ::breakpoint(); } } while (0); |
1023 | evpmovq2m(ktmp, b, vlen_enc); |
1024 | evblendmpd(atmp, ktmp, a, b, merge, vlen_enc); |
1025 | evblendmpd(btmp, ktmp, b, a, merge, vlen_enc); |
1026 | vmaxpd(dst, atmp, btmp, vlen_enc); |
1027 | evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); |
1028 | evmovdquq(dst, ktmp, atmp, merge, vlen_enc); |
1029 | } |
1030 | } |
1031 | |
1032 | // Float/Double signum |
1033 | void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, |
1034 | XMMRegister zero, XMMRegister one, |
1035 | Register scratch) { |
1036 | assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity")do { if (!(opcode == Op_SignumF || opcode == Op_SignumD)) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1036, "assert(" "opcode == Op_SignumF || opcode == Op_SignumD" ") failed", "sanity"); ::breakpoint(); } } while (0); |
1037 | |
1038 | Label DONE_LABEL; |
1039 | |
1040 | if (opcode == Op_SignumF) { |
1041 | assert(UseSSE > 0, "required")do { if (!(UseSSE > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1041, "assert(" "UseSSE > 0" ") failed", "required"); :: breakpoint(); } } while (0); |
1042 | ucomiss(dst, zero); |
1043 | jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument |
1044 | jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN |
1045 | movflt(dst, one); |
1046 | jcc(Assembler::above, DONE_LABEL); |
1047 | xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scratch); |
1048 | } else if (opcode == Op_SignumD) { |
1049 | assert(UseSSE > 1, "required")do { if (!(UseSSE > 1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1049, "assert(" "UseSSE > 1" ") failed", "required"); :: breakpoint(); } } while (0); |
1050 | ucomisd(dst, zero); |
1051 | jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument |
1052 | jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN |
1053 | movdbl(dst, one); |
1054 | jcc(Assembler::above, DONE_LABEL); |
1055 | xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scratch); |
1056 | } |
1057 | |
1058 | bind(DONE_LABEL); |
1059 | } |
1060 | |
1061 | void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) { |
1062 | if (sign) { |
1063 | pmovsxbw(dst, src); |
1064 | } else { |
1065 | pmovzxbw(dst, src); |
1066 | } |
1067 | } |
1068 | |
1069 | void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { |
1070 | if (sign) { |
1071 | vpmovsxbw(dst, src, vector_len); |
1072 | } else { |
1073 | vpmovzxbw(dst, src, vector_len); |
1074 | } |
1075 | } |
1076 | |
1077 | void C2_MacroAssembler::vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { |
1078 | if (sign) { |
1079 | vpmovsxbd(dst, src, vector_len); |
1080 | } else { |
1081 | vpmovzxbd(dst, src, vector_len); |
1082 | } |
1083 | } |
1084 | |
1085 | void C2_MacroAssembler::vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { |
1086 | if (sign) { |
1087 | vpmovsxwd(dst, src, vector_len); |
1088 | } else { |
1089 | vpmovzxwd(dst, src, vector_len); |
1090 | } |
1091 | } |
1092 | |
1093 | void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, |
1094 | int shift, int vector_len) { |
1095 | if (opcode == Op_RotateLeftV) { |
1096 | if (etype == T_INT) { |
1097 | evprold(dst, src, shift, vector_len); |
1098 | } else { |
1099 | assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1099, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG" ); ::breakpoint(); } } while (0); |
1100 | evprolq(dst, src, shift, vector_len); |
1101 | } |
1102 | } else { |
1103 | assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV")do { if (!(opcode == Op_RotateRightV)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1103, "assert(" "opcode == Op_RotateRightV" ") failed", "opcode should be Op_RotateRightV" ); ::breakpoint(); } } while (0); |
1104 | if (etype == T_INT) { |
1105 | evprord(dst, src, shift, vector_len); |
1106 | } else { |
1107 | assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1107, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG" ); ::breakpoint(); } } while (0); |
1108 | evprorq(dst, src, shift, vector_len); |
1109 | } |
1110 | } |
1111 | } |
1112 | |
1113 | void C2_MacroAssembler::vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, |
1114 | XMMRegister shift, int vector_len) { |
1115 | if (opcode == Op_RotateLeftV) { |
1116 | if (etype == T_INT) { |
1117 | evprolvd(dst, src, shift, vector_len); |
1118 | } else { |
1119 | assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1119, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG" ); ::breakpoint(); } } while (0); |
1120 | evprolvq(dst, src, shift, vector_len); |
1121 | } |
1122 | } else { |
1123 | assert(opcode == Op_RotateRightV, "opcode should be Op_RotateRightV")do { if (!(opcode == Op_RotateRightV)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1123, "assert(" "opcode == Op_RotateRightV" ") failed", "opcode should be Op_RotateRightV" ); ::breakpoint(); } } while (0); |
1124 | if (etype == T_INT) { |
1125 | evprorvd(dst, src, shift, vector_len); |
1126 | } else { |
1127 | assert(etype == T_LONG, "expected type T_LONG")do { if (!(etype == T_LONG)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1127, "assert(" "etype == T_LONG" ") failed", "expected type T_LONG" ); ::breakpoint(); } } while (0); |
1128 | evprorvq(dst, src, shift, vector_len); |
1129 | } |
1130 | } |
1131 | } |
1132 | |
1133 | void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) { |
1134 | if (opcode == Op_RShiftVI) { |
1135 | psrad(dst, shift); |
1136 | } else if (opcode == Op_LShiftVI) { |
1137 | pslld(dst, shift); |
1138 | } else { |
1139 | assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI")do { if (!((opcode == Op_URShiftVI))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1139, "assert(" "(opcode == Op_URShiftVI)" ") failed", "opcode should be Op_URShiftVI" ); ::breakpoint(); } } while (0); |
1140 | psrld(dst, shift); |
1141 | } |
1142 | } |
1143 | |
1144 | void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister shift) { |
1145 | switch (opcode) { |
1146 | case Op_RShiftVI: psrad(dst, shift); break; |
1147 | case Op_LShiftVI: pslld(dst, shift); break; |
1148 | case Op_URShiftVI: psrld(dst, shift); break; |
1149 | |
1150 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1150, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1151 | } |
1152 | } |
1153 | |
1154 | void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) { |
1155 | if (opcode == Op_RShiftVI) { |
1156 | vpsrad(dst, nds, shift, vector_len); |
1157 | } else if (opcode == Op_LShiftVI) { |
1158 | vpslld(dst, nds, shift, vector_len); |
1159 | } else { |
1160 | assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI")do { if (!((opcode == Op_URShiftVI))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1160, "assert(" "(opcode == Op_URShiftVI)" ") failed", "opcode should be Op_URShiftVI" ); ::breakpoint(); } } while (0); |
1161 | vpsrld(dst, nds, shift, vector_len); |
1162 | } |
1163 | } |
1164 | |
1165 | void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { |
1166 | switch (opcode) { |
1167 | case Op_RShiftVI: vpsrad(dst, src, shift, vlen_enc); break; |
1168 | case Op_LShiftVI: vpslld(dst, src, shift, vlen_enc); break; |
1169 | case Op_URShiftVI: vpsrld(dst, src, shift, vlen_enc); break; |
1170 | |
1171 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1171, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1172 | } |
1173 | } |
1174 | |
1175 | void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister shift) { |
1176 | switch (opcode) { |
1177 | case Op_RShiftVB: // fall-through |
1178 | case Op_RShiftVS: psraw(dst, shift); break; |
1179 | |
1180 | case Op_LShiftVB: // fall-through |
1181 | case Op_LShiftVS: psllw(dst, shift); break; |
1182 | |
1183 | case Op_URShiftVS: // fall-through |
1184 | case Op_URShiftVB: psrlw(dst, shift); break; |
1185 | |
1186 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1186, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1187 | } |
1188 | } |
1189 | |
1190 | void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { |
1191 | switch (opcode) { |
1192 | case Op_RShiftVB: // fall-through |
1193 | case Op_RShiftVS: vpsraw(dst, src, shift, vlen_enc); break; |
1194 | |
1195 | case Op_LShiftVB: // fall-through |
1196 | case Op_LShiftVS: vpsllw(dst, src, shift, vlen_enc); break; |
1197 | |
1198 | case Op_URShiftVS: // fall-through |
1199 | case Op_URShiftVB: vpsrlw(dst, src, shift, vlen_enc); break; |
1200 | |
1201 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1201, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1202 | } |
1203 | } |
1204 | |
1205 | void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister shift) { |
1206 | switch (opcode) { |
1207 | case Op_RShiftVL: psrlq(dst, shift); break; // using srl to implement sra on pre-avs512 systems |
1208 | case Op_LShiftVL: psllq(dst, shift); break; |
1209 | case Op_URShiftVL: psrlq(dst, shift); break; |
1210 | |
1211 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1211, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1212 | } |
1213 | } |
1214 | |
1215 | void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) { |
1216 | if (opcode == Op_RShiftVL) { |
1217 | psrlq(dst, shift); // using srl to implement sra on pre-avs512 systems |
1218 | } else if (opcode == Op_LShiftVL) { |
1219 | psllq(dst, shift); |
1220 | } else { |
1221 | assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL")do { if (!((opcode == Op_URShiftVL))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1221, "assert(" "(opcode == Op_URShiftVL)" ") failed", "opcode should be Op_URShiftVL" ); ::breakpoint(); } } while (0); |
1222 | psrlq(dst, shift); |
1223 | } |
1224 | } |
1225 | |
1226 | void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { |
1227 | switch (opcode) { |
1228 | case Op_RShiftVL: evpsraq(dst, src, shift, vlen_enc); break; |
1229 | case Op_LShiftVL: vpsllq(dst, src, shift, vlen_enc); break; |
1230 | case Op_URShiftVL: vpsrlq(dst, src, shift, vlen_enc); break; |
1231 | |
1232 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1232, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1233 | } |
1234 | } |
1235 | |
1236 | void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len) { |
1237 | if (opcode == Op_RShiftVL) { |
1238 | evpsraq(dst, nds, shift, vector_len); |
1239 | } else if (opcode == Op_LShiftVL) { |
1240 | vpsllq(dst, nds, shift, vector_len); |
1241 | } else { |
1242 | assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL")do { if (!((opcode == Op_URShiftVL))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1242, "assert(" "(opcode == Op_URShiftVL)" ") failed", "opcode should be Op_URShiftVL" ); ::breakpoint(); } } while (0); |
1243 | vpsrlq(dst, nds, shift, vector_len); |
1244 | } |
1245 | } |
1246 | |
1247 | void C2_MacroAssembler::varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { |
1248 | switch (opcode) { |
1249 | case Op_RShiftVB: // fall-through |
1250 | case Op_RShiftVS: // fall-through |
1251 | case Op_RShiftVI: vpsravd(dst, src, shift, vlen_enc); break; |
1252 | |
1253 | case Op_LShiftVB: // fall-through |
1254 | case Op_LShiftVS: // fall-through |
1255 | case Op_LShiftVI: vpsllvd(dst, src, shift, vlen_enc); break; |
1256 | |
1257 | case Op_URShiftVB: // fall-through |
1258 | case Op_URShiftVS: // fall-through |
1259 | case Op_URShiftVI: vpsrlvd(dst, src, shift, vlen_enc); break; |
1260 | |
1261 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1261, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1262 | } |
1263 | } |
1264 | |
1265 | void C2_MacroAssembler::varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { |
1266 | switch (opcode) { |
1267 | case Op_RShiftVB: // fall-through |
1268 | case Op_RShiftVS: evpsravw(dst, src, shift, vlen_enc); break; |
1269 | |
1270 | case Op_LShiftVB: // fall-through |
1271 | case Op_LShiftVS: evpsllvw(dst, src, shift, vlen_enc); break; |
1272 | |
1273 | case Op_URShiftVB: // fall-through |
1274 | case Op_URShiftVS: evpsrlvw(dst, src, shift, vlen_enc); break; |
1275 | |
1276 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1276, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1277 | } |
1278 | } |
1279 | |
1280 | void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister tmp) { |
1281 | assert(UseAVX >= 2, "required")do { if (!(UseAVX >= 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1281, "assert(" "UseAVX >= 2" ") failed", "required"); :: breakpoint(); } } while (0); |
1282 | switch (opcode) { |
1283 | case Op_RShiftVL: { |
1284 | if (UseAVX > 2) { |
1285 | assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1285, "assert(" "tmp == xnoreg" ") failed", "not used"); :: breakpoint(); } } while (0); |
1286 | if (!VM_Version::supports_avx512vl()) { |
1287 | vlen_enc = Assembler::AVX_512bit; |
1288 | } |
1289 | evpsravq(dst, src, shift, vlen_enc); |
1290 | } else { |
1291 | vmovdqu(tmp, ExternalAddress(StubRoutines::x86::vector_long_sign_mask())); |
1292 | vpsrlvq(dst, src, shift, vlen_enc); |
1293 | vpsrlvq(tmp, tmp, shift, vlen_enc); |
1294 | vpxor(dst, dst, tmp, vlen_enc); |
1295 | vpsubq(dst, dst, tmp, vlen_enc); |
1296 | } |
1297 | break; |
1298 | } |
1299 | case Op_LShiftVL: { |
1300 | assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1300, "assert(" "tmp == xnoreg" ") failed", "not used"); :: breakpoint(); } } while (0); |
1301 | vpsllvq(dst, src, shift, vlen_enc); |
1302 | break; |
1303 | } |
1304 | case Op_URShiftVL: { |
1305 | assert(tmp == xnoreg, "not used")do { if (!(tmp == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1305, "assert(" "tmp == xnoreg" ") failed", "not used"); :: breakpoint(); } } while (0); |
1306 | vpsrlvq(dst, src, shift, vlen_enc); |
1307 | break; |
1308 | } |
1309 | default: assert(false, "%s", NodeClassNames[opcode])do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1309, "assert(" "false" ") failed", "%s", NodeClassNames[opcode ]); ::breakpoint(); } } while (0); |
1310 | } |
1311 | } |
1312 | |
1313 | // Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst |
1314 | void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { |
1315 | assert(opcode == Op_LShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB" ") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); } } while (0) |
1316 | opcode == Op_RShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB" ") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); } } while (0) |
1317 | opcode == Op_URShiftVB, "%s", NodeClassNames[opcode])do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1317, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB" ") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); } } while (0); |
1318 | bool sign = (opcode != Op_URShiftVB); |
1319 | assert(vector_len == 0, "required")do { if (!(vector_len == 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1319, "assert(" "vector_len == 0" ") failed", "required"); :: breakpoint(); } } while (0); |
1320 | vextendbd(sign, dst, src, 1); |
1321 | vpmovzxbd(vtmp, shift, 1); |
1322 | varshiftd(opcode, dst, dst, vtmp, 1); |
1323 | vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch); |
1324 | vextracti128_high(vtmp, dst); |
1325 | vpackusdw(dst, dst, vtmp, 0); |
1326 | } |
1327 | |
1328 | // Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst |
1329 | void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { |
1330 | assert(opcode == Op_LShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB" ") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); } } while (0) |
1331 | opcode == Op_RShiftVB ||do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB" ") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); } } while (0) |
1332 | opcode == Op_URShiftVB, "%s", NodeClassNames[opcode])do { if (!(opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1332, "assert(" "opcode == Op_LShiftVB || opcode == Op_RShiftVB || opcode == Op_URShiftVB" ") failed", "%s", NodeClassNames[opcode]); ::breakpoint(); } } while (0); |
1333 | bool sign = (opcode != Op_URShiftVB); |
1334 | int ext_vector_len = vector_len + 1; |
1335 | vextendbw(sign, dst, src, ext_vector_len); |
1336 | vpmovzxbw(vtmp, shift, ext_vector_len); |
1337 | varshiftw(opcode, dst, dst, vtmp, ext_vector_len); |
1338 | vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch); |
1339 | if (vector_len == 0) { |
1340 | vextracti128_high(vtmp, dst); |
1341 | vpackuswb(dst, dst, vtmp, vector_len); |
1342 | } else { |
1343 | vextracti64x4_high(vtmp, dst); |
1344 | vpackuswb(dst, dst, vtmp, vector_len); |
1345 | vpermq(dst, dst, 0xD8, vector_len); |
1346 | } |
1347 | } |
1348 | |
1349 | void C2_MacroAssembler::insert(BasicType typ, XMMRegister dst, Register val, int idx) { |
1350 | switch(typ) { |
1351 | case T_BYTE: |
1352 | pinsrb(dst, val, idx); |
1353 | break; |
1354 | case T_SHORT: |
1355 | pinsrw(dst, val, idx); |
1356 | break; |
1357 | case T_INT: |
1358 | pinsrd(dst, val, idx); |
1359 | break; |
1360 | case T_LONG: |
1361 | pinsrq(dst, val, idx); |
1362 | break; |
1363 | default: |
1364 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1364, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
1365 | break; |
1366 | } |
1367 | } |
1368 | |
1369 | void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx) { |
1370 | switch(typ) { |
1371 | case T_BYTE: |
1372 | vpinsrb(dst, src, val, idx); |
1373 | break; |
1374 | case T_SHORT: |
1375 | vpinsrw(dst, src, val, idx); |
1376 | break; |
1377 | case T_INT: |
1378 | vpinsrd(dst, src, val, idx); |
1379 | break; |
1380 | case T_LONG: |
1381 | vpinsrq(dst, src, val, idx); |
1382 | break; |
1383 | default: |
1384 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1384, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
1385 | break; |
1386 | } |
1387 | } |
1388 | |
1389 | void C2_MacroAssembler::vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len) { |
1390 | switch(typ) { |
1391 | case T_INT: |
1392 | vpgatherdd(dst, Address(base, idx, Address::times_4), mask, vector_len); |
1393 | break; |
1394 | case T_FLOAT: |
1395 | vgatherdps(dst, Address(base, idx, Address::times_4), mask, vector_len); |
1396 | break; |
1397 | case T_LONG: |
1398 | vpgatherdq(dst, Address(base, idx, Address::times_8), mask, vector_len); |
1399 | break; |
1400 | case T_DOUBLE: |
1401 | vgatherdpd(dst, Address(base, idx, Address::times_8), mask, vector_len); |
1402 | break; |
1403 | default: |
1404 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1404, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
1405 | break; |
1406 | } |
1407 | } |
1408 | |
1409 | void C2_MacroAssembler::evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len) { |
1410 | switch(typ) { |
1411 | case T_INT: |
1412 | evpgatherdd(dst, mask, Address(base, idx, Address::times_4), vector_len); |
1413 | break; |
1414 | case T_FLOAT: |
1415 | evgatherdps(dst, mask, Address(base, idx, Address::times_4), vector_len); |
1416 | break; |
1417 | case T_LONG: |
1418 | evpgatherdq(dst, mask, Address(base, idx, Address::times_8), vector_len); |
1419 | break; |
1420 | case T_DOUBLE: |
1421 | evgatherdpd(dst, mask, Address(base, idx, Address::times_8), vector_len); |
1422 | break; |
1423 | default: |
1424 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1424, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
1425 | break; |
1426 | } |
1427 | } |
1428 | |
1429 | void C2_MacroAssembler::evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len) { |
1430 | switch(typ) { |
1431 | case T_INT: |
1432 | evpscatterdd(Address(base, idx, Address::times_4), mask, src, vector_len); |
1433 | break; |
1434 | case T_FLOAT: |
1435 | evscatterdps(Address(base, idx, Address::times_4), mask, src, vector_len); |
1436 | break; |
1437 | case T_LONG: |
1438 | evpscatterdq(Address(base, idx, Address::times_8), mask, src, vector_len); |
1439 | break; |
1440 | case T_DOUBLE: |
1441 | evscatterdpd(Address(base, idx, Address::times_8), mask, src, vector_len); |
1442 | break; |
1443 | default: |
1444 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1444, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
1445 | break; |
1446 | } |
1447 | } |
1448 | |
1449 | void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt, bool is_legacy) { |
1450 | if (vlen_in_bytes <= 16) { |
1451 | pxor (dst, dst); |
1452 | psubb(dst, src); |
1453 | switch (elem_bt) { |
1454 | case T_BYTE: /* nothing to do */ break; |
1455 | case T_SHORT: pmovsxbw(dst, dst); break; |
1456 | case T_INT: pmovsxbd(dst, dst); break; |
1457 | case T_FLOAT: pmovsxbd(dst, dst); break; |
1458 | case T_LONG: pmovsxbq(dst, dst); break; |
1459 | case T_DOUBLE: pmovsxbq(dst, dst); break; |
1460 | |
1461 | default: assert(false, "%s", type2name(elem_bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1461, "assert(" "false" ") failed", "%s", type2name(elem_bt )); ::breakpoint(); } } while (0); |
1462 | } |
1463 | } else { |
1464 | assert(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64, "")do { if (!(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1464, "assert(" "!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64" ") failed", ""); ::breakpoint(); } } while (0); |
1465 | int vlen_enc = vector_length_encoding(vlen_in_bytes); |
1466 | |
1467 | vpxor (dst, dst, dst, vlen_enc); |
1468 | vpsubb(dst, dst, src, is_legacy ? AVX_256bit : vlen_enc); |
1469 | |
1470 | switch (elem_bt) { |
1471 | case T_BYTE: /* nothing to do */ break; |
1472 | case T_SHORT: vpmovsxbw(dst, dst, vlen_enc); break; |
1473 | case T_INT: vpmovsxbd(dst, dst, vlen_enc); break; |
1474 | case T_FLOAT: vpmovsxbd(dst, dst, vlen_enc); break; |
1475 | case T_LONG: vpmovsxbq(dst, dst, vlen_enc); break; |
1476 | case T_DOUBLE: vpmovsxbq(dst, dst, vlen_enc); break; |
1477 | |
1478 | default: assert(false, "%s", type2name(elem_bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1478, "assert(" "false" ") failed", "%s", type2name(elem_bt )); ::breakpoint(); } } while (0); |
1479 | } |
1480 | } |
1481 | } |
1482 | |
1483 | void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp, |
1484 | Register tmp, bool novlbwdq, int vlen_enc) { |
1485 | if (novlbwdq) { |
1486 | vpmovsxbd(xtmp, src, vlen_enc); |
1487 | evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()), |
1488 | Assembler::eq, true, vlen_enc, tmp); |
1489 | } else { |
1490 | vpxor(xtmp, xtmp, xtmp, vlen_enc); |
1491 | vpsubb(xtmp, xtmp, src, vlen_enc); |
1492 | evpmovb2m(dst, xtmp, vlen_enc); |
1493 | } |
1494 | } |
1495 | |
1496 | void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) { |
1497 | ExternalAddress addr(StubRoutines::x86::vector_iota_indices()); |
1498 | if (vlen_in_bytes == 4) { |
1499 | movdl(dst, addr); |
1500 | } else if (vlen_in_bytes == 8) { |
1501 | movq(dst, addr); |
1502 | } else if (vlen_in_bytes == 16) { |
1503 | movdqu(dst, addr, scratch); |
1504 | } else if (vlen_in_bytes == 32) { |
1505 | vmovdqu(dst, addr, scratch); |
1506 | } else { |
1507 | assert(vlen_in_bytes == 64, "%d", vlen_in_bytes)do { if (!(vlen_in_bytes == 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1507, "assert(" "vlen_in_bytes == 64" ") failed", "%d", vlen_in_bytes ); ::breakpoint(); } } while (0); |
1508 | evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch); |
1509 | } |
1510 | } |
1511 | |
1512 | // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. |
1513 | |
1514 | void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) { |
1515 | int vector_len = Assembler::AVX_128bit; |
1516 | |
1517 | switch (opcode) { |
1518 | case Op_AndReductionV: pand(dst, src); break; |
1519 | case Op_OrReductionV: por (dst, src); break; |
1520 | case Op_XorReductionV: pxor(dst, src); break; |
1521 | case Op_MinReductionV: |
1522 | switch (typ) { |
1523 | case T_BYTE: pminsb(dst, src); break; |
1524 | case T_SHORT: pminsw(dst, src); break; |
1525 | case T_INT: pminsd(dst, src); break; |
1526 | case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1526, "assert(" "UseAVX > 2" ") failed", "required"); :: breakpoint(); } } while (0); |
1527 | vpminsq(dst, dst, src, Assembler::AVX_128bit); break; |
1528 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1528, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1529 | } |
1530 | break; |
1531 | case Op_MaxReductionV: |
1532 | switch (typ) { |
1533 | case T_BYTE: pmaxsb(dst, src); break; |
1534 | case T_SHORT: pmaxsw(dst, src); break; |
1535 | case T_INT: pmaxsd(dst, src); break; |
1536 | case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1536, "assert(" "UseAVX > 2" ") failed", "required"); :: breakpoint(); } } while (0); |
1537 | vpmaxsq(dst, dst, src, Assembler::AVX_128bit); break; |
1538 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1538, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1539 | } |
1540 | break; |
1541 | case Op_AddReductionVF: addss(dst, src); break; |
1542 | case Op_AddReductionVD: addsd(dst, src); break; |
1543 | case Op_AddReductionVI: |
1544 | switch (typ) { |
1545 | case T_BYTE: paddb(dst, src); break; |
1546 | case T_SHORT: paddw(dst, src); break; |
1547 | case T_INT: paddd(dst, src); break; |
1548 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1548, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1549 | } |
1550 | break; |
1551 | case Op_AddReductionVL: paddq(dst, src); break; |
1552 | case Op_MulReductionVF: mulss(dst, src); break; |
1553 | case Op_MulReductionVD: mulsd(dst, src); break; |
1554 | case Op_MulReductionVI: |
1555 | switch (typ) { |
1556 | case T_SHORT: pmullw(dst, src); break; |
1557 | case T_INT: pmulld(dst, src); break; |
1558 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1558, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1559 | } |
1560 | break; |
1561 | case Op_MulReductionVL: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1561, "assert(" "UseAVX > 2" ") failed", "required"); :: breakpoint(); } } while (0); |
1562 | vpmullq(dst, dst, src, vector_len); break; |
1563 | default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1563, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint (); } } while (0); |
1564 | } |
1565 | } |
1566 | |
1567 | void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) { |
1568 | int vector_len = Assembler::AVX_256bit; |
1569 | |
1570 | switch (opcode) { |
1571 | case Op_AndReductionV: vpand(dst, src1, src2, vector_len); break; |
1572 | case Op_OrReductionV: vpor (dst, src1, src2, vector_len); break; |
1573 | case Op_XorReductionV: vpxor(dst, src1, src2, vector_len); break; |
1574 | case Op_MinReductionV: |
1575 | switch (typ) { |
1576 | case T_BYTE: vpminsb(dst, src1, src2, vector_len); break; |
1577 | case T_SHORT: vpminsw(dst, src1, src2, vector_len); break; |
1578 | case T_INT: vpminsd(dst, src1, src2, vector_len); break; |
1579 | case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1579, "assert(" "UseAVX > 2" ") failed", "required"); :: breakpoint(); } } while (0); |
1580 | vpminsq(dst, src1, src2, vector_len); break; |
1581 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1581, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1582 | } |
1583 | break; |
1584 | case Op_MaxReductionV: |
1585 | switch (typ) { |
1586 | case T_BYTE: vpmaxsb(dst, src1, src2, vector_len); break; |
1587 | case T_SHORT: vpmaxsw(dst, src1, src2, vector_len); break; |
1588 | case T_INT: vpmaxsd(dst, src1, src2, vector_len); break; |
1589 | case T_LONG: assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1589, "assert(" "UseAVX > 2" ") failed", "required"); :: breakpoint(); } } while (0); |
1590 | vpmaxsq(dst, src1, src2, vector_len); break; |
1591 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1591, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1592 | } |
1593 | break; |
1594 | case Op_AddReductionVI: |
1595 | switch (typ) { |
1596 | case T_BYTE: vpaddb(dst, src1, src2, vector_len); break; |
1597 | case T_SHORT: vpaddw(dst, src1, src2, vector_len); break; |
1598 | case T_INT: vpaddd(dst, src1, src2, vector_len); break; |
1599 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1599, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1600 | } |
1601 | break; |
1602 | case Op_AddReductionVL: vpaddq(dst, src1, src2, vector_len); break; |
1603 | case Op_MulReductionVI: |
1604 | switch (typ) { |
1605 | case T_SHORT: vpmullw(dst, src1, src2, vector_len); break; |
1606 | case T_INT: vpmulld(dst, src1, src2, vector_len); break; |
1607 | default: assert(false, "wrong type")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1607, "assert(" "false" ") failed", "wrong type"); ::breakpoint (); } } while (0); |
1608 | } |
1609 | break; |
1610 | case Op_MulReductionVL: vpmullq(dst, src1, src2, vector_len); break; |
1611 | default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1611, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint (); } } while (0); |
1612 | } |
1613 | } |
1614 | |
1615 | void C2_MacroAssembler::reduce_fp(int opcode, int vlen, |
1616 | XMMRegister dst, XMMRegister src, |
1617 | XMMRegister vtmp1, XMMRegister vtmp2) { |
1618 | switch (opcode) { |
1619 | case Op_AddReductionVF: |
1620 | case Op_MulReductionVF: |
1621 | reduceF(opcode, vlen, dst, src, vtmp1, vtmp2); |
1622 | break; |
1623 | |
1624 | case Op_AddReductionVD: |
1625 | case Op_MulReductionVD: |
1626 | reduceD(opcode, vlen, dst, src, vtmp1, vtmp2); |
1627 | break; |
1628 | |
1629 | default: assert(false, "wrong opcode")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1629, "assert(" "false" ") failed", "wrong opcode"); ::breakpoint (); } } while (0); |
1630 | } |
1631 | } |
1632 | |
1633 | void C2_MacroAssembler::reduceB(int opcode, int vlen, |
1634 | Register dst, Register src1, XMMRegister src2, |
1635 | XMMRegister vtmp1, XMMRegister vtmp2) { |
1636 | switch (vlen) { |
1637 | case 8: reduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1638 | case 16: reduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1639 | case 32: reduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1640 | case 64: reduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1641 | |
1642 | default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1642, "assert(" "false" ") failed", "wrong vector length"); ::breakpoint(); } } while (0); |
1643 | } |
1644 | } |
1645 | |
1646 | void C2_MacroAssembler::mulreduceB(int opcode, int vlen, |
1647 | Register dst, Register src1, XMMRegister src2, |
1648 | XMMRegister vtmp1, XMMRegister vtmp2) { |
1649 | switch (vlen) { |
1650 | case 8: mulreduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1651 | case 16: mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1652 | case 32: mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1653 | case 64: mulreduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1654 | |
1655 | default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1655, "assert(" "false" ") failed", "wrong vector length"); ::breakpoint(); } } while (0); |
1656 | } |
1657 | } |
1658 | |
1659 | void C2_MacroAssembler::reduceS(int opcode, int vlen, |
1660 | Register dst, Register src1, XMMRegister src2, |
1661 | XMMRegister vtmp1, XMMRegister vtmp2) { |
1662 | switch (vlen) { |
1663 | case 4: reduce4S (opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1664 | case 8: reduce8S (opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1665 | case 16: reduce16S(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1666 | case 32: reduce32S(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1667 | |
1668 | default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1668, "assert(" "false" ") failed", "wrong vector length"); ::breakpoint(); } } while (0); |
1669 | } |
1670 | } |
1671 | |
1672 | void C2_MacroAssembler::reduceI(int opcode, int vlen, |
1673 | Register dst, Register src1, XMMRegister src2, |
1674 | XMMRegister vtmp1, XMMRegister vtmp2) { |
1675 | switch (vlen) { |
1676 | case 2: reduce2I (opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1677 | case 4: reduce4I (opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1678 | case 8: reduce8I (opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1679 | case 16: reduce16I(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1680 | |
1681 | default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1681, "assert(" "false" ") failed", "wrong vector length"); ::breakpoint(); } } while (0); |
1682 | } |
1683 | } |
1684 | |
1685 | #ifdef _LP641 |
1686 | void C2_MacroAssembler::reduceL(int opcode, int vlen, |
1687 | Register dst, Register src1, XMMRegister src2, |
1688 | XMMRegister vtmp1, XMMRegister vtmp2) { |
1689 | switch (vlen) { |
1690 | case 2: reduce2L(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1691 | case 4: reduce4L(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1692 | case 8: reduce8L(opcode, dst, src1, src2, vtmp1, vtmp2); break; |
1693 | |
1694 | default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1694, "assert(" "false" ") failed", "wrong vector length"); ::breakpoint(); } } while (0); |
1695 | } |
1696 | } |
1697 | #endif // _LP64 |
1698 | |
1699 | void C2_MacroAssembler::reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { |
1700 | switch (vlen) { |
1701 | case 2: |
1702 | assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1702, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint (); } } while (0); |
1703 | reduce2F(opcode, dst, src, vtmp1); |
1704 | break; |
1705 | case 4: |
1706 | assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1706, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint (); } } while (0); |
1707 | reduce4F(opcode, dst, src, vtmp1); |
1708 | break; |
1709 | case 8: |
1710 | reduce8F(opcode, dst, src, vtmp1, vtmp2); |
1711 | break; |
1712 | case 16: |
1713 | reduce16F(opcode, dst, src, vtmp1, vtmp2); |
1714 | break; |
1715 | default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1715, "assert(" "false" ") failed", "wrong vector length"); ::breakpoint(); } } while (0); |
1716 | } |
1717 | } |
1718 | |
1719 | void C2_MacroAssembler::reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { |
1720 | switch (vlen) { |
1721 | case 2: |
1722 | assert(vtmp2 == xnoreg, "")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1722, "assert(" "vtmp2 == xnoreg" ") failed", ""); ::breakpoint (); } } while (0); |
1723 | reduce2D(opcode, dst, src, vtmp1); |
1724 | break; |
1725 | case 4: |
1726 | reduce4D(opcode, dst, src, vtmp1, vtmp2); |
1727 | break; |
1728 | case 8: |
1729 | reduce8D(opcode, dst, src, vtmp1, vtmp2); |
1730 | break; |
1731 | default: assert(false, "wrong vector length")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1731, "assert(" "false" ") failed", "wrong vector length"); ::breakpoint(); } } while (0); |
1732 | } |
1733 | } |
1734 | |
1735 | void C2_MacroAssembler::reduce2I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1736 | if (opcode == Op_AddReductionVI) { |
1737 | if (vtmp1 != src2) { |
1738 | movdqu(vtmp1, src2); |
1739 | } |
1740 | phaddd(vtmp1, vtmp1); |
1741 | } else { |
1742 | pshufd(vtmp1, src2, 0x1); |
1743 | reduce_operation_128(T_INT, opcode, vtmp1, src2); |
1744 | } |
1745 | movdl(vtmp2, src1); |
1746 | reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); |
1747 | movdl(dst, vtmp1); |
1748 | } |
1749 | |
1750 | void C2_MacroAssembler::reduce4I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1751 | if (opcode == Op_AddReductionVI) { |
1752 | if (vtmp1 != src2) { |
1753 | movdqu(vtmp1, src2); |
1754 | } |
1755 | phaddd(vtmp1, src2); |
1756 | reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1757 | } else { |
1758 | pshufd(vtmp2, src2, 0xE); |
1759 | reduce_operation_128(T_INT, opcode, vtmp2, src2); |
1760 | reduce2I(opcode, dst, src1, vtmp2, vtmp1, vtmp2); |
1761 | } |
1762 | } |
1763 | |
1764 | void C2_MacroAssembler::reduce8I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1765 | if (opcode == Op_AddReductionVI) { |
1766 | vphaddd(vtmp1, src2, src2, Assembler::AVX_256bit); |
1767 | vextracti128_high(vtmp2, vtmp1); |
1768 | vpaddd(vtmp1, vtmp1, vtmp2, Assembler::AVX_128bit); |
1769 | reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1770 | } else { |
1771 | vextracti128_high(vtmp1, src2); |
1772 | reduce_operation_128(T_INT, opcode, vtmp1, src2); |
1773 | reduce4I(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1774 | } |
1775 | } |
1776 | |
1777 | void C2_MacroAssembler::reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1778 | vextracti64x4_high(vtmp2, src2); |
1779 | reduce_operation_256(T_INT, opcode, vtmp2, vtmp2, src2); |
1780 | reduce8I(opcode, dst, src1, vtmp2, vtmp1, vtmp2); |
1781 | } |
1782 | |
1783 | void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1784 | pshufd(vtmp2, src2, 0x1); |
1785 | reduce_operation_128(T_BYTE, opcode, vtmp2, src2); |
1786 | movdqu(vtmp1, vtmp2); |
1787 | psrldq(vtmp1, 2); |
1788 | reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2); |
1789 | movdqu(vtmp2, vtmp1); |
1790 | psrldq(vtmp2, 1); |
1791 | reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2); |
1792 | movdl(vtmp2, src1); |
1793 | pmovsxbd(vtmp1, vtmp1); |
1794 | reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); |
1795 | pextrb(dst, vtmp1, 0x0); |
1796 | movsbl(dst, dst); |
1797 | } |
1798 | |
1799 | void C2_MacroAssembler::reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1800 | pshufd(vtmp1, src2, 0xE); |
1801 | reduce_operation_128(T_BYTE, opcode, vtmp1, src2); |
1802 | reduce8B(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1803 | } |
1804 | |
1805 | void C2_MacroAssembler::reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1806 | vextracti128_high(vtmp2, src2); |
1807 | reduce_operation_128(T_BYTE, opcode, vtmp2, src2); |
1808 | reduce16B(opcode, dst, src1, vtmp2, vtmp1, vtmp2); |
1809 | } |
1810 | |
1811 | void C2_MacroAssembler::reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1812 | vextracti64x4_high(vtmp1, src2); |
1813 | reduce_operation_256(T_BYTE, opcode, vtmp1, vtmp1, src2); |
1814 | reduce32B(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1815 | } |
1816 | |
1817 | void C2_MacroAssembler::mulreduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1818 | pmovsxbw(vtmp2, src2); |
1819 | reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2); |
1820 | } |
1821 | |
1822 | void C2_MacroAssembler::mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1823 | if (UseAVX > 1) { |
1824 | int vector_len = Assembler::AVX_256bit; |
1825 | vpmovsxbw(vtmp1, src2, vector_len); |
1826 | reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1827 | } else { |
1828 | pmovsxbw(vtmp2, src2); |
1829 | reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2); |
1830 | pshufd(vtmp2, src2, 0x1); |
1831 | pmovsxbw(vtmp2, src2); |
1832 | reduce8S(opcode, dst, dst, vtmp2, vtmp1, vtmp2); |
1833 | } |
1834 | } |
1835 | |
1836 | void C2_MacroAssembler::mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1837 | if (UseAVX > 2 && VM_Version::supports_avx512bw()) { |
1838 | int vector_len = Assembler::AVX_512bit; |
1839 | vpmovsxbw(vtmp1, src2, vector_len); |
1840 | reduce32S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1841 | } else { |
1842 | assert(UseAVX >= 2,"Should not reach here.")do { if (!(UseAVX >= 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1842, "assert(" "UseAVX >= 2" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
1843 | mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); |
1844 | vextracti128_high(vtmp2, src2); |
1845 | mulreduce16B(opcode, dst, dst, vtmp2, vtmp1, vtmp2); |
1846 | } |
1847 | } |
1848 | |
1849 | void C2_MacroAssembler::mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1850 | mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); |
1851 | vextracti64x4_high(vtmp2, src2); |
1852 | mulreduce32B(opcode, dst, dst, vtmp2, vtmp1, vtmp2); |
1853 | } |
1854 | |
1855 | void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1856 | if (opcode == Op_AddReductionVI) { |
1857 | if (vtmp1 != src2) { |
1858 | movdqu(vtmp1, src2); |
1859 | } |
1860 | phaddw(vtmp1, vtmp1); |
1861 | phaddw(vtmp1, vtmp1); |
1862 | } else { |
1863 | pshufd(vtmp2, src2, 0x1); |
1864 | reduce_operation_128(T_SHORT, opcode, vtmp2, src2); |
1865 | movdqu(vtmp1, vtmp2); |
1866 | psrldq(vtmp1, 2); |
1867 | reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2); |
1868 | } |
1869 | movdl(vtmp2, src1); |
1870 | pmovsxwd(vtmp1, vtmp1); |
1871 | reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); |
1872 | pextrw(dst, vtmp1, 0x0); |
1873 | movswl(dst, dst); |
1874 | } |
1875 | |
1876 | void C2_MacroAssembler::reduce8S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1877 | if (opcode == Op_AddReductionVI) { |
1878 | if (vtmp1 != src2) { |
1879 | movdqu(vtmp1, src2); |
1880 | } |
1881 | phaddw(vtmp1, src2); |
1882 | } else { |
1883 | pshufd(vtmp1, src2, 0xE); |
1884 | reduce_operation_128(T_SHORT, opcode, vtmp1, src2); |
1885 | } |
1886 | reduce4S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1887 | } |
1888 | |
1889 | void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1890 | if (opcode == Op_AddReductionVI) { |
1891 | int vector_len = Assembler::AVX_256bit; |
1892 | vphaddw(vtmp2, src2, src2, vector_len); |
1893 | vpermq(vtmp2, vtmp2, 0xD8, vector_len); |
1894 | } else { |
1895 | vextracti128_high(vtmp2, src2); |
1896 | reduce_operation_128(T_SHORT, opcode, vtmp2, src2); |
1897 | } |
1898 | reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2); |
1899 | } |
1900 | |
1901 | void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1902 | int vector_len = Assembler::AVX_256bit; |
1903 | vextracti64x4_high(vtmp1, src2); |
1904 | reduce_operation_256(T_SHORT, opcode, vtmp1, vtmp1, src2); |
1905 | reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1906 | } |
1907 | |
1908 | #ifdef _LP641 |
1909 | void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1910 | pshufd(vtmp2, src2, 0xE); |
1911 | reduce_operation_128(T_LONG, opcode, vtmp2, src2); |
1912 | movdq(vtmp1, src1); |
1913 | reduce_operation_128(T_LONG, opcode, vtmp1, vtmp2); |
1914 | movdq(dst, vtmp1); |
1915 | } |
1916 | |
1917 | void C2_MacroAssembler::reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1918 | vextracti128_high(vtmp1, src2); |
1919 | reduce_operation_128(T_LONG, opcode, vtmp1, src2); |
1920 | reduce2L(opcode, dst, src1, vtmp1, vtmp1, vtmp2); |
1921 | } |
1922 | |
1923 | void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { |
1924 | vextracti64x4_high(vtmp2, src2); |
1925 | reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2); |
1926 | reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2); |
1927 | } |
1928 | |
1929 | void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) { |
1930 | assert(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64, "invalid")do { if (!(ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 1930, "assert(" "ArrayOperationPartialInlineSize > 0 && ArrayOperationPartialInlineSize <= 64" ") failed", "invalid"); ::breakpoint(); } } while (0); |
1931 | mov64(temp, -1L); |
1932 | bzhiq(temp, temp, len); |
1933 | kmovql(dst, temp); |
1934 | } |
1935 | #endif // _LP64 |
1936 | |
1937 | void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { |
1938 | reduce_operation_128(T_FLOAT, opcode, dst, src); |
1939 | pshufd(vtmp, src, 0x1); |
1940 | reduce_operation_128(T_FLOAT, opcode, dst, vtmp); |
1941 | } |
1942 | |
1943 | void C2_MacroAssembler::reduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { |
1944 | reduce2F(opcode, dst, src, vtmp); |
1945 | pshufd(vtmp, src, 0x2); |
1946 | reduce_operation_128(T_FLOAT, opcode, dst, vtmp); |
1947 | pshufd(vtmp, src, 0x3); |
1948 | reduce_operation_128(T_FLOAT, opcode, dst, vtmp); |
1949 | } |
1950 | |
1951 | void C2_MacroAssembler::reduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { |
1952 | reduce4F(opcode, dst, src, vtmp2); |
1953 | vextractf128_high(vtmp2, src); |
1954 | reduce4F(opcode, dst, vtmp2, vtmp1); |
1955 | } |
1956 | |
1957 | void C2_MacroAssembler::reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { |
1958 | reduce8F(opcode, dst, src, vtmp1, vtmp2); |
1959 | vextracti64x4_high(vtmp1, src); |
1960 | reduce8F(opcode, dst, vtmp1, vtmp1, vtmp2); |
1961 | } |
1962 | |
1963 | void C2_MacroAssembler::reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { |
1964 | reduce_operation_128(T_DOUBLE, opcode, dst, src); |
1965 | pshufd(vtmp, src, 0xE); |
1966 | reduce_operation_128(T_DOUBLE, opcode, dst, vtmp); |
1967 | } |
1968 | |
1969 | void C2_MacroAssembler::reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { |
1970 | reduce2D(opcode, dst, src, vtmp2); |
1971 | vextractf128_high(vtmp2, src); |
1972 | reduce2D(opcode, dst, vtmp2, vtmp1); |
1973 | } |
1974 | |
1975 | void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { |
1976 | reduce4D(opcode, dst, src, vtmp1, vtmp2); |
1977 | vextracti64x4_high(vtmp1, src); |
1978 | reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2); |
1979 | } |
1980 | |
1981 | void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) { |
1982 | MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len); |
1983 | } |
1984 | |
1985 | void C2_MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) { |
1986 | MacroAssembler::evmovdqu(type, kmask, dst, src, vector_len); |
1987 | } |
1988 | |
1989 | |
1990 | void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, |
1991 | XMMRegister dst, XMMRegister src, |
1992 | XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, |
1993 | XMMRegister xmm_0, XMMRegister xmm_1) { |
1994 | int permconst[] = {1, 14}; |
1995 | XMMRegister wsrc = src; |
1996 | XMMRegister wdst = xmm_0; |
1997 | XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1; |
1998 | |
1999 | int vlen_enc = Assembler::AVX_128bit; |
2000 | if (vlen == 16) { |
2001 | vlen_enc = Assembler::AVX_256bit; |
2002 | } |
2003 | |
2004 | for (int i = log2(vlen) - 1; i >=0; i--) { |
2005 | if (i == 0 && !is_dst_valid) { |
2006 | wdst = dst; |
2007 | } |
2008 | if (i == 3) { |
2009 | vextracti64x4_high(wtmp, wsrc); |
2010 | } else if (i == 2) { |
2011 | vextracti128_high(wtmp, wsrc); |
2012 | } else { // i = [0,1] |
2013 | vpermilps(wtmp, wsrc, permconst[i], vlen_enc); |
2014 | } |
2015 | vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); |
2016 | wsrc = wdst; |
2017 | vlen_enc = Assembler::AVX_128bit; |
2018 | } |
2019 | if (is_dst_valid) { |
2020 | vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); |
2021 | } |
2022 | } |
2023 | |
2024 | void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, XMMRegister dst, XMMRegister src, |
2025 | XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, |
2026 | XMMRegister xmm_0, XMMRegister xmm_1) { |
2027 | XMMRegister wsrc = src; |
2028 | XMMRegister wdst = xmm_0; |
2029 | XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1; |
2030 | int vlen_enc = Assembler::AVX_128bit; |
2031 | if (vlen == 8) { |
2032 | vlen_enc = Assembler::AVX_256bit; |
2033 | } |
2034 | for (int i = log2(vlen) - 1; i >=0; i--) { |
2035 | if (i == 0 && !is_dst_valid) { |
2036 | wdst = dst; |
2037 | } |
2038 | if (i == 1) { |
2039 | vextracti128_high(wtmp, wsrc); |
2040 | } else if (i == 2) { |
2041 | vextracti64x4_high(wtmp, wsrc); |
2042 | } else { |
2043 | assert(i == 0, "%d", i)do { if (!(i == 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2043, "assert(" "i == 0" ") failed", "%d", i); ::breakpoint (); } } while (0); |
2044 | vpermilpd(wtmp, wsrc, 1, vlen_enc); |
2045 | } |
2046 | vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); |
2047 | wsrc = wdst; |
2048 | vlen_enc = Assembler::AVX_128bit; |
2049 | } |
2050 | if (is_dst_valid) { |
2051 | vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); |
2052 | } |
2053 | } |
2054 | |
2055 | void C2_MacroAssembler::extract(BasicType bt, Register dst, XMMRegister src, int idx) { |
2056 | switch (bt) { |
2057 | case T_BYTE: pextrb(dst, src, idx); break; |
2058 | case T_SHORT: pextrw(dst, src, idx); break; |
2059 | case T_INT: pextrd(dst, src, idx); break; |
2060 | case T_LONG: pextrq(dst, src, idx); break; |
2061 | |
2062 | default: |
2063 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2063, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
2064 | break; |
2065 | } |
2066 | } |
2067 | |
2068 | XMMRegister C2_MacroAssembler::get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex) { |
2069 | int esize = type2aelembytes(typ); |
2070 | int elem_per_lane = 16/esize; |
2071 | int lane = elemindex / elem_per_lane; |
2072 | int eindex = elemindex % elem_per_lane; |
Value stored to 'eindex' during its initialization is never read | |
2073 | |
2074 | if (lane >= 2) { |
2075 | assert(UseAVX > 2, "required")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2075, "assert(" "UseAVX > 2" ") failed", "required"); :: breakpoint(); } } while (0); |
2076 | vextractf32x4(dst, src, lane & 3); |
2077 | return dst; |
2078 | } else if (lane > 0) { |
2079 | assert(UseAVX > 0, "required")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2079, "assert(" "UseAVX > 0" ") failed", "required"); :: breakpoint(); } } while (0); |
2080 | vextractf128(dst, src, lane); |
2081 | return dst; |
2082 | } else { |
2083 | return src; |
2084 | } |
2085 | } |
2086 | |
2087 | void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex) { |
2088 | int esize = type2aelembytes(typ); |
2089 | int elem_per_lane = 16/esize; |
2090 | int eindex = elemindex % elem_per_lane; |
2091 | assert(is_integral_type(typ),"required")do { if (!(is_integral_type(typ))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2091, "assert(" "is_integral_type(typ)" ") failed", "required" ); ::breakpoint(); } } while (0); |
2092 | |
2093 | if (eindex == 0) { |
2094 | if (typ == T_LONG) { |
2095 | movq(dst, src); |
2096 | } else { |
2097 | movdl(dst, src); |
2098 | if (typ == T_BYTE) |
2099 | movsbl(dst, dst); |
2100 | else if (typ == T_SHORT) |
2101 | movswl(dst, dst); |
2102 | } |
2103 | } else { |
2104 | extract(typ, dst, src, eindex); |
2105 | } |
2106 | } |
2107 | |
2108 | void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) { |
2109 | int esize = type2aelembytes(typ); |
2110 | int elem_per_lane = 16/esize; |
2111 | int eindex = elemindex % elem_per_lane; |
2112 | assert((typ == T_FLOAT || typ == T_DOUBLE),"required")do { if (!((typ == T_FLOAT || typ == T_DOUBLE))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2112, "assert(" "(typ == T_FLOAT || typ == T_DOUBLE)" ") failed" , "required"); ::breakpoint(); } } while (0); |
2113 | |
2114 | if (eindex == 0) { |
2115 | movq(dst, src); |
2116 | } else { |
2117 | if (typ == T_FLOAT) { |
2118 | if (UseAVX == 0) { |
2119 | movdqu(dst, src); |
2120 | pshufps(dst, dst, eindex); |
2121 | } else { |
2122 | vpshufps(dst, src, src, eindex, Assembler::AVX_128bit); |
2123 | } |
2124 | } else { |
2125 | if (UseAVX == 0) { |
2126 | movdqu(dst, src); |
2127 | psrldq(dst, eindex*esize); |
2128 | } else { |
2129 | vpsrldq(dst, src, eindex*esize, Assembler::AVX_128bit); |
2130 | } |
2131 | movq(dst, dst); |
2132 | } |
2133 | } |
2134 | // Zero upper bits |
2135 | if (typ == T_FLOAT) { |
2136 | if (UseAVX == 0) { |
2137 | assert((vtmp != xnoreg) && (tmp != noreg), "required.")do { if (!((vtmp != xnoreg) && (tmp != noreg))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2137, "assert(" "(vtmp != xnoreg) && (tmp != noreg)" ") failed", "required."); ::breakpoint(); } } while (0); |
2138 | movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp); |
2139 | pand(dst, vtmp); |
2140 | } else { |
2141 | assert((tmp != noreg), "required.")do { if (!((tmp != noreg))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2141, "assert(" "(tmp != noreg)" ") failed", "required."); :: breakpoint(); } } while (0); |
2142 | vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp); |
2143 | } |
2144 | } |
2145 | } |
2146 | |
2147 | void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len) { |
2148 | switch(typ) { |
2149 | case T_BYTE: |
2150 | case T_BOOLEAN: |
2151 | evpcmpb(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len); |
2152 | break; |
2153 | case T_SHORT: |
2154 | case T_CHAR: |
2155 | evpcmpw(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len); |
2156 | break; |
2157 | case T_INT: |
2158 | case T_FLOAT: |
2159 | evpcmpd(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len); |
2160 | break; |
2161 | case T_LONG: |
2162 | case T_DOUBLE: |
2163 | evpcmpq(kdmask, ksmask, src1, src2, comparison, /*signed*/ true, vector_len); |
2164 | break; |
2165 | default: |
2166 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2166, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
2167 | break; |
2168 | } |
2169 | } |
2170 | |
2171 | void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) { |
2172 | switch(typ) { |
2173 | case T_BOOLEAN: |
2174 | case T_BYTE: |
2175 | evpcmpb(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); |
2176 | break; |
2177 | case T_CHAR: |
2178 | case T_SHORT: |
2179 | evpcmpw(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); |
2180 | break; |
2181 | case T_INT: |
2182 | case T_FLOAT: |
2183 | evpcmpd(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); |
2184 | break; |
2185 | case T_LONG: |
2186 | case T_DOUBLE: |
2187 | evpcmpq(kdmask, ksmask, src1, adr, comparison, /*signed*/ true, vector_len, scratch); |
2188 | break; |
2189 | default: |
2190 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2190, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
2191 | break; |
2192 | } |
2193 | } |
2194 | |
2195 | void C2_MacroAssembler::vpcmpu(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, |
2196 | int vlen_in_bytes, XMMRegister vtmp1, XMMRegister vtmp2, Register scratch) { |
2197 | int vlen_enc = vector_length_encoding(vlen_in_bytes*2); |
2198 | switch (typ) { |
2199 | case T_BYTE: |
2200 | vpmovzxbw(vtmp1, src1, vlen_enc); |
2201 | vpmovzxbw(vtmp2, src2, vlen_enc); |
2202 | vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch); |
2203 | vpacksswb(dst, dst, dst, vlen_enc); |
2204 | break; |
2205 | case T_SHORT: |
2206 | vpmovzxwd(vtmp1, src1, vlen_enc); |
2207 | vpmovzxwd(vtmp2, src2, vlen_enc); |
2208 | vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch); |
2209 | vpackssdw(dst, dst, dst, vlen_enc); |
2210 | break; |
2211 | case T_INT: |
2212 | vpmovzxdq(vtmp1, src1, vlen_enc); |
2213 | vpmovzxdq(vtmp2, src2, vlen_enc); |
2214 | vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch); |
2215 | vpermilps(dst, dst, 8, vlen_enc); |
2216 | break; |
2217 | default: |
2218 | assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2218, "assert(" "false" ") failed", "Should not reach here" ); ::breakpoint(); } } while (0); |
2219 | } |
2220 | if (vlen_in_bytes == 16) { |
2221 | vpermpd(dst, dst, 0x8, vlen_enc); |
2222 | } |
2223 | } |
2224 | |
2225 | void C2_MacroAssembler::vpcmpu32(BasicType typ, XMMRegister dst, XMMRegister src1, XMMRegister src2, ComparisonPredicate comparison, int vlen_in_bytes, |
2226 | XMMRegister vtmp1, XMMRegister vtmp2, XMMRegister vtmp3, Register scratch) { |
2227 | int vlen_enc = vector_length_encoding(vlen_in_bytes); |
2228 | switch (typ) { |
2229 | case T_BYTE: |
2230 | vpmovzxbw(vtmp1, src1, vlen_enc); |
2231 | vpmovzxbw(vtmp2, src2, vlen_enc); |
2232 | vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch); |
2233 | vextracti128(vtmp1, src1, 1); |
2234 | vextracti128(vtmp2, src2, 1); |
2235 | vpmovzxbw(vtmp1, vtmp1, vlen_enc); |
2236 | vpmovzxbw(vtmp2, vtmp2, vlen_enc); |
2237 | vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::W, vlen_enc, scratch); |
2238 | vpacksswb(dst, dst, vtmp3, vlen_enc); |
2239 | vpermpd(dst, dst, 0xd8, vlen_enc); |
2240 | break; |
2241 | case T_SHORT: |
2242 | vpmovzxwd(vtmp1, src1, vlen_enc); |
2243 | vpmovzxwd(vtmp2, src2, vlen_enc); |
2244 | vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch); |
2245 | vextracti128(vtmp1, src1, 1); |
2246 | vextracti128(vtmp2, src2, 1); |
2247 | vpmovzxwd(vtmp1, vtmp1, vlen_enc); |
2248 | vpmovzxwd(vtmp2, vtmp2, vlen_enc); |
2249 | vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::D, vlen_enc, scratch); |
2250 | vpackssdw(dst, dst, vtmp3, vlen_enc); |
2251 | vpermpd(dst, dst, 0xd8, vlen_enc); |
2252 | break; |
2253 | case T_INT: |
2254 | vpmovzxdq(vtmp1, src1, vlen_enc); |
2255 | vpmovzxdq(vtmp2, src2, vlen_enc); |
2256 | vpcmpCCW(dst, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch); |
2257 | vpshufd(dst, dst, 8, vlen_enc); |
2258 | vpermq(dst, dst, 8, vlen_enc); |
2259 | vextracti128(vtmp1, src1, 1); |
2260 | vextracti128(vtmp2, src2, 1); |
2261 | vpmovzxdq(vtmp1, vtmp1, vlen_enc); |
2262 | vpmovzxdq(vtmp2, vtmp2, vlen_enc); |
2263 | vpcmpCCW(vtmp3, vtmp1, vtmp2, comparison, Assembler::Q, vlen_enc, scratch); |
2264 | vpshufd(vtmp3, vtmp3, 8, vlen_enc); |
2265 | vpermq(vtmp3, vtmp3, 0x80, vlen_enc); |
2266 | vpblendd(dst, dst, vtmp3, 0xf0, vlen_enc); |
2267 | break; |
2268 | default: |
2269 | assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2269, "assert(" "false" ") failed", "Should not reach here" ); ::breakpoint(); } } while (0); |
2270 | } |
2271 | } |
2272 | |
2273 | void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) { |
2274 | switch(typ) { |
2275 | case T_BYTE: |
2276 | evpblendmb(dst, kmask, src1, src2, merge, vector_len); |
2277 | break; |
2278 | case T_SHORT: |
2279 | evpblendmw(dst, kmask, src1, src2, merge, vector_len); |
2280 | break; |
2281 | case T_INT: |
2282 | case T_FLOAT: |
2283 | evpblendmd(dst, kmask, src1, src2, merge, vector_len); |
2284 | break; |
2285 | case T_LONG: |
2286 | case T_DOUBLE: |
2287 | evpblendmq(dst, kmask, src1, src2, merge, vector_len); |
2288 | break; |
2289 | default: |
2290 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2290, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
2291 | break; |
2292 | } |
2293 | } |
2294 | |
2295 | void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, |
2296 | XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) { |
2297 | switch(vlen) { |
2298 | case 4: |
2299 | assert(vtmp1 != xnoreg, "required.")do { if (!(vtmp1 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2299, "assert(" "vtmp1 != xnoreg" ") failed", "required."); ::breakpoint(); } } while (0); |
2300 | // Broadcast lower 32 bits to 128 bits before ptest |
2301 | pshufd(vtmp1, src1, 0x0); |
2302 | if (bt == BoolTest::overflow) { |
2303 | assert(vtmp2 != xnoreg, "required.")do { if (!(vtmp2 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2303, "assert(" "vtmp2 != xnoreg" ") failed", "required."); ::breakpoint(); } } while (0); |
2304 | pshufd(vtmp2, src2, 0x0); |
2305 | } else { |
2306 | assert(vtmp2 == xnoreg, "required.")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2306, "assert(" "vtmp2 == xnoreg" ") failed", "required."); ::breakpoint(); } } while (0); |
2307 | vtmp2 = src2; |
2308 | } |
2309 | ptest(vtmp1, vtmp2); |
2310 | break; |
2311 | case 8: |
2312 | assert(vtmp1 != xnoreg, "required.")do { if (!(vtmp1 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2312, "assert(" "vtmp1 != xnoreg" ") failed", "required."); ::breakpoint(); } } while (0); |
2313 | // Broadcast lower 64 bits to 128 bits before ptest |
2314 | pshufd(vtmp1, src1, 0x4); |
2315 | if (bt == BoolTest::overflow) { |
2316 | assert(vtmp2 != xnoreg, "required.")do { if (!(vtmp2 != xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2316, "assert(" "vtmp2 != xnoreg" ") failed", "required."); ::breakpoint(); } } while (0); |
2317 | pshufd(vtmp2, src2, 0x4); |
2318 | } else { |
2319 | assert(vtmp2 == xnoreg, "required.")do { if (!(vtmp2 == xnoreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2319, "assert(" "vtmp2 == xnoreg" ") failed", "required."); ::breakpoint(); } } while (0); |
2320 | vtmp2 = src2; |
2321 | } |
2322 | ptest(vtmp1, vtmp2); |
2323 | break; |
2324 | case 16: |
2325 | assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2325, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)" ") failed", "required."); ::breakpoint(); } } while (0); |
2326 | ptest(src1, src2); |
2327 | break; |
2328 | case 32: |
2329 | assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2329, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)" ") failed", "required."); ::breakpoint(); } } while (0); |
2330 | vptest(src1, src2, Assembler::AVX_256bit); |
2331 | break; |
2332 | case 64: |
2333 | { |
2334 | assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.")do { if (!((vtmp1 == xnoreg) && (vtmp2 == xnoreg))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2334, "assert(" "(vtmp1 == xnoreg) && (vtmp2 == xnoreg)" ") failed", "required."); ::breakpoint(); } } while (0); |
2335 | evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit); |
2336 | if (bt == BoolTest::ne) { |
2337 | ktestql(mask, mask); |
2338 | } else { |
2339 | assert(bt == BoolTest::overflow, "required")do { if (!(bt == BoolTest::overflow)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2339, "assert(" "bt == BoolTest::overflow" ") failed", "required" ); ::breakpoint(); } } while (0); |
2340 | kortestql(mask, mask); |
2341 | } |
2342 | } |
2343 | break; |
2344 | default: |
2345 | assert(false,"Should not reach here.")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2345, "assert(" "false" ") failed", "Should not reach here." ); ::breakpoint(); } } while (0); |
2346 | break; |
2347 | } |
2348 | } |
2349 | |
2350 | //------------------------------------------------------------------------------------------- |
2351 | |
2352 | // IndexOf for constant substrings with size >= 8 chars |
2353 | // which don't need to be loaded through stack. |
2354 | void C2_MacroAssembler::string_indexofC8(Register str1, Register str2, |
2355 | Register cnt1, Register cnt2, |
2356 | int int_cnt2, Register result, |
2357 | XMMRegister vec, Register tmp, |
2358 | int ae) { |
2359 | ShortBranchVerifier sbv(this); |
2360 | assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2360, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required" ); ::breakpoint(); } } while (0); |
2361 | assert(ae != StrIntrinsicNode::LU, "Invalid encoding")do { if (!(ae != StrIntrinsicNode::LU)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2361, "assert(" "ae != StrIntrinsicNode::LU" ") failed", "Invalid encoding" ); ::breakpoint(); } } while (0); |
2362 | |
2363 | // This method uses the pcmpestri instruction with bound registers |
2364 | // inputs: |
2365 | // xmm - substring |
2366 | // rax - substring length (elements count) |
2367 | // mem - scanned string |
2368 | // rdx - string length (elements count) |
2369 | // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
2370 | // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes) |
2371 | // outputs: |
2372 | // rcx - matched index in string |
2373 | assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp == rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2373, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx" ") failed", "pcmpestri"); ::breakpoint(); } } while (0); |
2374 | int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts |
2375 | int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8 |
2376 | Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2; |
2377 | Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1; |
2378 | |
2379 | Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, |
2380 | RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, |
2381 | MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; |
2382 | |
2383 | // Note, inline_string_indexOf() generates checks: |
2384 | // if (substr.count > string.count) return -1; |
2385 | // if (substr.count == 0) return 0; |
2386 | assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars")do { if (!(int_cnt2 >= stride)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2386, "assert(" "int_cnt2 >= stride" ") failed", "this code is used only for cnt2 >= 8 chars" ); ::breakpoint(); } } while (0); |
2387 | |
2388 | // Load substring. |
2389 | if (ae == StrIntrinsicNode::UL) { |
2390 | pmovzxbw(vec, Address(str2, 0)); |
2391 | } else { |
2392 | movdqu(vec, Address(str2, 0)); |
2393 | } |
2394 | movl(cnt2, int_cnt2); |
2395 | movptr(result, str1); // string addr |
2396 | |
2397 | if (int_cnt2 > stride) { |
2398 | jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2398); |
2399 | |
2400 | // Reload substr for rescan, this code |
2401 | // is executed only for large substrings (> 8 chars) |
2402 | bind(RELOAD_SUBSTR); |
2403 | if (ae == StrIntrinsicNode::UL) { |
2404 | pmovzxbw(vec, Address(str2, 0)); |
2405 | } else { |
2406 | movdqu(vec, Address(str2, 0)); |
2407 | } |
2408 | negptr(cnt2); // Jumped here with negative cnt2, convert to positive |
2409 | |
2410 | bind(RELOAD_STR); |
2411 | // We came here after the beginning of the substring was |
2412 | // matched but the rest of it was not so we need to search |
2413 | // again. Start from the next element after the previous match. |
2414 | |
2415 | // cnt2 is number of substring reminding elements and |
2416 | // cnt1 is number of string reminding elements when cmp failed. |
2417 | // Restored cnt1 = cnt1 - cnt2 + int_cnt2 |
2418 | subl(cnt1, cnt2); |
2419 | addl(cnt1, int_cnt2); |
2420 | movl(cnt2, int_cnt2); // Now restore cnt2 |
2421 | |
2422 | decrementl(cnt1); // Shift to next element |
2423 | cmpl(cnt1, cnt2); |
2424 | jcc(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
2425 | |
2426 | addptr(result, (1<<scale1)); |
2427 | |
2428 | } // (int_cnt2 > 8) |
2429 | |
2430 | // Scan string for start of substr in 16-byte vectors |
2431 | bind(SCAN_TO_SUBSTR); |
2432 | pcmpestri(vec, Address(result, 0), mode); |
2433 | jccb(Assembler::below, FOUND_CANDIDATE)jccb_0(Assembler::below, FOUND_CANDIDATE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2433); // CF == 1 |
2434 | subl(cnt1, stride); |
2435 | jccb(Assembler::lessEqual, RET_NOT_FOUND)jccb_0(Assembler::lessEqual, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2435); // Scanned full string |
2436 | cmpl(cnt1, cnt2); |
2437 | jccb(Assembler::negative, RET_NOT_FOUND)jccb_0(Assembler::negative, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2437); // Left less then substring |
2438 | addptr(result, 16); |
2439 | jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2439); |
2440 | |
2441 | // Found a potential substr |
2442 | bind(FOUND_CANDIDATE); |
2443 | // Matched whole vector if first element matched (tmp(rcx) == 0). |
2444 | if (int_cnt2 == stride) { |
2445 | jccb(Assembler::overflow, RET_FOUND)jccb_0(Assembler::overflow, RET_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2445); // OF == 1 |
2446 | } else { // int_cnt2 > 8 |
2447 | jccb(Assembler::overflow, FOUND_SUBSTR)jccb_0(Assembler::overflow, FOUND_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2447); |
2448 | } |
2449 | // After pcmpestri tmp(rcx) contains matched element index |
2450 | // Compute start addr of substr |
2451 | lea(result, Address(result, tmp, scale1)); |
2452 | |
2453 | // Make sure string is still long enough |
2454 | subl(cnt1, tmp); |
2455 | cmpl(cnt1, cnt2); |
2456 | if (int_cnt2 == stride) { |
2457 | jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR)jccb_0(Assembler::greaterEqual, SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2457); |
2458 | } else { // int_cnt2 > 8 |
2459 | jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD)jccb_0(Assembler::greaterEqual, MATCH_SUBSTR_HEAD, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2459); |
2460 | } |
2461 | // Left less then substring. |
2462 | |
2463 | bind(RET_NOT_FOUND); |
2464 | movl(result, -1); |
2465 | jmp(EXIT); |
2466 | |
2467 | if (int_cnt2 > stride) { |
2468 | // This code is optimized for the case when whole substring |
2469 | // is matched if its head is matched. |
2470 | bind(MATCH_SUBSTR_HEAD); |
2471 | pcmpestri(vec, Address(result, 0), mode); |
2472 | // Reload only string if does not match |
2473 | jcc(Assembler::noOverflow, RELOAD_STR); // OF == 0 |
2474 | |
2475 | Label CONT_SCAN_SUBSTR; |
2476 | // Compare the rest of substring (> 8 chars). |
2477 | bind(FOUND_SUBSTR); |
2478 | // First 8 chars are already matched. |
2479 | negptr(cnt2); |
2480 | addptr(cnt2, stride); |
2481 | |
2482 | bind(SCAN_SUBSTR); |
2483 | subl(cnt1, stride); |
2484 | cmpl(cnt2, -stride); // Do not read beyond substring |
2485 | jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR)jccb_0(Assembler::lessEqual, CONT_SCAN_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2485); |
2486 | // Back-up strings to avoid reading beyond substring: |
2487 | // cnt1 = cnt1 - cnt2 + 8 |
2488 | addl(cnt1, cnt2); // cnt2 is negative |
2489 | addl(cnt1, stride); |
2490 | movl(cnt2, stride); negptr(cnt2); |
2491 | bind(CONT_SCAN_SUBSTR); |
2492 | if (int_cnt2 < (int)G) { |
2493 | int tail_off1 = int_cnt2<<scale1; |
2494 | int tail_off2 = int_cnt2<<scale2; |
2495 | if (ae == StrIntrinsicNode::UL) { |
2496 | pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2)); |
2497 | } else { |
2498 | movdqu(vec, Address(str2, cnt2, scale2, tail_off2)); |
2499 | } |
2500 | pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode); |
2501 | } else { |
2502 | // calculate index in register to avoid integer overflow (int_cnt2*2) |
2503 | movl(tmp, int_cnt2); |
2504 | addptr(tmp, cnt2); |
2505 | if (ae == StrIntrinsicNode::UL) { |
2506 | pmovzxbw(vec, Address(str2, tmp, scale2, 0)); |
2507 | } else { |
2508 | movdqu(vec, Address(str2, tmp, scale2, 0)); |
2509 | } |
2510 | pcmpestri(vec, Address(result, tmp, scale1, 0), mode); |
2511 | } |
2512 | // Need to reload strings pointers if not matched whole vector |
2513 | jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 |
2514 | addptr(cnt2, stride); |
2515 | jcc(Assembler::negative, SCAN_SUBSTR); |
2516 | // Fall through if found full substring |
2517 | |
2518 | } // (int_cnt2 > 8) |
2519 | |
2520 | bind(RET_FOUND); |
2521 | // Found result if we matched full small substring. |
2522 | // Compute substr offset |
2523 | subptr(result, str1); |
2524 | if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) { |
2525 | shrl(result, 1); // index |
2526 | } |
2527 | bind(EXIT); |
2528 | |
2529 | } // string_indexofC8 |
2530 | |
2531 | // Small strings are loaded through stack if they cross page boundary. |
2532 | void C2_MacroAssembler::string_indexof(Register str1, Register str2, |
2533 | Register cnt1, Register cnt2, |
2534 | int int_cnt2, Register result, |
2535 | XMMRegister vec, Register tmp, |
2536 | int ae) { |
2537 | ShortBranchVerifier sbv(this); |
2538 | assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2538, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required" ); ::breakpoint(); } } while (0); |
2539 | assert(ae != StrIntrinsicNode::LU, "Invalid encoding")do { if (!(ae != StrIntrinsicNode::LU)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2539, "assert(" "ae != StrIntrinsicNode::LU" ") failed", "Invalid encoding" ); ::breakpoint(); } } while (0); |
2540 | |
2541 | // |
2542 | // int_cnt2 is length of small (< 8 chars) constant substring |
2543 | // or (-1) for non constant substring in which case its length |
2544 | // is in cnt2 register. |
2545 | // |
2546 | // Note, inline_string_indexOf() generates checks: |
2547 | // if (substr.count > string.count) return -1; |
2548 | // if (substr.count == 0) return 0; |
2549 | // |
2550 | int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8 |
2551 | assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0")do { if (!(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride))) { (*g_assert_poison) = 'X';; report_vm_error( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2551, "assert(" "int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride)" ") failed", "should be != 0"); ::breakpoint(); } } while (0); |
2552 | // This method uses the pcmpestri instruction with bound registers |
2553 | // inputs: |
2554 | // xmm - substring |
2555 | // rax - substring length (elements count) |
2556 | // mem - scanned string |
2557 | // rdx - string length (elements count) |
2558 | // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) |
2559 | // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes) |
2560 | // outputs: |
2561 | // rcx - matched index in string |
2562 | assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp == rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2562, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx" ") failed", "pcmpestri"); ::breakpoint(); } } while (0); |
2563 | int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts |
2564 | Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2; |
2565 | Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1; |
2566 | |
2567 | Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, |
2568 | RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, |
2569 | FOUND_CANDIDATE; |
2570 | |
2571 | { //======================================================== |
2572 | // We don't know where these strings are located |
2573 | // and we can't read beyond them. Load them through stack. |
2574 | Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; |
2575 | |
2576 | movptr(tmp, rsp); // save old SP |
2577 | |
2578 | if (int_cnt2 > 0) { // small (< 8 chars) constant substring |
2579 | if (int_cnt2 == (1>>scale2)) { // One byte |
2580 | assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding")do { if (!((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode ::UL))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2580, "assert(" "(ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL)" ") failed", "Only possible for latin1 encoding"); ::breakpoint (); } } while (0); |
2581 | load_unsigned_byte(result, Address(str2, 0)); |
2582 | movdl(vec, result); // move 32 bits |
2583 | } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) { // Three bytes |
2584 | // Not enough header space in 32-bit VM: 12+3 = 15. |
2585 | movl(result, Address(str2, -1)); |
2586 | shrl(result, 8); |
2587 | movdl(vec, result); // move 32 bits |
2588 | } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) { // One char |
2589 | load_unsigned_short(result, Address(str2, 0)); |
2590 | movdl(vec, result); // move 32 bits |
2591 | } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars |
2592 | movdl(vec, Address(str2, 0)); // move 32 bits |
2593 | } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars |
2594 | movq(vec, Address(str2, 0)); // move 64 bits |
2595 | } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7}) |
2596 | // Array header size is 12 bytes in 32-bit VM |
2597 | // + 6 bytes for 3 chars == 18 bytes, |
2598 | // enough space to load vec and shift. |
2599 | assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity")do { if (!(HeapWordSize*TypeArrayKlass::header_size() >= 12 )) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2599, "assert(" "HeapWordSize*TypeArrayKlass::header_size() >= 12" ") failed", "sanity"); ::breakpoint(); } } while (0); |
2600 | if (ae == StrIntrinsicNode::UL) { |
2601 | int tail_off = int_cnt2-8; |
2602 | pmovzxbw(vec, Address(str2, tail_off)); |
2603 | psrldq(vec, -2*tail_off); |
2604 | } |
2605 | else { |
2606 | int tail_off = int_cnt2*(1<<scale2); |
2607 | movdqu(vec, Address(str2, tail_off-16)); |
2608 | psrldq(vec, 16-tail_off); |
2609 | } |
2610 | } |
2611 | } else { // not constant substring |
2612 | cmpl(cnt2, stride); |
2613 | jccb(Assembler::aboveEqual, BIG_STRINGS)jccb_0(Assembler::aboveEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2613); // Both strings are big enough |
2614 | |
2615 | // We can read beyond string if srt+16 does not cross page boundary |
2616 | // since heaps are aligned and mapped by pages. |
2617 | assert(os::vm_page_size() < (int)G, "default page should be small")do { if (!(os::vm_page_size() < (int)G)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2617, "assert(" "os::vm_page_size() < (int)G" ") failed" , "default page should be small"); ::breakpoint(); } } while ( 0); |
2618 | movl(result, str2); // We need only low 32 bits |
2619 | andl(result, (os::vm_page_size()-1)); |
2620 | cmpl(result, (os::vm_page_size()-16)); |
2621 | jccb(Assembler::belowEqual, CHECK_STR)jccb_0(Assembler::belowEqual, CHECK_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2621); |
2622 | |
2623 | // Move small strings to stack to allow load 16 bytes into vec. |
2624 | subptr(rsp, 16); |
2625 | int stk_offset = wordSize-(1<<scale2); |
2626 | push(cnt2); |
2627 | |
2628 | bind(COPY_SUBSTR); |
2629 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) { |
2630 | load_unsigned_byte(result, Address(str2, cnt2, scale2, -1)); |
2631 | movb(Address(rsp, cnt2, scale2, stk_offset), result); |
2632 | } else if (ae == StrIntrinsicNode::UU) { |
2633 | load_unsigned_short(result, Address(str2, cnt2, scale2, -2)); |
2634 | movw(Address(rsp, cnt2, scale2, stk_offset), result); |
2635 | } |
2636 | decrement(cnt2); |
2637 | jccb(Assembler::notZero, COPY_SUBSTR)jccb_0(Assembler::notZero, COPY_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2637); |
2638 | |
2639 | pop(cnt2); |
2640 | movptr(str2, rsp); // New substring address |
2641 | } // non constant |
2642 | |
2643 | bind(CHECK_STR); |
2644 | cmpl(cnt1, stride); |
2645 | jccb(Assembler::aboveEqual, BIG_STRINGS)jccb_0(Assembler::aboveEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2645); |
2646 | |
2647 | // Check cross page boundary. |
2648 | movl(result, str1); // We need only low 32 bits |
2649 | andl(result, (os::vm_page_size()-1)); |
2650 | cmpl(result, (os::vm_page_size()-16)); |
2651 | jccb(Assembler::belowEqual, BIG_STRINGS)jccb_0(Assembler::belowEqual, BIG_STRINGS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2651); |
2652 | |
2653 | subptr(rsp, 16); |
2654 | int stk_offset = -(1<<scale1); |
2655 | if (int_cnt2 < 0) { // not constant |
2656 | push(cnt2); |
2657 | stk_offset += wordSize; |
2658 | } |
2659 | movl(cnt2, cnt1); |
2660 | |
2661 | bind(COPY_STR); |
2662 | if (ae == StrIntrinsicNode::LL) { |
2663 | load_unsigned_byte(result, Address(str1, cnt2, scale1, -1)); |
2664 | movb(Address(rsp, cnt2, scale1, stk_offset), result); |
2665 | } else { |
2666 | load_unsigned_short(result, Address(str1, cnt2, scale1, -2)); |
2667 | movw(Address(rsp, cnt2, scale1, stk_offset), result); |
2668 | } |
2669 | decrement(cnt2); |
2670 | jccb(Assembler::notZero, COPY_STR)jccb_0(Assembler::notZero, COPY_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2670); |
2671 | |
2672 | if (int_cnt2 < 0) { // not constant |
2673 | pop(cnt2); |
2674 | } |
2675 | movptr(str1, rsp); // New string address |
2676 | |
2677 | bind(BIG_STRINGS); |
2678 | // Load substring. |
2679 | if (int_cnt2 < 0) { // -1 |
2680 | if (ae == StrIntrinsicNode::UL) { |
2681 | pmovzxbw(vec, Address(str2, 0)); |
2682 | } else { |
2683 | movdqu(vec, Address(str2, 0)); |
2684 | } |
2685 | push(cnt2); // substr count |
2686 | push(str2); // substr addr |
2687 | push(str1); // string addr |
2688 | } else { |
2689 | // Small (< 8 chars) constant substrings are loaded already. |
2690 | movl(cnt2, int_cnt2); |
2691 | } |
2692 | push(tmp); // original SP |
2693 | |
2694 | } // Finished loading |
2695 | |
2696 | //======================================================== |
2697 | // Start search |
2698 | // |
2699 | |
2700 | movptr(result, str1); // string addr |
2701 | |
2702 | if (int_cnt2 < 0) { // Only for non constant substring |
2703 | jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2703); |
2704 | |
2705 | // SP saved at sp+0 |
2706 | // String saved at sp+1*wordSize |
2707 | // Substr saved at sp+2*wordSize |
2708 | // Substr count saved at sp+3*wordSize |
2709 | |
2710 | // Reload substr for rescan, this code |
2711 | // is executed only for large substrings (> 8 chars) |
2712 | bind(RELOAD_SUBSTR); |
2713 | movptr(str2, Address(rsp, 2*wordSize)); |
2714 | movl(cnt2, Address(rsp, 3*wordSize)); |
2715 | if (ae == StrIntrinsicNode::UL) { |
2716 | pmovzxbw(vec, Address(str2, 0)); |
2717 | } else { |
2718 | movdqu(vec, Address(str2, 0)); |
2719 | } |
2720 | // We came here after the beginning of the substring was |
2721 | // matched but the rest of it was not so we need to search |
2722 | // again. Start from the next element after the previous match. |
2723 | subptr(str1, result); // Restore counter |
2724 | if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) { |
2725 | shrl(str1, 1); |
2726 | } |
2727 | addl(cnt1, str1); |
2728 | decrementl(cnt1); // Shift to next element |
2729 | cmpl(cnt1, cnt2); |
2730 | jcc(Assembler::negative, RET_NOT_FOUND); // Left less then substring |
2731 | |
2732 | addptr(result, (1<<scale1)); |
2733 | } // non constant |
2734 | |
2735 | // Scan string for start of substr in 16-byte vectors |
2736 | bind(SCAN_TO_SUBSTR); |
2737 | assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri")do { if (!(cnt1 == rdx && cnt2 == rax && tmp == rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2737, "assert(" "cnt1 == rdx && cnt2 == rax && tmp == rcx" ") failed", "pcmpestri"); ::breakpoint(); } } while (0); |
2738 | pcmpestri(vec, Address(result, 0), mode); |
2739 | jccb(Assembler::below, FOUND_CANDIDATE)jccb_0(Assembler::below, FOUND_CANDIDATE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2739); // CF == 1 |
2740 | subl(cnt1, stride); |
2741 | jccb(Assembler::lessEqual, RET_NOT_FOUND)jccb_0(Assembler::lessEqual, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2741); // Scanned full string |
2742 | cmpl(cnt1, cnt2); |
2743 | jccb(Assembler::negative, RET_NOT_FOUND)jccb_0(Assembler::negative, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2743); // Left less then substring |
2744 | addptr(result, 16); |
2745 | |
2746 | bind(ADJUST_STR); |
2747 | cmpl(cnt1, stride); // Do not read beyond string |
2748 | jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR)jccb_0(Assembler::greaterEqual, SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2748); |
2749 | // Back-up string to avoid reading beyond string. |
2750 | lea(result, Address(result, cnt1, scale1, -16)); |
2751 | movl(cnt1, stride); |
2752 | jmpb(SCAN_TO_SUBSTR)jmpb_0(SCAN_TO_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2752); |
2753 | |
2754 | // Found a potential substr |
2755 | bind(FOUND_CANDIDATE); |
2756 | // After pcmpestri tmp(rcx) contains matched element index |
2757 | |
2758 | // Make sure string is still long enough |
2759 | subl(cnt1, tmp); |
2760 | cmpl(cnt1, cnt2); |
2761 | jccb(Assembler::greaterEqual, FOUND_SUBSTR)jccb_0(Assembler::greaterEqual, FOUND_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2761); |
2762 | // Left less then substring. |
2763 | |
2764 | bind(RET_NOT_FOUND); |
2765 | movl(result, -1); |
2766 | jmp(CLEANUP); |
2767 | |
2768 | bind(FOUND_SUBSTR); |
2769 | // Compute start addr of substr |
2770 | lea(result, Address(result, tmp, scale1)); |
2771 | if (int_cnt2 > 0) { // Constant substring |
2772 | // Repeat search for small substring (< 8 chars) |
2773 | // from new point without reloading substring. |
2774 | // Have to check that we don't read beyond string. |
2775 | cmpl(tmp, stride-int_cnt2); |
2776 | jccb(Assembler::greater, ADJUST_STR)jccb_0(Assembler::greater, ADJUST_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2776); |
2777 | // Fall through if matched whole substring. |
2778 | } else { // non constant |
2779 | assert(int_cnt2 == -1, "should be != 0")do { if (!(int_cnt2 == -1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2779, "assert(" "int_cnt2 == -1" ") failed", "should be != 0" ); ::breakpoint(); } } while (0); |
2780 | |
2781 | addl(tmp, cnt2); |
2782 | // Found result if we matched whole substring. |
2783 | cmpl(tmp, stride); |
2784 | jcc(Assembler::lessEqual, RET_FOUND); |
2785 | |
2786 | // Repeat search for small substring (<= 8 chars) |
2787 | // from new point 'str1' without reloading substring. |
2788 | cmpl(cnt2, stride); |
2789 | // Have to check that we don't read beyond string. |
2790 | jccb(Assembler::lessEqual, ADJUST_STR)jccb_0(Assembler::lessEqual, ADJUST_STR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2790); |
2791 | |
2792 | Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; |
2793 | // Compare the rest of substring (> 8 chars). |
2794 | movptr(str1, result); |
2795 | |
2796 | cmpl(tmp, cnt2); |
2797 | // First 8 chars are already matched. |
2798 | jccb(Assembler::equal, CHECK_NEXT)jccb_0(Assembler::equal, CHECK_NEXT, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2798); |
2799 | |
2800 | bind(SCAN_SUBSTR); |
2801 | pcmpestri(vec, Address(str1, 0), mode); |
2802 | // Need to reload strings pointers if not matched whole vector |
2803 | jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 |
2804 | |
2805 | bind(CHECK_NEXT); |
2806 | subl(cnt2, stride); |
2807 | jccb(Assembler::lessEqual, RET_FOUND_LONG)jccb_0(Assembler::lessEqual, RET_FOUND_LONG, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2807); // Found full substring |
2808 | addptr(str1, 16); |
2809 | if (ae == StrIntrinsicNode::UL) { |
2810 | addptr(str2, 8); |
2811 | } else { |
2812 | addptr(str2, 16); |
2813 | } |
2814 | subl(cnt1, stride); |
2815 | cmpl(cnt2, stride); // Do not read beyond substring |
2816 | jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR)jccb_0(Assembler::greaterEqual, CONT_SCAN_SUBSTR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2816); |
2817 | // Back-up strings to avoid reading beyond substring. |
2818 | |
2819 | if (ae == StrIntrinsicNode::UL) { |
2820 | lea(str2, Address(str2, cnt2, scale2, -8)); |
2821 | lea(str1, Address(str1, cnt2, scale1, -16)); |
2822 | } else { |
2823 | lea(str2, Address(str2, cnt2, scale2, -16)); |
2824 | lea(str1, Address(str1, cnt2, scale1, -16)); |
2825 | } |
2826 | subl(cnt1, cnt2); |
2827 | movl(cnt2, stride); |
2828 | addl(cnt1, stride); |
2829 | bind(CONT_SCAN_SUBSTR); |
2830 | if (ae == StrIntrinsicNode::UL) { |
2831 | pmovzxbw(vec, Address(str2, 0)); |
2832 | } else { |
2833 | movdqu(vec, Address(str2, 0)); |
2834 | } |
2835 | jmp(SCAN_SUBSTR); |
2836 | |
2837 | bind(RET_FOUND_LONG); |
2838 | movptr(str1, Address(rsp, wordSize)); |
2839 | } // non constant |
2840 | |
2841 | bind(RET_FOUND); |
2842 | // Compute substr offset |
2843 | subptr(result, str1); |
2844 | if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) { |
2845 | shrl(result, 1); // index |
2846 | } |
2847 | bind(CLEANUP); |
2848 | pop(rsp); // restore SP |
2849 | |
2850 | } // string_indexof |
2851 | |
// Emits the code that searches a run of 16-bit elements for a given value.
//   str1   - address of the first element; not modified (reused at the end to
//            turn the match address into an index)
//   cnt1   - number of 16-bit elements to scan; clobbered
//   ch     - value to look for; overwritten on the vector-match path
//   result - receives the element index of the first match, or -1 if none
//   vec1   - scratch: ch replicated into every 16-bit lane
//   vec2   - scratch: all zeros, used as the ptest operand
//   vec3   - scratch: comparison mask of the block currently examined
//   tmp    - scratch general-purpose register
2852 | void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result, |
2853 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { |
2854 | ShortBranchVerifier sbv(this); |
2855 | assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2855, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required" ); ::breakpoint(); } } while (0); |
2856 | |
// Number of 16-bit elements in one 16-byte vector.
2857 | int stride = 8; |
2858 | |
2859 | Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP, |
2860 | SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP, |
2861 | RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT, |
2862 | FOUND_SEQ_CHAR, DONE_LABEL; |
2863 | |
// result is the running scan pointer from here on; str1 keeps the base address.
2864 | movptr(result, str1); |
// With AVX2: fewer than 8 elements go straight to the scalar loop, 8..15 go to
// the 16-byte loop (via SCAN_TO_8_CHAR_INIT), 16 or more start in the 32-byte loop.
2865 | if (UseAVX >= 2) { |
2866 | cmpl(cnt1, stride); |
2867 | jcc(Assembler::less, SCAN_TO_CHAR); |
2868 | cmpl(cnt1, 2*stride); |
2869 | jcc(Assembler::less, SCAN_TO_8_CHAR_INIT); |
2870 | movdl(vec1, ch); |
2871 | vpbroadcastw(vec1, vec1, Assembler::AVX_256bit); |
2872 | vpxor(vec2, vec2); |
2873 | movl(tmp, cnt1); |
2874 | andl(tmp, 0xFFFFFFF0); //vector count (in chars) |
2875 | andl(cnt1,0x0000000F); //tail count (in chars) |
2876 | |
// 32-byte loop: 16 elements per iteration.
2877 | bind(SCAN_TO_16_CHAR_LOOP); |
2878 | vmovdqu(vec3, Address(result, 0)); |
// NOTE(review): the last argument is the literal 1, whereas stringL_indexof_char
// passes Assembler::AVX_256bit to vpcmpeqb in the same position - presumably the
// same vector-length encoding; confirm and prefer the named constant.
2879 | vpcmpeqw(vec3, vec3, vec1, 1); |
// vec2 is all zeros, so by the PTEST definition the carry flag is clear exactly
// when vec3 is non-zero, i.e. at least one lane compared equal.
2880 | vptest(vec2, vec3); |
2881 | jcc(Assembler::carryClear, FOUND_CHAR); |
2882 | addptr(result, 32); |
2883 | subl(tmp, 2*stride); |
2884 | jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP); |
// Fewer than 16 elements remain in cnt1; vec1/vec2 are already set up.
2885 | jmp(SCAN_TO_8_CHAR); |
// Entered only by the jump above for 8..15 elements: set up the broadcast and
// the zero vector that the 32-byte path did not get to initialize.
2886 | bind(SCAN_TO_8_CHAR_INIT); |
2887 | movdl(vec1, ch); |
2888 | pshuflw(vec1, vec1, 0x00); |
2889 | pshufd(vec1, vec1, 0); |
2890 | pxor(vec2, vec2); |
2891 | } |
2892 | bind(SCAN_TO_8_CHAR); |
2893 | cmpl(cnt1, stride); |
2894 | jcc(Assembler::less, SCAN_TO_CHAR); |
// Without AVX2 nothing above has initialized vec1/vec2, so do it here.
2895 | if (UseAVX < 2) { |
2896 | movdl(vec1, ch); |
2897 | pshuflw(vec1, vec1, 0x00); |
2898 | pshufd(vec1, vec1, 0); |
2899 | pxor(vec2, vec2); |
2900 | } |
2901 | movl(tmp, cnt1); |
2902 | andl(tmp, 0xFFFFFFF8); //vector count (in chars) |
2903 | andl(cnt1,0x00000007); //tail count (in chars) |
2904 | |
// 16-byte loop: 8 elements per iteration, same match test as the 32-byte loop.
2905 | bind(SCAN_TO_8_CHAR_LOOP); |
2906 | movdqu(vec3, Address(result, 0)); |
2907 | pcmpeqw(vec3, vec1); |
2908 | ptest(vec2, vec3); |
2909 | jcc(Assembler::carryClear, FOUND_CHAR); |
2910 | addptr(result, 16); |
2911 | subl(tmp, stride); |
2912 | jcc(Assembler::notZero, SCAN_TO_8_CHAR_LOOP); |
// Scalar tail: cnt1 holds the number of elements still to be checked one by one.
2913 | bind(SCAN_TO_CHAR); |
2914 | testl(cnt1, cnt1); |
2915 | jcc(Assembler::zero, RET_NOT_FOUND); |
2916 | bind(SCAN_TO_CHAR_LOOP); |
2917 | load_unsigned_short(tmp, Address(result, 0)); |
2918 | cmpl(ch, tmp); |
2919 | jccb(Assembler::equal, FOUND_SEQ_CHAR)jccb_0(Assembler::equal, FOUND_SEQ_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2919); |
2920 | addptr(result, 2); |
2921 | subl(cnt1, 1); |
2922 | jccb(Assembler::zero, RET_NOT_FOUND)jccb_0(Assembler::zero, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2922); |
2923 | jmp(SCAN_TO_CHAR_LOOP); |
2924 | |
2925 | bind(RET_NOT_FOUND); |
2926 | movl(result, -1); |
2927 | jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2927); |
2928 | |
// A vector block matched: vec3 is its comparison mask and result points at the
// block. The lowest set bit of the byte mask is the byte offset of the first
// matching element inside the block. ch is overwritten with that offset.
2929 | bind(FOUND_CHAR); |
2930 | if (UseAVX >= 2) { |
2931 | vpmovmskb(tmp, vec3); |
2932 | } else { |
2933 | pmovmskb(tmp, vec3); |
2934 | } |
2935 | bsfl(ch, tmp); |
2936 | addptr(result, ch); |
2937 | |
// result points at the matching element: convert the byte distance from str1
// into an element index by halving it.
2938 | bind(FOUND_SEQ_CHAR); |
2939 | subptr(result, str1); |
2940 | shrl(result, 1); |
2941 | |
2942 | bind(DONE_LABEL); |
2943 | } // string_indexof_char |
2944 | |
// Emits the code that searches a run of bytes for a given value.
//   str1   - address of the first byte; not modified (reused at the end to
//            turn the match address into an index)
//   cnt1   - number of bytes to scan; clobbered
//   ch     - value to look for; overwritten on the vector-match path
//   result - receives the byte index of the first match, or -1 if none
//   vec1   - scratch: ch replicated into every byte lane
//   vec2   - scratch: all zeros (shuffle control and ptest operand)
//   vec3   - scratch: comparison mask of the block currently examined
//   tmp    - scratch general-purpose register
2945 | void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, |
2946 | XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { |
2947 | ShortBranchVerifier sbv(this); |
2948 | assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 2948, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 intrinsics are required" ); ::breakpoint(); } } while (0); |
2949 | |
// Number of bytes in one 16-byte vector.
2950 | int stride = 16; |
2951 | |
2952 | Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, |
2953 | SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, |
2954 | RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, |
2955 | FOUND_SEQ_CHAR, DONE_LABEL; |
2956 | |
// result is the running scan pointer from here on; str1 keeps the base address.
2957 | movptr(result, str1); |
// With AVX2: fewer than 16 bytes go straight to the scalar loop, 16..31 go to
// the 16-byte loop (via SCAN_TO_16_CHAR_INIT), 32 or more start in the 32-byte loop.
2958 | if (UseAVX >= 2) { |
2959 | cmpl(cnt1, stride); |
2960 | jcc(Assembler::less, SCAN_TO_CHAR_INIT); |
2961 | cmpl(cnt1, stride*2); |
2962 | jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); |
2963 | movdl(vec1, ch); |
2964 | vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); |
2965 | vpxor(vec2, vec2); |
2966 | movl(tmp, cnt1); |
2967 | andl(tmp, 0xFFFFFFE0); //vector count (in chars) |
2968 | andl(cnt1,0x0000001F); //tail count (in chars) |
2969 | |
// 32-byte loop.
2970 | bind(SCAN_TO_32_CHAR_LOOP); |
2971 | vmovdqu(vec3, Address(result, 0)); |
2972 | vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); |
// vec2 is all zeros, so by the PTEST definition the carry flag is clear exactly
// when vec3 is non-zero, i.e. at least one byte compared equal.
2973 | vptest(vec2, vec3); |
2974 | jcc(Assembler::carryClear, FOUND_CHAR); |
2975 | addptr(result, 32); |
2976 | subl(tmp, stride*2); |
2977 | jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); |
// Fewer than 32 bytes remain in cnt1; vec1/vec2 are already set up.
2978 | jmp(SCAN_TO_16_CHAR); |
2979 | |
// Entered only by the jump above for 16..31 bytes: set up the broadcast and
// the zero vector that the 32-byte path did not get to initialize.
2980 | bind(SCAN_TO_16_CHAR_INIT); |
2981 | movdl(vec1, ch); |
2982 | pxor(vec2, vec2); |
// An all-zero PSHUFB control selects byte 0 for every lane, which broadcasts ch.
2983 | pshufb(vec1, vec2); |
2984 | } |
2985 | |
2986 | bind(SCAN_TO_16_CHAR); |
2987 | cmpl(cnt1, stride); |
2988 | jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entries left |
// Without AVX2 nothing above has initialized vec1/vec2, so do it here.
2989 | if (UseAVX < 2) { |
2990 | movdl(vec1, ch); |
2991 | pxor(vec2, vec2); |
2992 | pshufb(vec1, vec2); |
2993 | } |
2994 | movl(tmp, cnt1); |
2995 | andl(tmp, 0xFFFFFFF0); //vector count (in bytes) |
2996 | andl(cnt1,0x0000000F); //tail count (in bytes) |
2997 | |
// 16-byte loop, same match test as the 32-byte loop.
2998 | bind(SCAN_TO_16_CHAR_LOOP); |
2999 | movdqu(vec3, Address(result, 0)); |
3000 | pcmpeqb(vec3, vec1); |
3001 | ptest(vec2, vec3); |
3002 | jcc(Assembler::carryClear, FOUND_CHAR); |
3003 | addptr(result, 16); |
3004 | subl(tmp, stride); |
3005 | jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... |
3006 | |
// Scalar tail: cnt1 holds the number of bytes still to be checked one by one.
3007 | bind(SCAN_TO_CHAR_INIT); |
3008 | testl(cnt1, cnt1); |
3009 | jcc(Assembler::zero, RET_NOT_FOUND); |
3010 | bind(SCAN_TO_CHAR_LOOP); |
3011 | load_unsigned_byte(tmp, Address(result, 0)); |
3012 | cmpl(ch, tmp); |
3013 | jccb(Assembler::equal, FOUND_SEQ_CHAR)jccb_0(Assembler::equal, FOUND_SEQ_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3013); |
3014 | addptr(result, 1); |
3015 | subl(cnt1, 1); |
3016 | jccb(Assembler::zero, RET_NOT_FOUND)jccb_0(Assembler::zero, RET_NOT_FOUND, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3016); |
3017 | jmp(SCAN_TO_CHAR_LOOP); |
3018 | |
3019 | bind(RET_NOT_FOUND); |
3020 | movl(result, -1); |
3021 | jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3021); |
3022 | |
// A vector block matched: vec3 is its comparison mask and result points at the
// block. The lowest set bit of the byte mask is the offset of the first
// matching byte inside the block. ch is overwritten with that offset.
3023 | bind(FOUND_CHAR); |
3024 | if (UseAVX >= 2) { |
3025 | vpmovmskb(tmp, vec3); |
3026 | } else { |
3027 | pmovmskb(tmp, vec3); |
3028 | } |
3029 | bsfl(ch, tmp); |
3030 | addptr(result, ch); |
3031 | |
// result points at the matching byte; the distance from str1 is already the
// index, so no scaling is applied here.
3032 | bind(FOUND_SEQ_CHAR); |
3033 | subptr(result, str1); |
3034 | |
3035 | bind(DONE_LABEL); |
3036 | } // stringL_indexof_char |
3037 | |
3038 | // helper function for string_compare |
// Emits the loads of one element from each of two strings at the same index:
// elem1 is loaded from str1 and elem2 from str2, both zero-extended.
//   ae == LL : a byte from each string, addressed with 'scale'
//   ae == UU : a 16-bit value from each string, addressed with 'scale'
//   otherwise: a byte from str1 (addressed with 'scale1') and a 16-bit value
//              from str2 (addressed with 'scale2'); 'scale' is not used
// str1, str2 and index are only read.
3039 | void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, |
3040 | Address::ScaleFactor scale, Address::ScaleFactor scale1, |
3041 | Address::ScaleFactor scale2, Register index, int ae) { |
3042 | if (ae == StrIntrinsicNode::LL) { |
3043 | load_unsigned_byte(elem1, Address(str1, index, scale, 0)); |
3044 | load_unsigned_byte(elem2, Address(str2, index, scale, 0)); |
3045 | } else if (ae == StrIntrinsicNode::UU) { |
3046 | load_unsigned_short(elem1, Address(str1, index, scale, 0)); |
3047 | load_unsigned_short(elem2, Address(str2, index, scale, 0)); |
3048 | } else { |
// Mixed-width case: the two strings use different element sizes, hence the
// separate scale factors.
3049 | load_unsigned_byte(elem1, Address(str1, index, scale1, 0)); |
3050 | load_unsigned_short(elem2, Address(str2, index, scale2, 0)); |
3051 | } |
3052 | } |
3052 | } |
3053 | |
3054 | // Compare strings, used for char[] and byte[]. |
3055 | void C2_MacroAssembler::string_compare(Register str1, Register str2, |
3056 | Register cnt1, Register cnt2, Register result, |
3057 | XMMRegister vec1, int ae, KRegister mask) { |
3058 | ShortBranchVerifier sbv(this); |
3059 | Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; |
3060 | Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3 |
3061 | int stride, stride2, adr_stride, adr_stride1, adr_stride2; |
3062 | int stride2x2 = 0x40; |
3063 | Address::ScaleFactor scale = Address::no_scale; |
3064 | Address::ScaleFactor scale1 = Address::no_scale; |
3065 | Address::ScaleFactor scale2 = Address::no_scale; |
3066 | |
3067 | if (ae != StrIntrinsicNode::LL) { |
3068 | stride2x2 = 0x20; |
3069 | } |
3070 | |
3071 | if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) { |
3072 | shrl(cnt2, 1); |
3073 | } |
3074 | // Compute the minimum of the string lengths and the |
3075 | // difference of the string lengths (stack). |
3076 | // Do the conditional move stuff |
3077 | movl(result, cnt1); |
3078 | subl(cnt1, cnt2); |
3079 | push(cnt1); |
3080 | cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2) |
3081 | |
3082 | // Is the minimum length zero? |
3083 | testl(cnt2, cnt2); |
3084 | jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
3085 | if (ae == StrIntrinsicNode::LL) { |
3086 | // Load first bytes |
3087 | load_unsigned_byte(result, Address(str1, 0)); // result = str1[0] |
3088 | load_unsigned_byte(cnt1, Address(str2, 0)); // cnt1 = str2[0] |
3089 | } else if (ae == StrIntrinsicNode::UU) { |
3090 | // Load first characters |
3091 | load_unsigned_short(result, Address(str1, 0)); |
3092 | load_unsigned_short(cnt1, Address(str2, 0)); |
3093 | } else { |
3094 | load_unsigned_byte(result, Address(str1, 0)); |
3095 | load_unsigned_short(cnt1, Address(str2, 0)); |
3096 | } |
3097 | subl(result, cnt1); |
3098 | jcc(Assembler::notZero, POP_LABEL); |
3099 | |
3100 | if (ae == StrIntrinsicNode::UU) { |
3101 | // Divide length by 2 to get number of chars |
3102 | shrl(cnt2, 1); |
3103 | } |
3104 | cmpl(cnt2, 1); |
3105 | jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
3106 | |
3107 | // Check if the strings start at the same location and setup scale and stride |
3108 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3109 | cmpptr(str1, str2); |
3110 | jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
3111 | if (ae == StrIntrinsicNode::LL) { |
3112 | scale = Address::times_1; |
3113 | stride = 16; |
3114 | } else { |
3115 | scale = Address::times_2; |
3116 | stride = 8; |
3117 | } |
3118 | } else { |
3119 | scale1 = Address::times_1; |
3120 | scale2 = Address::times_2; |
3121 | // scale not used |
3122 | stride = 8; |
3123 | } |
3124 | |
3125 | if (UseAVX >= 2 && UseSSE42Intrinsics) { |
3126 | Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; |
3127 | Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; |
3128 | Label COMPARE_WIDE_VECTORS_LOOP_AVX2; |
3129 | Label COMPARE_TAIL_LONG; |
3130 | Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only _LP64 && AVX3 |
3131 | |
3132 | int pcmpmask = 0x19; |
3133 | if (ae == StrIntrinsicNode::LL) { |
3134 | pcmpmask &= ~0x01; |
3135 | } |
3136 | |
3137 | // Setup to compare 16-chars (32-bytes) vectors, |
3138 | // start from first character again because it has aligned address. |
3139 | if (ae == StrIntrinsicNode::LL) { |
3140 | stride2 = 32; |
3141 | } else { |
3142 | stride2 = 16; |
3143 | } |
3144 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3145 | adr_stride = stride << scale; |
3146 | } else { |
3147 | adr_stride1 = 8; //stride << scale1; |
3148 | adr_stride2 = 16; //stride << scale2; |
3149 | } |
3150 | |
3151 | assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri")do { if (!(result == rax && cnt2 == rdx && cnt1 == rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3151, "assert(" "result == rax && cnt2 == rdx && cnt1 == rcx" ") failed", "pcmpestri"); ::breakpoint(); } } while (0); |
3152 | // rax and rdx are used by pcmpestri as elements counters |
3153 | movl(result, cnt2); |
3154 | andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count |
3155 | jcc(Assembler::zero, COMPARE_TAIL_LONG); |
3156 | |
3157 | // fast path : compare first 2 8-char vectors. |
3158 | bind(COMPARE_16_CHARS); |
3159 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3160 | movdqu(vec1, Address(str1, 0)); |
3161 | } else { |
3162 | pmovzxbw(vec1, Address(str1, 0)); |
3163 | } |
3164 | pcmpestri(vec1, Address(str2, 0), pcmpmask); |
3165 | jccb(Assembler::below, COMPARE_INDEX_CHAR)jccb_0(Assembler::below, COMPARE_INDEX_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3165); |
3166 | |
3167 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3168 | movdqu(vec1, Address(str1, adr_stride)); |
3169 | pcmpestri(vec1, Address(str2, adr_stride), pcmpmask); |
3170 | } else { |
3171 | pmovzxbw(vec1, Address(str1, adr_stride1)); |
3172 | pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask); |
3173 | } |
3174 | jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS)jccb_0(Assembler::aboveEqual, COMPARE_WIDE_VECTORS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3174); |
3175 | addl(cnt1, stride); |
3176 | |
3177 | // Compare the characters at index in cnt1 |
3178 | bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character |
3179 | load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae); |
3180 | subl(result, cnt2); |
3181 | jmp(POP_LABEL); |
3182 | |
3183 | // Setup the registers to start vector comparison loop |
3184 | bind(COMPARE_WIDE_VECTORS); |
3185 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3186 | lea(str1, Address(str1, result, scale)); |
3187 | lea(str2, Address(str2, result, scale)); |
3188 | } else { |
3189 | lea(str1, Address(str1, result, scale1)); |
3190 | lea(str2, Address(str2, result, scale2)); |
3191 | } |
3192 | subl(result, stride2); |
3193 | subl(cnt2, stride2); |
3194 | jcc(Assembler::zero, COMPARE_WIDE_TAIL); |
3195 | negptr(result); |
3196 | |
3197 | // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) |
3198 | bind(COMPARE_WIDE_VECTORS_LOOP); |
3199 | |
3200 | #ifdef _LP641 |
3201 | if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop |
3202 | cmpl(cnt2, stride2x2); |
3203 | jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2)jccb_0(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3203); |
3204 | testl(cnt2, stride2x2-1); // cnt2 holds the vector count |
3205 | jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3205); // means we cannot subtract by 0x40 |
3206 | |
3207 | bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop |
3208 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3209 | evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit); |
3210 | evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0 |
3211 | } else { |
3212 | vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit); |
3213 | evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0 |
3214 | } |
3215 | kortestql(mask, mask); |
3216 | jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare |
3217 | addptr(result, stride2x2); // update since we already compared at this addr |
3218 | subl(cnt2, stride2x2); // and sub the size too |
3219 | jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3219); |
3220 | |
3221 | vpxor(vec1, vec1); |
3222 | jmpb(COMPARE_WIDE_TAIL)jmpb_0(COMPARE_WIDE_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3222); |
3223 | }//if (VM_Version::supports_avx512vlbw()) |
3224 | #endif // _LP64 |
3225 | |
3226 | |
3227 | bind(COMPARE_WIDE_VECTORS_LOOP_AVX2); |
3228 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3229 | vmovdqu(vec1, Address(str1, result, scale)); |
3230 | vpxor(vec1, Address(str2, result, scale)); |
3231 | } else { |
3232 | vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_256bit); |
3233 | vpxor(vec1, Address(str2, result, scale2)); |
3234 | } |
3235 | vptest(vec1, vec1); |
3236 | jcc(Assembler::notZero, VECTOR_NOT_EQUAL); |
3237 | addptr(result, stride2); |
3238 | subl(cnt2, stride2); |
3239 | jcc(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
3240 | // clean upper bits of YMM registers |
3241 | vpxor(vec1, vec1); |
3242 | |
3243 | // compare wide vectors tail |
3244 | bind(COMPARE_WIDE_TAIL); |
3245 | testptr(result, result); |
3246 | jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
3247 | |
3248 | movl(result, stride2); |
3249 | movl(cnt2, result); |
3250 | negptr(result); |
3251 | jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2); |
3252 | |
3253 | // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. |
3254 | bind(VECTOR_NOT_EQUAL); |
3255 | // clean upper bits of YMM registers |
3256 | vpxor(vec1, vec1); |
3257 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3258 | lea(str1, Address(str1, result, scale)); |
3259 | lea(str2, Address(str2, result, scale)); |
3260 | } else { |
3261 | lea(str1, Address(str1, result, scale1)); |
3262 | lea(str2, Address(str2, result, scale2)); |
3263 | } |
3264 | jmp(COMPARE_16_CHARS); |
3265 | |
3266 | // Compare tail chars, length between 1 to 15 chars |
3267 | bind(COMPARE_TAIL_LONG); |
3268 | movl(cnt2, result); |
3269 | cmpl(cnt2, stride); |
3270 | jcc(Assembler::less, COMPARE_SMALL_STR); |
3271 | |
3272 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3273 | movdqu(vec1, Address(str1, 0)); |
3274 | } else { |
3275 | pmovzxbw(vec1, Address(str1, 0)); |
3276 | } |
3277 | pcmpestri(vec1, Address(str2, 0), pcmpmask); |
3278 | jcc(Assembler::below, COMPARE_INDEX_CHAR); |
3279 | subptr(cnt2, stride); |
3280 | jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
3281 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3282 | lea(str1, Address(str1, result, scale)); |
3283 | lea(str2, Address(str2, result, scale)); |
3284 | } else { |
3285 | lea(str1, Address(str1, result, scale1)); |
3286 | lea(str2, Address(str2, result, scale2)); |
3287 | } |
3288 | negptr(cnt2); |
3289 | jmpb(WHILE_HEAD_LABEL)jmpb_0(WHILE_HEAD_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3289); |
3290 | |
3291 | bind(COMPARE_SMALL_STR); |
3292 | } else if (UseSSE42Intrinsics) { |
3293 | Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; |
3294 | int pcmpmask = 0x19; |
3295 | // Setup to compare 8-char (16-byte) vectors, |
3296 | // start from first character again because it has aligned address. |
3297 | movl(result, cnt2); |
3298 | andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count |
3299 | if (ae == StrIntrinsicNode::LL) { |
3300 | pcmpmask &= ~0x01; |
3301 | } |
3302 | jcc(Assembler::zero, COMPARE_TAIL); |
3303 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3304 | lea(str1, Address(str1, result, scale)); |
3305 | lea(str2, Address(str2, result, scale)); |
3306 | } else { |
3307 | lea(str1, Address(str1, result, scale1)); |
3308 | lea(str2, Address(str2, result, scale2)); |
3309 | } |
3310 | negptr(result); |
3311 | |
3312 | // pcmpestri |
3313 | // inputs: |
3314 | // vec1- substring |
3315 | // rax - negative string length (elements count) |
3316 | // mem - scanned string |
3317 | // rdx - string length (elements count) |
3318 | // pcmpmask - cmp mode: 11000 (string compare with negated result) |
3319 | // + 00 (unsigned bytes) or + 01 (unsigned shorts) |
3320 | // outputs: |
3321 | // rcx - first mismatched element index |
3322 | assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri")do { if (!(result == rax && cnt2 == rdx && cnt1 == rcx)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3322, "assert(" "result == rax && cnt2 == rdx && cnt1 == rcx" ") failed", "pcmpestri"); ::breakpoint(); } } while (0); |
3323 | |
3324 | bind(COMPARE_WIDE_VECTORS); |
3325 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3326 | movdqu(vec1, Address(str1, result, scale)); |
3327 | pcmpestri(vec1, Address(str2, result, scale), pcmpmask); |
3328 | } else { |
3329 | pmovzxbw(vec1, Address(str1, result, scale1)); |
3330 | pcmpestri(vec1, Address(str2, result, scale2), pcmpmask); |
3331 | } |
3332 | // After pcmpestri cnt1(rcx) contains mismatched element index |
3333 | |
3334 | jccb(Assembler::below, VECTOR_NOT_EQUAL)jccb_0(Assembler::below, VECTOR_NOT_EQUAL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3334); // CF==1 |
3335 | addptr(result, stride); |
3336 | subptr(cnt2, stride); |
3337 | jccb(Assembler::notZero, COMPARE_WIDE_VECTORS)jccb_0(Assembler::notZero, COMPARE_WIDE_VECTORS, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3337); |
3338 | |
3339 | // compare wide vectors tail |
3340 | testptr(result, result); |
3341 | jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
3342 | |
3343 | movl(cnt2, stride); |
3344 | movl(result, stride); |
3345 | negptr(result); |
3346 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3347 | movdqu(vec1, Address(str1, result, scale)); |
3348 | pcmpestri(vec1, Address(str2, result, scale), pcmpmask); |
3349 | } else { |
3350 | pmovzxbw(vec1, Address(str1, result, scale1)); |
3351 | pcmpestri(vec1, Address(str2, result, scale2), pcmpmask); |
3352 | } |
3353 | jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL)jccb_0(Assembler::aboveEqual, LENGTH_DIFF_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3353); |
3354 | |
3355 | // Mismatched characters in the vectors |
3356 | bind(VECTOR_NOT_EQUAL); |
3357 | addptr(cnt1, result); |
3358 | load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae); |
3359 | subl(result, cnt2); |
3360 | jmpb(POP_LABEL)jmpb_0(POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3360); |
3361 | |
3362 | bind(COMPARE_TAIL); // limit is zero |
3363 | movl(cnt2, result); |
3364 | // Fallthru to tail compare |
3365 | } |
3366 | // Shift str2 and str1 to the end of the arrays, negate min |
3367 | if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { |
3368 | lea(str1, Address(str1, cnt2, scale)); |
3369 | lea(str2, Address(str2, cnt2, scale)); |
3370 | } else { |
3371 | lea(str1, Address(str1, cnt2, scale1)); |
3372 | lea(str2, Address(str2, cnt2, scale2)); |
3373 | } |
3374 | decrementl(cnt2); // first character was compared already |
3375 | negptr(cnt2); |
3376 | |
3377 | // Compare the rest of the elements |
3378 | bind(WHILE_HEAD_LABEL); |
3379 | load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae); |
3380 | subl(result, cnt1); |
3381 | jccb(Assembler::notZero, POP_LABEL)jccb_0(Assembler::notZero, POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3381); |
3382 | increment(cnt2); |
3383 | jccb(Assembler::notZero, WHILE_HEAD_LABEL)jccb_0(Assembler::notZero, WHILE_HEAD_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3383); |
3384 | |
3385 | // Strings are equal up to min length. Return the length difference. |
3386 | bind(LENGTH_DIFF_LABEL); |
3387 | pop(result); |
3388 | if (ae == StrIntrinsicNode::UU) { |
3389 | // Divide diff by 2 to get number of chars |
3390 | sarl(result, 1); |
3391 | } |
3392 | jmpb(DONE_LABEL)jmpb_0(DONE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3392); |
3393 | |
3394 | #ifdef _LP641 |
3395 | if (VM_Version::supports_avx512vlbw()) { |
3396 | |
3397 | bind(COMPARE_WIDE_VECTORS_LOOP_FAILED); |
3398 | |
3399 | kmovql(cnt1, mask); |
3400 | notq(cnt1); |
3401 | bsfq(cnt2, cnt1); |
3402 | if (ae != StrIntrinsicNode::LL) { |
3403 | // Divide diff by 2 to get number of chars |
3404 | sarl(cnt2, 1); |
3405 | } |
3406 | addq(result, cnt2); |
3407 | if (ae == StrIntrinsicNode::LL) { |
3408 | load_unsigned_byte(cnt1, Address(str2, result)); |
3409 | load_unsigned_byte(result, Address(str1, result)); |
3410 | } else if (ae == StrIntrinsicNode::UU) { |
3411 | load_unsigned_short(cnt1, Address(str2, result, scale)); |
3412 | load_unsigned_short(result, Address(str1, result, scale)); |
3413 | } else { |
3414 | load_unsigned_short(cnt1, Address(str2, result, scale2)); |
3415 | load_unsigned_byte(result, Address(str1, result, scale1)); |
3416 | } |
3417 | subl(result, cnt1); |
3418 | jmpb(POP_LABEL)jmpb_0(POP_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3418); |
3419 | }//if (VM_Version::supports_avx512vlbw()) |
3420 | #endif // _LP64 |
3421 | |
3422 | // Discard the stored length difference |
3423 | bind(POP_LABEL); |
3424 | pop(cnt1); |
3425 | |
3426 | // That's it |
3427 | bind(DONE_LABEL); |
3428 | if(ae == StrIntrinsicNode::UL) { |
3429 | negl(result); |
3430 | } |
3431 | |
3432 | } |
3433 | |
3434 | // Search for Non-ASCII character (Negative byte value) in a byte array, |
3435 | // return true if it has any and false otherwise. |
3436 | // ..\jdk\src\java.base\share\classes\java\lang\StringCoding.java |
3437 | // @IntrinsicCandidate |
3438 | // private static boolean hasNegatives(byte[] ba, int off, int len) { |
3439 | // for (int i = off; i < off + len; i++) { |
3440 | // if (ba[i] < 0) { |
3441 | // return true; |
3442 | // } |
3443 | // } |
3444 | // return false; |
3445 | // } |
3446 | void C2_MacroAssembler::has_negatives(Register ary1, Register len, |
3447 | Register result, Register tmp1, |
3448 | XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) { |
3449 | // rsi: byte array |
3450 | // rcx: len |
3451 | // rax: result |
3452 | ShortBranchVerifier sbv(this); |
3453 | assert_different_registers(ary1, len, result, tmp1); |
3454 | assert_different_registers(vec1, vec2); |
3455 | Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE; |
3456 | |
3457 | // len == 0 |
3458 | testl(len, len); |
3459 | jcc(Assembler::zero, FALSE_LABEL); |
3460 | |
3461 | if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512 |
3462 | VM_Version::supports_avx512vlbw() && |
3463 | VM_Version::supports_bmi2()) { |
3464 | |
3465 | Label test_64_loop, test_tail; |
3466 | Register tmp3_aliased = len; |
3467 | |
3468 | movl(tmp1, len); |
3469 | vpxor(vec2, vec2, vec2, Assembler::AVX_512bit); |
3470 | |
3471 | andl(tmp1, 64 - 1); // tail count (in chars) 0x3F |
3472 | andl(len, ~(64 - 1)); // vector count (in chars) |
3473 | jccb(Assembler::zero, test_tail)jccb_0(Assembler::zero, test_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3473); |
3474 | |
3475 | lea(ary1, Address(ary1, len, Address::times_1)); |
3476 | negptr(len); |
3477 | |
3478 | bind(test_64_loop); |
3479 | // Check whether our 64 elements of size byte contain negatives |
3480 | evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit); |
3481 | kortestql(mask1, mask1); |
3482 | jcc(Assembler::notZero, TRUE_LABEL); |
3483 | |
3484 | addptr(len, 64); |
3485 | jccb(Assembler::notZero, test_64_loop)jccb_0(Assembler::notZero, test_64_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3485); |
3486 | |
3487 | |
3488 | bind(test_tail); |
3489 | // bail out when there is nothing to be done |
3490 | testl(tmp1, -1); |
3491 | jcc(Assembler::zero, FALSE_LABEL); |
3492 | |
3493 | // ~(~0 << len) applied up to two times (for 32-bit scenario) |
3494 | #ifdef _LP641 |
3495 | mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF); |
3496 | shlxq(tmp3_aliased, tmp3_aliased, tmp1); |
3497 | notq(tmp3_aliased); |
3498 | kmovql(mask2, tmp3_aliased); |
3499 | #else |
3500 | Label k_init; |
3501 | jmp(k_init); |
3502 | |
3503 | // We could not read 64-bits from a general purpose register thus we move |
3504 | // data required to compose 64 1's to the instruction stream |
3505 | // We emit 64 byte wide series of elements from 0..63 which later on would |
3506 | // be used as a compare targets with tail count contained in tmp1 register. |
3507 | // Result would be a k register having tmp1 consecutive number or 1 |
3508 | // counting from least significant bit. |
3509 | address tmp = pc(); |
3510 | emit_int64(0x0706050403020100); |
3511 | emit_int64(0x0F0E0D0C0B0A0908); |
3512 | emit_int64(0x1716151413121110); |
3513 | emit_int64(0x1F1E1D1C1B1A1918); |
3514 | emit_int64(0x2726252423222120); |
3515 | emit_int64(0x2F2E2D2C2B2A2928); |
3516 | emit_int64(0x3736353433323130); |
3517 | emit_int64(0x3F3E3D3C3B3A3938); |
3518 | |
3519 | bind(k_init); |
3520 | lea(len, InternalAddress(tmp)); |
3521 | // create mask to test for negative byte inside a vector |
3522 | evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit); |
3523 | evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit); |
3524 | |
3525 | #endif |
3526 | evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit); |
3527 | ktestq(mask1, mask2); |
3528 | jcc(Assembler::notZero, TRUE_LABEL); |
3529 | |
3530 | jmp(FALSE_LABEL); |
3531 | } else { |
3532 | movl(result, len); // copy |
3533 | |
3534 | if (UseAVX >= 2 && UseSSE >= 2) { |
3535 | // With AVX2, use 32-byte vector compare |
3536 | Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
3537 | |
3538 | // Compare 32-byte vectors |
3539 | andl(result, 0x0000001f); // tail count (in bytes) |
3540 | andl(len, 0xffffffe0); // vector count (in bytes) |
3541 | jccb(Assembler::zero, COMPARE_TAIL)jccb_0(Assembler::zero, COMPARE_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3541); |
3542 | |
3543 | lea(ary1, Address(ary1, len, Address::times_1)); |
3544 | negptr(len); |
3545 | |
3546 | movl(tmp1, 0x80808080); // create mask to test for Unicode chars in vector |
3547 | movdl(vec2, tmp1); |
3548 | vpbroadcastd(vec2, vec2, Assembler::AVX_256bit); |
3549 | |
3550 | bind(COMPARE_WIDE_VECTORS); |
3551 | vmovdqu(vec1, Address(ary1, len, Address::times_1)); |
3552 | vptest(vec1, vec2); |
3553 | jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3553); |
3554 | addptr(len, 32); |
3555 | jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |
3556 | |
3557 | testl(result, result); |
3558 | jccb(Assembler::zero, FALSE_LABEL)jccb_0(Assembler::zero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3558); |
3559 | |
3560 | vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); |
3561 | vptest(vec1, vec2); |
3562 | jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3562); |
3563 | jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3563); |
3564 | |
3565 | bind(COMPARE_TAIL); // len is zero |
3566 | movl(len, result); |
3567 | // Fallthru to tail compare |
3568 | } else if (UseSSE42Intrinsics) { |
3569 | // With SSE4.2, use double quad vector compare |
3570 | Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
3571 | |
3572 | // Compare 16-byte vectors |
3573 | andl(result, 0x0000000f); // tail count (in bytes) |
3574 | andl(len, 0xfffffff0); // vector count (in bytes) |
3575 | jcc(Assembler::zero, COMPARE_TAIL); |
3576 | |
3577 | lea(ary1, Address(ary1, len, Address::times_1)); |
3578 | negptr(len); |
3579 | |
3580 | movl(tmp1, 0x80808080); |
3581 | movdl(vec2, tmp1); |
3582 | pshufd(vec2, vec2, 0); |
3583 | |
3584 | bind(COMPARE_WIDE_VECTORS); |
3585 | movdqu(vec1, Address(ary1, len, Address::times_1)); |
3586 | ptest(vec1, vec2); |
3587 | jcc(Assembler::notZero, TRUE_LABEL); |
3588 | addptr(len, 16); |
3589 | jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |
3590 | |
3591 | testl(result, result); |
3592 | jcc(Assembler::zero, FALSE_LABEL); |
3593 | |
3594 | movdqu(vec1, Address(ary1, result, Address::times_1, -16)); |
3595 | ptest(vec1, vec2); |
3596 | jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3596); |
3597 | jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3597); |
3598 | |
3599 | bind(COMPARE_TAIL); // len is zero |
3600 | movl(len, result); |
3601 | // Fallthru to tail compare |
3602 | } |
3603 | } |
3604 | // Compare 4-byte vectors |
3605 | andl(len, 0xfffffffc); // vector count (in bytes) |
3606 | jccb(Assembler::zero, COMPARE_CHAR)jccb_0(Assembler::zero, COMPARE_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3606); |
3607 | |
3608 | lea(ary1, Address(ary1, len, Address::times_1)); |
3609 | negptr(len); |
3610 | |
3611 | bind(COMPARE_VECTORS); |
3612 | movl(tmp1, Address(ary1, len, Address::times_1)); |
3613 | andl(tmp1, 0x80808080); |
3614 | jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3614); |
3615 | addptr(len, 4); |
3616 | jcc(Assembler::notZero, COMPARE_VECTORS); |
3617 | |
3618 | // Compare trailing char (final 2 bytes), if any |
3619 | bind(COMPARE_CHAR); |
3620 | testl(result, 0x2); // tail char |
3621 | jccb(Assembler::zero, COMPARE_BYTE)jccb_0(Assembler::zero, COMPARE_BYTE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3621); |
3622 | load_unsigned_short(tmp1, Address(ary1, 0)); |
3623 | andl(tmp1, 0x00008080); |
3624 | jccb(Assembler::notZero, TRUE_LABEL)jccb_0(Assembler::notZero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3624); |
3625 | subptr(result, 2); |
3626 | lea(ary1, Address(ary1, 2)); |
3627 | |
3628 | bind(COMPARE_BYTE); |
3629 | testl(result, 0x1); // tail byte |
3630 | jccb(Assembler::zero, FALSE_LABEL)jccb_0(Assembler::zero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3630); |
3631 | load_unsigned_byte(tmp1, Address(ary1, 0)); |
3632 | andl(tmp1, 0x00000080); |
3633 | jccb(Assembler::notEqual, TRUE_LABEL)jccb_0(Assembler::notEqual, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3633); |
3634 | jmpb(FALSE_LABEL)jmpb_0(FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3634); |
3635 | |
3636 | bind(TRUE_LABEL); |
3637 | movl(result, 1); // return true |
3638 | jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3638); |
3639 | |
3640 | bind(FALSE_LABEL); |
3641 | xorl(result, result); // return false |
3642 | |
3643 | // That's it |
3644 | bind(DONE); |
3645 | if (UseAVX >= 2 && UseSSE >= 2) { |
3646 | // clean upper bits of YMM registers |
3647 | vpxor(vec1, vec1); |
3648 | vpxor(vec2, vec2); |
3649 | } |
3650 | } |
3651 | // Compare char[] or byte[] arrays aligned to 4 bytes or substrings. |
3652 | void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2, |
3653 | Register limit, Register result, Register chr, |
3654 | XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) { |
3655 | ShortBranchVerifier sbv(this); |
3656 | Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE; |
3657 | |
3658 | int length_offset = arrayOopDesc::length_offset_in_bytes(); |
3659 | int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE); |
3660 | |
3661 | if (is_array_equ) { |
3662 | // Check the input args |
3663 | cmpoop(ary1, ary2); |
3664 | jcc(Assembler::equal, TRUE_LABEL); |
3665 | |
3666 | // Need additional checks for arrays_equals. |
3667 | testptr(ary1, ary1); |
3668 | jcc(Assembler::zero, FALSE_LABEL); |
3669 | testptr(ary2, ary2); |
3670 | jcc(Assembler::zero, FALSE_LABEL); |
3671 | |
3672 | // Check the lengths |
3673 | movl(limit, Address(ary1, length_offset)); |
3674 | cmpl(limit, Address(ary2, length_offset)); |
3675 | jcc(Assembler::notEqual, FALSE_LABEL); |
3676 | } |
3677 | |
3678 | // count == 0 |
3679 | testl(limit, limit); |
3680 | jcc(Assembler::zero, TRUE_LABEL); |
3681 | |
3682 | if (is_array_equ) { |
3683 | // Load array address |
3684 | lea(ary1, Address(ary1, base_offset)); |
3685 | lea(ary2, Address(ary2, base_offset)); |
3686 | } |
3687 | |
3688 | if (is_array_equ && is_char) { |
3689 | // arrays_equals when used for char[]. |
3690 | shll(limit, 1); // byte count != 0 |
3691 | } |
3692 | movl(result, limit); // copy |
3693 | |
3694 | if (UseAVX >= 2) { |
3695 | // With AVX2, use 32-byte vector compare |
3696 | Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
3697 | |
3698 | // Compare 32-byte vectors |
3699 | andl(result, 0x0000001f); // tail count (in bytes) |
3700 | andl(limit, 0xffffffe0); // vector count (in bytes) |
3701 | jcc(Assembler::zero, COMPARE_TAIL); |
3702 | |
3703 | lea(ary1, Address(ary1, limit, Address::times_1)); |
3704 | lea(ary2, Address(ary2, limit, Address::times_1)); |
3705 | negptr(limit); |
3706 | |
3707 | #ifdef _LP641 |
3708 | if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop |
3709 | Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3; |
3710 | |
3711 | cmpl(limit, -64); |
3712 | jcc(Assembler::greater, COMPARE_WIDE_VECTORS_LOOP_AVX2); |
3713 | |
3714 | bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop |
3715 | |
3716 | evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit); |
3717 | evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit); |
3718 | kortestql(mask, mask); |
3719 | jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare |
3720 | addptr(limit, 64); // update since we already compared at this addr |
3721 | cmpl(limit, -64); |
3722 | jccb(Assembler::lessEqual, COMPARE_WIDE_VECTORS_LOOP_AVX3)jccb_0(Assembler::lessEqual, COMPARE_WIDE_VECTORS_LOOP_AVX3, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3722); |
3723 | |
3724 | // At this point we may still need to compare -limit+result bytes. |
3725 | // We could execute the next two instruction and just continue via non-wide path: |
3726 | // cmpl(limit, 0); |
3727 | // jcc(Assembler::equal, COMPARE_TAIL); // true |
3728 | // But since we stopped at the points ary{1,2}+limit which are |
3729 | // not farther than 64 bytes from the ends of arrays ary{1,2}+result |
3730 | // (|limit| <= 32 and result < 32), |
3731 | // we may just compare the last 64 bytes. |
3732 | // |
3733 | addptr(result, -64); // it is safe, bc we just came from this area |
3734 | evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit); |
3735 | evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit); |
3736 | kortestql(mask, mask); |
3737 | jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare |
3738 | |
3739 | jmp(TRUE_LABEL); |
3740 | |
3741 | bind(COMPARE_WIDE_VECTORS_LOOP_AVX2); |
3742 | |
3743 | }//if (VM_Version::supports_avx512vlbw()) |
3744 | #endif //_LP64 |
3745 | bind(COMPARE_WIDE_VECTORS); |
3746 | vmovdqu(vec1, Address(ary1, limit, Address::times_1)); |
3747 | vmovdqu(vec2, Address(ary2, limit, Address::times_1)); |
3748 | vpxor(vec1, vec2); |
3749 | |
3750 | vptest(vec1, vec1); |
3751 | jcc(Assembler::notZero, FALSE_LABEL); |
3752 | addptr(limit, 32); |
3753 | jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |
3754 | |
3755 | testl(result, result); |
3756 | jcc(Assembler::zero, TRUE_LABEL); |
3757 | |
3758 | vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); |
3759 | vmovdqu(vec2, Address(ary2, result, Address::times_1, -32)); |
3760 | vpxor(vec1, vec2); |
3761 | |
3762 | vptest(vec1, vec1); |
3763 | jccb(Assembler::notZero, FALSE_LABEL)jccb_0(Assembler::notZero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3763); |
3764 | jmpb(TRUE_LABEL)jmpb_0(TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3764); |
3765 | |
3766 | bind(COMPARE_TAIL); // limit is zero |
3767 | movl(limit, result); |
3768 | // Fallthru to tail compare |
3769 | } else if (UseSSE42Intrinsics) { |
3770 | // With SSE4.2, use double quad vector compare |
3771 | Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
3772 | |
3773 | // Compare 16-byte vectors |
3774 | andl(result, 0x0000000f); // tail count (in bytes) |
3775 | andl(limit, 0xfffffff0); // vector count (in bytes) |
3776 | jcc(Assembler::zero, COMPARE_TAIL); |
3777 | |
3778 | lea(ary1, Address(ary1, limit, Address::times_1)); |
3779 | lea(ary2, Address(ary2, limit, Address::times_1)); |
3780 | negptr(limit); |
3781 | |
3782 | bind(COMPARE_WIDE_VECTORS); |
3783 | movdqu(vec1, Address(ary1, limit, Address::times_1)); |
3784 | movdqu(vec2, Address(ary2, limit, Address::times_1)); |
3785 | pxor(vec1, vec2); |
3786 | |
3787 | ptest(vec1, vec1); |
3788 | jcc(Assembler::notZero, FALSE_LABEL); |
3789 | addptr(limit, 16); |
3790 | jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |
3791 | |
3792 | testl(result, result); |
3793 | jcc(Assembler::zero, TRUE_LABEL); |
3794 | |
3795 | movdqu(vec1, Address(ary1, result, Address::times_1, -16)); |
3796 | movdqu(vec2, Address(ary2, result, Address::times_1, -16)); |
3797 | pxor(vec1, vec2); |
3798 | |
3799 | ptest(vec1, vec1); |
3800 | jccb(Assembler::notZero, FALSE_LABEL)jccb_0(Assembler::notZero, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3800); |
3801 | jmpb(TRUE_LABEL)jmpb_0(TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3801); |
3802 | |
3803 | bind(COMPARE_TAIL); // limit is zero |
3804 | movl(limit, result); |
3805 | // Fallthru to tail compare |
3806 | } |
3807 | |
3808 | // Compare 4-byte vectors |
3809 | andl(limit, 0xfffffffc); // vector count (in bytes) |
3810 | jccb(Assembler::zero, COMPARE_CHAR)jccb_0(Assembler::zero, COMPARE_CHAR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3810); |
3811 | |
3812 | lea(ary1, Address(ary1, limit, Address::times_1)); |
3813 | lea(ary2, Address(ary2, limit, Address::times_1)); |
3814 | negptr(limit); |
3815 | |
3816 | bind(COMPARE_VECTORS); |
3817 | movl(chr, Address(ary1, limit, Address::times_1)); |
3818 | cmpl(chr, Address(ary2, limit, Address::times_1)); |
3819 | jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3819); |
3820 | addptr(limit, 4); |
3821 | jcc(Assembler::notZero, COMPARE_VECTORS); |
3822 | |
3823 | // Compare trailing char (final 2 bytes), if any |
3824 | bind(COMPARE_CHAR); |
3825 | testl(result, 0x2); // tail char |
3826 | jccb(Assembler::zero, COMPARE_BYTE)jccb_0(Assembler::zero, COMPARE_BYTE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3826); |
3827 | load_unsigned_short(chr, Address(ary1, 0)); |
3828 | load_unsigned_short(limit, Address(ary2, 0)); |
3829 | cmpl(chr, limit); |
3830 | jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3830); |
3831 | |
3832 | if (is_array_equ && is_char) { |
3833 | bind(COMPARE_BYTE); |
3834 | } else { |
3835 | lea(ary1, Address(ary1, 2)); |
3836 | lea(ary2, Address(ary2, 2)); |
3837 | |
3838 | bind(COMPARE_BYTE); |
3839 | testl(result, 0x1); // tail byte |
3840 | jccb(Assembler::zero, TRUE_LABEL)jccb_0(Assembler::zero, TRUE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3840); |
3841 | load_unsigned_byte(chr, Address(ary1, 0)); |
3842 | load_unsigned_byte(limit, Address(ary2, 0)); |
3843 | cmpl(chr, limit); |
3844 | jccb(Assembler::notEqual, FALSE_LABEL)jccb_0(Assembler::notEqual, FALSE_LABEL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3844); |
3845 | } |
3846 | bind(TRUE_LABEL); |
3847 | movl(result, 1); // return true |
3848 | jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3848); |
3849 | |
3850 | bind(FALSE_LABEL); |
3851 | xorl(result, result); // return false |
3852 | |
3853 | // That's it |
3854 | bind(DONE); |
3855 | if (UseAVX >= 2) { |
3856 | // clean upper bits of YMM registers |
3857 | vpxor(vec1, vec1); |
3858 | vpxor(vec2, vec2); |
3859 | } |
3860 | } |
3861 | |
3862 | void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, |
3863 | XMMRegister src1, int imm8, bool merge, int vlen_enc) { |
3864 | switch(ideal_opc) { |
3865 | case Op_LShiftVS: |
3866 | Assembler::evpsllw(dst, mask, src1, imm8, merge, vlen_enc); break; |
3867 | case Op_LShiftVI: |
3868 | Assembler::evpslld(dst, mask, src1, imm8, merge, vlen_enc); break; |
3869 | case Op_LShiftVL: |
3870 | Assembler::evpsllq(dst, mask, src1, imm8, merge, vlen_enc); break; |
3871 | case Op_RShiftVS: |
3872 | Assembler::evpsraw(dst, mask, src1, imm8, merge, vlen_enc); break; |
3873 | case Op_RShiftVI: |
3874 | Assembler::evpsrad(dst, mask, src1, imm8, merge, vlen_enc); break; |
3875 | case Op_RShiftVL: |
3876 | Assembler::evpsraq(dst, mask, src1, imm8, merge, vlen_enc); break; |
3877 | case Op_URShiftVS: |
3878 | Assembler::evpsrlw(dst, mask, src1, imm8, merge, vlen_enc); break; |
3879 | case Op_URShiftVI: |
3880 | Assembler::evpsrld(dst, mask, src1, imm8, merge, vlen_enc); break; |
3881 | case Op_URShiftVL: |
3882 | Assembler::evpsrlq(dst, mask, src1, imm8, merge, vlen_enc); break; |
3883 | case Op_RotateRightV: |
3884 | evrord(eType, dst, mask, src1, imm8, merge, vlen_enc); break; |
3885 | case Op_RotateLeftV: |
3886 | evrold(eType, dst, mask, src1, imm8, merge, vlen_enc); break; |
3887 | default: |
3888 | fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3888, "Unsupported masked operation"); ::breakpoint(); } while (0); break; |
3889 | } |
3890 | } |
3891 | |
3892 | void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, |
3893 | XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc, |
3894 | bool is_varshift) { |
3895 | switch (ideal_opc) { |
3896 | case Op_AddVB: |
3897 | evpaddb(dst, mask, src1, src2, merge, vlen_enc); break; |
3898 | case Op_AddVS: |
3899 | evpaddw(dst, mask, src1, src2, merge, vlen_enc); break; |
3900 | case Op_AddVI: |
3901 | evpaddd(dst, mask, src1, src2, merge, vlen_enc); break; |
3902 | case Op_AddVL: |
3903 | evpaddq(dst, mask, src1, src2, merge, vlen_enc); break; |
3904 | case Op_AddVF: |
3905 | evaddps(dst, mask, src1, src2, merge, vlen_enc); break; |
3906 | case Op_AddVD: |
3907 | evaddpd(dst, mask, src1, src2, merge, vlen_enc); break; |
3908 | case Op_SubVB: |
3909 | evpsubb(dst, mask, src1, src2, merge, vlen_enc); break; |
3910 | case Op_SubVS: |
3911 | evpsubw(dst, mask, src1, src2, merge, vlen_enc); break; |
3912 | case Op_SubVI: |
3913 | evpsubd(dst, mask, src1, src2, merge, vlen_enc); break; |
3914 | case Op_SubVL: |
3915 | evpsubq(dst, mask, src1, src2, merge, vlen_enc); break; |
3916 | case Op_SubVF: |
3917 | evsubps(dst, mask, src1, src2, merge, vlen_enc); break; |
3918 | case Op_SubVD: |
3919 | evsubpd(dst, mask, src1, src2, merge, vlen_enc); break; |
3920 | case Op_MulVS: |
3921 | evpmullw(dst, mask, src1, src2, merge, vlen_enc); break; |
3922 | case Op_MulVI: |
3923 | evpmulld(dst, mask, src1, src2, merge, vlen_enc); break; |
3924 | case Op_MulVL: |
3925 | evpmullq(dst, mask, src1, src2, merge, vlen_enc); break; |
3926 | case Op_MulVF: |
3927 | evmulps(dst, mask, src1, src2, merge, vlen_enc); break; |
3928 | case Op_MulVD: |
3929 | evmulpd(dst, mask, src1, src2, merge, vlen_enc); break; |
3930 | case Op_DivVF: |
3931 | evdivps(dst, mask, src1, src2, merge, vlen_enc); break; |
3932 | case Op_DivVD: |
3933 | evdivpd(dst, mask, src1, src2, merge, vlen_enc); break; |
3934 | case Op_SqrtVF: |
3935 | evsqrtps(dst, mask, src1, src2, merge, vlen_enc); break; |
3936 | case Op_SqrtVD: |
3937 | evsqrtpd(dst, mask, src1, src2, merge, vlen_enc); break; |
3938 | case Op_AbsVB: |
3939 | evpabsb(dst, mask, src2, merge, vlen_enc); break; |
3940 | case Op_AbsVS: |
3941 | evpabsw(dst, mask, src2, merge, vlen_enc); break; |
3942 | case Op_AbsVI: |
3943 | evpabsd(dst, mask, src2, merge, vlen_enc); break; |
3944 | case Op_AbsVL: |
3945 | evpabsq(dst, mask, src2, merge, vlen_enc); break; |
3946 | case Op_FmaVF: |
3947 | evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break; |
3948 | case Op_FmaVD: |
3949 | evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break; |
3950 | case Op_VectorRearrange: |
3951 | evperm(eType, dst, mask, src2, src1, merge, vlen_enc); break; |
3952 | case Op_LShiftVS: |
3953 | evpsllw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3954 | case Op_LShiftVI: |
3955 | evpslld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3956 | case Op_LShiftVL: |
3957 | evpsllq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3958 | case Op_RShiftVS: |
3959 | evpsraw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3960 | case Op_RShiftVI: |
3961 | evpsrad(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3962 | case Op_RShiftVL: |
3963 | evpsraq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3964 | case Op_URShiftVS: |
3965 | evpsrlw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3966 | case Op_URShiftVI: |
3967 | evpsrld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3968 | case Op_URShiftVL: |
3969 | evpsrlq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break; |
3970 | case Op_RotateLeftV: |
3971 | evrold(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
3972 | case Op_RotateRightV: |
3973 | evrord(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
3974 | case Op_MaxV: |
3975 | evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
3976 | case Op_MinV: |
3977 | evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
3978 | case Op_XorV: |
3979 | evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
3980 | case Op_OrV: |
3981 | evor(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
3982 | case Op_AndV: |
3983 | evand(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
3984 | default: |
3985 | fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 3985, "Unsupported masked operation"); ::breakpoint(); } while (0); break; |
3986 | } |
3987 | } |
3988 | |
3989 | void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, |
3990 | XMMRegister src1, Address src2, bool merge, int vlen_enc) { |
3991 | switch (ideal_opc) { |
3992 | case Op_AddVB: |
3993 | evpaddb(dst, mask, src1, src2, merge, vlen_enc); break; |
3994 | case Op_AddVS: |
3995 | evpaddw(dst, mask, src1, src2, merge, vlen_enc); break; |
3996 | case Op_AddVI: |
3997 | evpaddd(dst, mask, src1, src2, merge, vlen_enc); break; |
3998 | case Op_AddVL: |
3999 | evpaddq(dst, mask, src1, src2, merge, vlen_enc); break; |
4000 | case Op_AddVF: |
4001 | evaddps(dst, mask, src1, src2, merge, vlen_enc); break; |
4002 | case Op_AddVD: |
4003 | evaddpd(dst, mask, src1, src2, merge, vlen_enc); break; |
4004 | case Op_SubVB: |
4005 | evpsubb(dst, mask, src1, src2, merge, vlen_enc); break; |
4006 | case Op_SubVS: |
4007 | evpsubw(dst, mask, src1, src2, merge, vlen_enc); break; |
4008 | case Op_SubVI: |
4009 | evpsubd(dst, mask, src1, src2, merge, vlen_enc); break; |
4010 | case Op_SubVL: |
4011 | evpsubq(dst, mask, src1, src2, merge, vlen_enc); break; |
4012 | case Op_SubVF: |
4013 | evsubps(dst, mask, src1, src2, merge, vlen_enc); break; |
4014 | case Op_SubVD: |
4015 | evsubpd(dst, mask, src1, src2, merge, vlen_enc); break; |
4016 | case Op_MulVS: |
4017 | evpmullw(dst, mask, src1, src2, merge, vlen_enc); break; |
4018 | case Op_MulVI: |
4019 | evpmulld(dst, mask, src1, src2, merge, vlen_enc); break; |
4020 | case Op_MulVL: |
4021 | evpmullq(dst, mask, src1, src2, merge, vlen_enc); break; |
4022 | case Op_MulVF: |
4023 | evmulps(dst, mask, src1, src2, merge, vlen_enc); break; |
4024 | case Op_MulVD: |
4025 | evmulpd(dst, mask, src1, src2, merge, vlen_enc); break; |
4026 | case Op_DivVF: |
4027 | evdivps(dst, mask, src1, src2, merge, vlen_enc); break; |
4028 | case Op_DivVD: |
4029 | evdivpd(dst, mask, src1, src2, merge, vlen_enc); break; |
4030 | case Op_FmaVF: |
4031 | evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break; |
4032 | case Op_FmaVD: |
4033 | evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break; |
4034 | case Op_MaxV: |
4035 | evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
4036 | case Op_MinV: |
4037 | evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
4038 | case Op_XorV: |
4039 | evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
4040 | case Op_OrV: |
4041 | evor(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
4042 | case Op_AndV: |
4043 | evand(eType, dst, mask, src1, src2, merge, vlen_enc); break; |
4044 | default: |
4045 | fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4045, "Unsupported masked operation"); ::breakpoint(); } while (0); break; |
4046 | } |
4047 | } |
4048 | |
4049 | void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst, |
4050 | KRegister src1, KRegister src2) { |
4051 | BasicType etype = T_ILLEGAL; |
4052 | switch(mask_len) { |
4053 | case 2: |
4054 | case 4: |
4055 | case 8: etype = T_BYTE; break; |
4056 | case 16: etype = T_SHORT; break; |
4057 | case 32: etype = T_INT; break; |
4058 | case 64: etype = T_LONG; break; |
4059 | default: fatal("Unsupported type")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4059, "Unsupported type"); ::breakpoint(); } while (0); break; |
4060 | } |
4061 | assert(etype != T_ILLEGAL, "")do { if (!(etype != T_ILLEGAL)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4061, "assert(" "etype != T_ILLEGAL" ") failed", ""); ::breakpoint (); } } while (0); |
4062 | switch(ideal_opc) { |
4063 | case Op_AndVMask: |
4064 | kand(etype, dst, src1, src2); break; |
4065 | case Op_OrVMask: |
4066 | kor(etype, dst, src1, src2); break; |
4067 | case Op_XorVMask: |
4068 | kxor(etype, dst, src1, src2); break; |
4069 | default: |
4070 | fatal("Unsupported masked operation")do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4070, "Unsupported masked operation"); ::breakpoint(); } while (0); break; |
4071 | } |
4072 | } |
4073 | |
/*
 * Algorithm for vector D2L and F2I conversions:-
 * a) Perform vector D2L/F2I cast.
 * b) Choose fast path if no lane of the result vector contains the 0x80000000
 *    value (0x8000000000000000 for the 64 bit lanes of D2L). Such a lane
 *    signifies that the source value could be any of the special floating
 *    point values (NaN, -Inf, Inf, Max, -Min).
 * c) Set destination to zero if source is NaN value.
 * d) Replace 0x80000000 with MaxInt (0x8000000000000000 with MaxLong for D2L)
 *    if source lane contains a +ve value.
 */
4083 | |
4084 | void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, |
4085 | KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, |
4086 | Register scratch, int vec_enc) { |
4087 | Label done; |
4088 | evcvttpd2qq(dst, src, vec_enc); |
4089 | evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch); |
4090 | evpcmpeqq(ktmp1, xtmp1, dst, vec_enc); |
4091 | kortestwl(ktmp1, ktmp1); |
4092 | jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4092); |
4093 | |
4094 | vpxor(xtmp2, xtmp2, xtmp2, vec_enc); |
4095 | evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc); |
4096 | evmovdquq(dst, ktmp2, xtmp2, true, vec_enc); |
4097 | |
4098 | kxorwl(ktmp1, ktmp1, ktmp2); |
4099 | evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc); |
4100 | vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc); |
4101 | evmovdquq(dst, ktmp1, xtmp2, true, vec_enc); |
4102 | bind(done); |
4103 | } |
4104 | |
4105 | void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, |
4106 | XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, |
4107 | AddressLiteral float_sign_flip, Register scratch, int vec_enc) { |
4108 | Label done; |
4109 | vcvttps2dq(dst, src, vec_enc); |
4110 | vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc); |
4111 | vpcmpeqd(xtmp2, dst, xtmp1, vec_enc); |
4112 | vptest(xtmp2, xtmp2, vec_enc); |
4113 | jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4113); |
4114 | |
4115 | vpcmpeqd(xtmp4, xtmp4, xtmp4, vec_enc); |
4116 | vpxor(xtmp1, xtmp1, xtmp4, vec_enc); |
4117 | |
4118 | vpxor(xtmp4, xtmp4, xtmp4, vec_enc); |
4119 | vcmpps(xtmp3, src, src, Assembler::UNORD_Q, vec_enc); |
4120 | vblendvps(dst, dst, xtmp4, xtmp3, vec_enc); |
4121 | |
4122 | // Recompute the mask for remaining special value. |
4123 | vpxor(xtmp2, xtmp2, xtmp3, vec_enc); |
4124 | // Extract SRC values corresponding to TRUE mask lanes. |
4125 | vpand(xtmp4, xtmp2, src, vec_enc); |
4126 | // Flip mask bits so that MSB bit of MASK lanes corresponding to +ve special |
4127 | // values are set. |
4128 | vpxor(xtmp3, xtmp2, xtmp4, vec_enc); |
4129 | |
4130 | vblendvps(dst, dst, xtmp1, xtmp3, vec_enc); |
4131 | bind(done); |
4132 | } |
4133 | |
4134 | void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, |
4135 | KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, |
4136 | Register scratch, int vec_enc) { |
4137 | Label done; |
4138 | vcvttps2dq(dst, src, vec_enc); |
4139 | evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch); |
4140 | Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc); |
4141 | kortestwl(ktmp1, ktmp1); |
4142 | jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4142); |
4143 | |
4144 | vpxor(xtmp2, xtmp2, xtmp2, vec_enc); |
4145 | evcmpps(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc); |
4146 | evmovdqul(dst, ktmp2, xtmp2, true, vec_enc); |
4147 | |
4148 | kxorwl(ktmp1, ktmp1, ktmp2); |
4149 | evcmpps(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc); |
4150 | vpternlogd(xtmp2, 0x11, xtmp1, xtmp1, vec_enc); |
4151 | evmovdqul(dst, ktmp1, xtmp2, true, vec_enc); |
4152 | bind(done); |
4153 | } |
4154 | |
4155 | #ifdef _LP641 |
4156 | void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1, |
4157 | Register rtmp2, XMMRegister xtmp, int mask_len, |
4158 | int vec_enc) { |
4159 | int index = 0; |
4160 | int vindex = 0; |
4161 | mov64(rtmp1, 0x0101010101010101L); |
4162 | pdep(rtmp1, src, rtmp1); |
4163 | if (mask_len > 8) { |
4164 | movq(rtmp2, src); |
4165 | vpxor(xtmp, xtmp, xtmp, vec_enc); |
4166 | movq(xtmp, rtmp1); |
4167 | } |
4168 | movq(dst, rtmp1); |
4169 | |
4170 | mask_len -= 8; |
4171 | while (mask_len > 0) { |
4172 | assert ((mask_len & 0x7) == 0, "mask must be multiple of 8")do { if (!((mask_len & 0x7) == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4172, "assert(" "(mask_len & 0x7) == 0" ") failed", "mask must be multiple of 8" ); ::breakpoint(); } } while (0); |
4173 | index++; |
4174 | if ((index % 2) == 0) { |
4175 | pxor(xtmp, xtmp); |
4176 | } |
4177 | mov64(rtmp1, 0x0101010101010101L); |
4178 | shrq(rtmp2, 8); |
4179 | pdep(rtmp1, rtmp2, rtmp1); |
4180 | pinsrq(xtmp, rtmp1, index % 2); |
4181 | vindex = index / 2; |
4182 | if (vindex) { |
4183 | // Write entire 16 byte vector when both 64 bit |
4184 | // lanes are update to save redundant instructions. |
4185 | if (index % 2) { |
4186 | vinsertf128(dst, dst, xtmp, vindex); |
4187 | } |
4188 | } else { |
4189 | vmovdqu(dst, xtmp); |
4190 | } |
4191 | mask_len -= 8; |
4192 | } |
4193 | } |
4194 | |
4195 | void C2_MacroAssembler::vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen) { |
4196 | switch(opc) { |
4197 | case Op_VectorMaskTrueCount: |
4198 | popcntq(dst, tmp); |
4199 | break; |
4200 | case Op_VectorMaskLastTrue: |
4201 | if (VM_Version::supports_lzcnt()) { |
4202 | lzcntq(tmp, tmp); |
4203 | movl(dst, 63); |
4204 | subl(dst, tmp); |
4205 | } else { |
4206 | movl(dst, -1); |
4207 | bsrq(tmp, tmp); |
4208 | cmov32(Assembler::notZero, dst, tmp); |
4209 | } |
4210 | break; |
4211 | case Op_VectorMaskFirstTrue: |
4212 | if (VM_Version::supports_bmi1()) { |
4213 | if (masklen < 32) { |
4214 | orl(tmp, 1 << masklen); |
4215 | tzcntl(dst, tmp); |
4216 | } else if (masklen == 32) { |
4217 | tzcntl(dst, tmp); |
4218 | } else { |
4219 | assert(masklen == 64, "")do { if (!(masklen == 64)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4219, "assert(" "masklen == 64" ") failed", ""); ::breakpoint (); } } while (0); |
4220 | tzcntq(dst, tmp); |
4221 | } |
4222 | } else { |
4223 | if (masklen < 32) { |
4224 | orl(tmp, 1 << masklen); |
4225 | bsfl(dst, tmp); |
4226 | } else { |
4227 | assert(masklen == 32 || masklen == 64, "")do { if (!(masklen == 32 || masklen == 64)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4227, "assert(" "masklen == 32 || masklen == 64" ") failed" , ""); ::breakpoint(); } } while (0); |
4228 | movl(dst, masklen); |
4229 | if (masklen == 32) { |
4230 | bsfl(tmp, tmp); |
4231 | } else { |
4232 | bsfq(tmp, tmp); |
4233 | } |
4234 | cmov32(Assembler::notZero, dst, tmp); |
4235 | } |
4236 | } |
4237 | break; |
4238 | case Op_VectorMaskToLong: |
4239 | assert(dst == tmp, "Dst and tmp should be the same for toLong operations")do { if (!(dst == tmp)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4239, "assert(" "dst == tmp" ") failed", "Dst and tmp should be the same for toLong operations" ); ::breakpoint(); } } while (0); |
4240 | break; |
4241 | default: assert(false, "Unhandled mask operation")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4241, "assert(" "false" ") failed", "Unhandled mask operation" ); ::breakpoint(); } } while (0); |
4242 | } |
4243 | } |
4244 | |
4245 | void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask, Register tmp, |
4246 | int masklen, int masksize, int vec_enc) { |
4247 | assert(VM_Version::supports_popcnt(), "")do { if (!(VM_Version::supports_popcnt())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4247, "assert(" "VM_Version::supports_popcnt()" ") failed", ""); ::breakpoint(); } } while (0); |
4248 | |
4249 | if(VM_Version::supports_avx512bw()) { |
4250 | kmovql(tmp, mask); |
4251 | } else { |
4252 | assert(masklen <= 16, "")do { if (!(masklen <= 16)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4252, "assert(" "masklen <= 16" ") failed", ""); ::breakpoint (); } } while (0); |
4253 | kmovwl(tmp, mask); |
4254 | } |
4255 | |
4256 | // Mask generated out of partial vector comparisons/replicate/mask manipulation |
4257 | // operations needs to be clipped. |
4258 | if (masksize < 16 && opc != Op_VectorMaskFirstTrue) { |
4259 | andq(tmp, (1 << masklen) - 1); |
4260 | } |
4261 | |
4262 | vector_mask_operation_helper(opc, dst, tmp, masklen); |
4263 | } |
4264 | |
4265 | void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp, |
4266 | Register tmp, int masklen, BasicType bt, int vec_enc) { |
4267 | assert(vec_enc == AVX_128bit && VM_Version::supports_avx() ||do { if (!(vec_enc == AVX_128bit && VM_Version::supports_avx () || vec_enc == AVX_256bit && (VM_Version::supports_avx2 () || type2aelembytes(bt) >= 4))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4268, "assert(" "vec_enc == AVX_128bit && VM_Version::supports_avx() || vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4)" ") failed", ""); ::breakpoint(); } } while (0) |
4268 | vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4), "")do { if (!(vec_enc == AVX_128bit && VM_Version::supports_avx () || vec_enc == AVX_256bit && (VM_Version::supports_avx2 () || type2aelembytes(bt) >= 4))) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4268, "assert(" "vec_enc == AVX_128bit && VM_Version::supports_avx() || vec_enc == AVX_256bit && (VM_Version::supports_avx2() || type2aelembytes(bt) >= 4)" ") failed", ""); ::breakpoint(); } } while (0); |
4269 | assert(VM_Version::supports_popcnt(), "")do { if (!(VM_Version::supports_popcnt())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4269, "assert(" "VM_Version::supports_popcnt()" ") failed", ""); ::breakpoint(); } } while (0); |
4270 | |
4271 | bool need_clip = false; |
4272 | switch(bt) { |
4273 | case T_BOOLEAN: |
4274 | // While masks of other types contain 0, -1; boolean masks contain lane values of 0, 1 |
4275 | vpxor(xtmp, xtmp, xtmp, vec_enc); |
4276 | vpsubb(xtmp, xtmp, mask, vec_enc); |
4277 | vpmovmskb(tmp, xtmp, vec_enc); |
4278 | need_clip = masklen < 16; |
4279 | break; |
4280 | case T_BYTE: |
4281 | vpmovmskb(tmp, mask, vec_enc); |
4282 | need_clip = masklen < 16; |
4283 | break; |
4284 | case T_SHORT: |
4285 | vpacksswb(xtmp, mask, mask, vec_enc); |
4286 | if (masklen >= 16) { |
4287 | vpermpd(xtmp, xtmp, 8, vec_enc); |
4288 | } |
4289 | vpmovmskb(tmp, xtmp, Assembler::AVX_128bit); |
4290 | need_clip = masklen < 16; |
4291 | break; |
4292 | case T_INT: |
4293 | case T_FLOAT: |
4294 | vmovmskps(tmp, mask, vec_enc); |
4295 | need_clip = masklen < 4; |
4296 | break; |
4297 | case T_LONG: |
4298 | case T_DOUBLE: |
4299 | vmovmskpd(tmp, mask, vec_enc); |
4300 | need_clip = masklen < 2; |
4301 | break; |
4302 | default: assert(false, "Unhandled type, %s", type2name(bt))do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp" , 4302, "assert(" "false" ") failed", "Unhandled type, %s", type2name (bt)); ::breakpoint(); } } while (0); |
4303 | } |
4304 | |
4305 | // Mask generated out of partial vector comparisons/replicate/mask manipulation |
4306 | // operations needs to be clipped. |
4307 | if (need_clip && opc != Op_VectorMaskFirstTrue) { |
4308 | // need_clip implies masklen < 32 |
4309 | andq(tmp, (1 << masklen) - 1); |
4310 | } |
4311 | |
4312 | vector_mask_operation_helper(opc, dst, tmp, masklen); |
4313 | } |
4314 | #endif |