File: | jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp |
Warning: | line 2506, column 5 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. | |||
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |||
4 | * | |||
5 | * This code is free software; you can redistribute it and/or modify it | |||
6 | * under the terms of the GNU General Public License version 2 only, as | |||
7 | * published by the Free Software Foundation. | |||
8 | * | |||
9 | * This code is distributed in the hope that it will be useful, but WITHOUT | |||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |||
12 | * version 2 for more details (a copy is included in the LICENSE file that | |||
13 | * accompanied this code). | |||
14 | * | |||
15 | * You should have received a copy of the GNU General Public License version | |||
16 | * 2 along with this work; if not, write to the Free Software Foundation, | |||
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |||
18 | * | |||
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |||
20 | * or visit www.oracle.com if you need additional information or have any | |||
21 | * questions. | |||
22 | * | |||
23 | */ | |||
24 | ||||
25 | #include "precompiled.hpp" | |||
26 | #include "jvm.h" | |||
27 | #include "asm/assembler.hpp" | |||
28 | #include "asm/assembler.inline.hpp" | |||
29 | #include "compiler/compiler_globals.hpp" | |||
30 | #include "compiler/disassembler.hpp" | |||
31 | #include "gc/shared/barrierSet.hpp" | |||
32 | #include "gc/shared/barrierSetAssembler.hpp" | |||
33 | #include "gc/shared/collectedHeap.inline.hpp" | |||
34 | #include "gc/shared/tlab_globals.hpp" | |||
35 | #include "interpreter/bytecodeHistogram.hpp" | |||
36 | #include "interpreter/interpreter.hpp" | |||
37 | #include "memory/resourceArea.hpp" | |||
38 | #include "memory/universe.hpp" | |||
39 | #include "oops/accessDecorators.hpp" | |||
40 | #include "oops/compressedOops.inline.hpp" | |||
41 | #include "oops/klass.inline.hpp" | |||
42 | #include "prims/methodHandles.hpp" | |||
43 | #include "runtime/flags/flagSetting.hpp" | |||
44 | #include "runtime/interfaceSupport.inline.hpp" | |||
45 | #include "runtime/jniHandles.hpp" | |||
46 | #include "runtime/objectMonitor.hpp" | |||
47 | #include "runtime/os.hpp" | |||
48 | #include "runtime/safepoint.hpp" | |||
49 | #include "runtime/safepointMechanism.hpp" | |||
50 | #include "runtime/sharedRuntime.hpp" | |||
51 | #include "runtime/stubRoutines.hpp" | |||
52 | #include "runtime/thread.hpp" | |||
53 | #include "utilities/macros.hpp" | |||
54 | #include "crc32c.h" | |||
55 | ||||
// Assembly-listing helper macros.
//
// BLOCK_COMMENT(str): annotates the generated code with `str`; compiles to
//                     nothing in PRODUCT builds.
// STOP(error):        emits stop(error); non-PRODUCT builds also record the
//                     message as a block comment first.
//                     NOTE: the non-PRODUCT form expands to TWO statements,
//                     so it must not be used as the body of an unbraced
//                     if/else/loop.
// BIND(label):        binds `label` and annotates the code with "<label>:".
//                     Also expands to two statements.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
65 | ||||
#ifdef ASSERT
// Platform-dependent instruction-mark check; only compiled when ASSERT is
// defined. The x86 version unconditionally reports success.
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif
69 | ||||
70 | static Assembler::Condition reverse[] = { | |||
71 | Assembler::noOverflow /* overflow = 0x0 */ , | |||
72 | Assembler::overflow /* noOverflow = 0x1 */ , | |||
73 | Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , | |||
74 | Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , | |||
75 | Assembler::notZero /* zero = 0x4, equal = 0x4 */ , | |||
76 | Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , | |||
77 | Assembler::above /* belowEqual = 0x6 */ , | |||
78 | Assembler::belowEqual /* above = 0x7 */ , | |||
79 | Assembler::positive /* negative = 0x8 */ , | |||
80 | Assembler::negative /* positive = 0x9 */ , | |||
81 | Assembler::noParity /* parity = 0xa */ , | |||
82 | Assembler::parity /* noParity = 0xb */ , | |||
83 | Assembler::greaterEqual /* less = 0xc */ , | |||
84 | Assembler::less /* greaterEqual = 0xd */ , | |||
85 | Assembler::greater /* lessEqual = 0xe */ , | |||
86 | Assembler::lessEqual /* greater = 0xf, */ | |||
87 | ||||
88 | }; | |||
89 | ||||
90 | ||||
91 | // Implementation of MacroAssembler | |||
92 | ||||
93 | // First all the versions that have distinct versions depending on 32/64 bit | |||
94 | // Unless the difference is trivial (1 line or so). | |||
95 | ||||
96 | #ifndef _LP641 | |||
97 | ||||
98 | // 32bit versions | |||
99 | ||||
100 | Address MacroAssembler::as_Address(AddressLiteral adr) { | |||
101 | return Address(adr.target(), adr.rspec()); | |||
102 | } | |||
103 | ||||
104 | Address MacroAssembler::as_Address(ArrayAddress adr) { | |||
105 | return Address::make_array(adr); | |||
106 | } | |||
107 | ||||
108 | void MacroAssembler::call_VM_leaf_base(address entry_point, | |||
109 | int number_of_arguments) { | |||
110 | call(RuntimeAddress(entry_point)); | |||
111 | increment(rsp, number_of_arguments * wordSize); | |||
112 | } | |||
113 | ||||
114 | void MacroAssembler::cmpklass(Address src1, Metadata* obj) { | |||
115 | cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |||
116 | } | |||
117 | ||||
118 | ||||
119 | void MacroAssembler::cmpklass(Register src1, Metadata* obj) { | |||
120 | cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |||
121 | } | |||
122 | ||||
123 | void MacroAssembler::cmpoop(Address src1, jobject obj) { | |||
124 | cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |||
125 | } | |||
126 | ||||
127 | void MacroAssembler::cmpoop(Register src1, jobject obj) { | |||
128 | cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |||
129 | } | |||
130 | ||||
131 | void MacroAssembler::extend_sign(Register hi, Register lo) { | |||
132 | // According to Intel Doc. AP-526, "Integer Divide", p.18. | |||
133 | if (VM_Version::is_P6() && hi == rdx && lo == rax) { | |||
134 | cdql(); | |||
135 | } else { | |||
136 | movl(hi, lo); | |||
137 | sarl(hi, 31); | |||
138 | } | |||
139 | } | |||
140 | ||||
141 | void MacroAssembler::jC2(Register tmp, Label& L) { | |||
142 | // set parity bit if FPU flag C2 is set (via rax) | |||
143 | save_rax(tmp); | |||
144 | fwait(); fnstsw_ax(); | |||
145 | sahf(); | |||
146 | restore_rax(tmp); | |||
147 | // branch | |||
148 | jcc(Assembler::parity, L); | |||
149 | } | |||
150 | ||||
151 | void MacroAssembler::jnC2(Register tmp, Label& L) { | |||
152 | // set parity bit if FPU flag C2 is set (via rax) | |||
153 | save_rax(tmp); | |||
154 | fwait(); fnstsw_ax(); | |||
155 | sahf(); | |||
156 | restore_rax(tmp); | |||
157 | // branch | |||
158 | jcc(Assembler::noParity, L); | |||
159 | } | |||
160 | ||||
161 | // 32bit can do a case table jump in one instruction but we no longer allow the base | |||
162 | // to be installed in the Address class | |||
163 | void MacroAssembler::jump(ArrayAddress entry) { | |||
164 | jmp(as_Address(entry)); | |||
165 | } | |||
166 | ||||
167 | // Note: y_lo will be destroyed | |||
168 | void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { | |||
169 | // Long compare for Java (semantics as described in JVM spec.) | |||
170 | Label high, low, done; | |||
171 | ||||
172 | cmpl(x_hi, y_hi); | |||
173 | jcc(Assembler::less, low); | |||
174 | jcc(Assembler::greater, high); | |||
175 | // x_hi is the return register | |||
176 | xorl(x_hi, x_hi); | |||
177 | cmpl(x_lo, y_lo); | |||
178 | jcc(Assembler::below, low); | |||
179 | jcc(Assembler::equal, done); | |||
180 | ||||
181 | bind(high); | |||
182 | xorl(x_hi, x_hi); | |||
183 | increment(x_hi); | |||
184 | jmp(done); | |||
185 | ||||
186 | bind(low); | |||
187 | xorl(x_hi, x_hi); | |||
188 | decrementl(x_hi); | |||
189 | ||||
190 | bind(done); | |||
191 | } | |||
192 | ||||
193 | void MacroAssembler::lea(Register dst, AddressLiteral src) { | |||
194 | mov_literal32(dst, (int32_t)src.target(), src.rspec()); | |||
195 | } | |||
196 | ||||
197 | void MacroAssembler::lea(Address dst, AddressLiteral adr) { | |||
198 | // leal(dst, as_Address(adr)); | |||
199 | // see note in movl as to why we must use a move | |||
200 | mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); | |||
201 | } | |||
202 | ||||
203 | void MacroAssembler::leave() { | |||
204 | mov(rsp, rbp); | |||
205 | pop(rbp); | |||
206 | } | |||
207 | ||||
208 | void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { | |||
209 | // Multiplication of two Java long values stored on the stack | |||
210 | // as illustrated below. Result is in rdx:rax. | |||
211 | // | |||
212 | // rsp ---> [ ?? ] \ \ | |||
213 | // .... | y_rsp_offset | | |||
214 | // [ y_lo ] / (in bytes) | x_rsp_offset | |||
215 | // [ y_hi ] | (in bytes) | |||
216 | // .... | | |||
217 | // [ x_lo ] / | |||
218 | // [ x_hi ] | |||
219 | // .... | |||
220 | // | |||
221 | // Basic idea: lo(result) = lo(x_lo * y_lo) | |||
222 | // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) | |||
223 | Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); | |||
224 | Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); | |||
225 | Label quick; | |||
226 | // load x_hi, y_hi and check if quick | |||
227 | // multiplication is possible | |||
228 | movl(rbx, x_hi); | |||
229 | movl(rcx, y_hi); | |||
230 | movl(rax, rbx); | |||
231 | orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 | |||
232 | jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply | |||
233 | // do full multiplication | |||
234 | // 1st step | |||
235 | mull(y_lo); // x_hi * y_lo | |||
236 | movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, | |||
237 | // 2nd step | |||
238 | movl(rax, x_lo); | |||
239 | mull(rcx); // x_lo * y_hi | |||
240 | addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, | |||
241 | // 3rd step | |||
242 | bind(quick); // note: rbx, = 0 if quick multiply! | |||
243 | movl(rax, x_lo); | |||
244 | mull(y_lo); // x_lo * y_lo | |||
245 | addl(rdx, rbx); // correct hi(x_lo * y_lo) | |||
246 | } | |||
247 | ||||
248 | void MacroAssembler::lneg(Register hi, Register lo) { | |||
249 | negl(lo); | |||
250 | adcl(hi, 0); | |||
251 | negl(hi); | |||
252 | } | |||
253 | ||||
254 | void MacroAssembler::lshl(Register hi, Register lo) { | |||
255 | // Java shift left long support (semantics as described in JVM spec., p.305) | |||
256 | // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) | |||
257 | // shift value is in rcx ! | |||
258 | assert(hi != rcx, "must not use rcx")do { if (!(hi != rcx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 258, "assert(" "hi != rcx" ") failed", "must not use rcx"); ::breakpoint(); } } while (0); | |||
259 | assert(lo != rcx, "must not use rcx")do { if (!(lo != rcx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 259, "assert(" "lo != rcx" ") failed", "must not use rcx"); ::breakpoint(); } } while (0); | |||
260 | const Register s = rcx; // shift count | |||
261 | const int n = BitsPerWord; | |||
262 | Label L; | |||
263 | andl(s, 0x3f); // s := s & 0x3f (s < 0x40) | |||
264 | cmpl(s, n); // if (s < n) | |||
265 | jcc(Assembler::less, L); // else (s >= n) | |||
266 | movl(hi, lo); // x := x << n | |||
267 | xorl(lo, lo); | |||
268 | // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! | |||
269 | bind(L); // s (mod n) < n | |||
270 | shldl(hi, lo); // x := x << s | |||
271 | shll(lo); | |||
272 | } | |||
273 | ||||
274 | ||||
275 | void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { | |||
276 | // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) | |||
277 | // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) | |||
278 | assert(hi != rcx, "must not use rcx")do { if (!(hi != rcx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 278, "assert(" "hi != rcx" ") failed", "must not use rcx"); ::breakpoint(); } } while (0); | |||
279 | assert(lo != rcx, "must not use rcx")do { if (!(lo != rcx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 279, "assert(" "lo != rcx" ") failed", "must not use rcx"); ::breakpoint(); } } while (0); | |||
280 | const Register s = rcx; // shift count | |||
281 | const int n = BitsPerWord; | |||
282 | Label L; | |||
283 | andl(s, 0x3f); // s := s & 0x3f (s < 0x40) | |||
284 | cmpl(s, n); // if (s < n) | |||
285 | jcc(Assembler::less, L); // else (s >= n) | |||
286 | movl(lo, hi); // x := x >> n | |||
287 | if (sign_extension) sarl(hi, 31); | |||
288 | else xorl(hi, hi); | |||
289 | // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! | |||
290 | bind(L); // s (mod n) < n | |||
291 | shrdl(lo, hi); // x := x >> s | |||
292 | if (sign_extension) sarl(hi); | |||
293 | else shrl(hi); | |||
294 | } | |||
295 | ||||
296 | void MacroAssembler::movoop(Register dst, jobject obj) { | |||
297 | mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |||
298 | } | |||
299 | ||||
300 | void MacroAssembler::movoop(Address dst, jobject obj) { | |||
301 | mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); | |||
302 | } | |||
303 | ||||
304 | void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { | |||
305 | mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |||
306 | } | |||
307 | ||||
308 | void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | |||
309 | mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); | |||
310 | } | |||
311 | ||||
312 | void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { | |||
313 | // scratch register is not used, | |||
314 | // it is defined to match parameters of 64-bit version of this method. | |||
315 | if (src.is_lval()) { | |||
316 | mov_literal32(dst, (intptr_t)src.target(), src.rspec()); | |||
317 | } else { | |||
318 | movl(dst, as_Address(src)); | |||
319 | } | |||
320 | } | |||
321 | ||||
322 | void MacroAssembler::movptr(ArrayAddress dst, Register src) { | |||
323 | movl(as_Address(dst), src); | |||
324 | } | |||
325 | ||||
326 | void MacroAssembler::movptr(Register dst, ArrayAddress src) { | |||
327 | movl(dst, as_Address(src)); | |||
328 | } | |||
329 | ||||
330 | // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |||
331 | void MacroAssembler::movptr(Address dst, intptr_t src) { | |||
332 | movl(dst, src); | |||
333 | } | |||
334 | ||||
335 | ||||
336 | void MacroAssembler::pop_callee_saved_registers() { | |||
337 | pop(rcx); | |||
338 | pop(rdx); | |||
339 | pop(rdi); | |||
340 | pop(rsi); | |||
341 | } | |||
342 | ||||
343 | void MacroAssembler::push_callee_saved_registers() { | |||
344 | push(rsi); | |||
345 | push(rdi); | |||
346 | push(rdx); | |||
347 | push(rcx); | |||
348 | } | |||
349 | ||||
350 | void MacroAssembler::pushoop(jobject obj) { | |||
351 | push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); | |||
352 | } | |||
353 | ||||
354 | void MacroAssembler::pushklass(Metadata* obj) { | |||
355 | push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); | |||
356 | } | |||
357 | ||||
358 | void MacroAssembler::pushptr(AddressLiteral src) { | |||
359 | if (src.is_lval()) { | |||
360 | push_literal32((int32_t)src.target(), src.rspec()); | |||
361 | } else { | |||
362 | pushl(as_Address(src)); | |||
363 | } | |||
364 | } | |||
365 | ||||
366 | static void pass_arg0(MacroAssembler* masm, Register arg) { | |||
367 | masm->push(arg); | |||
368 | } | |||
369 | ||||
370 | static void pass_arg1(MacroAssembler* masm, Register arg) { | |||
371 | masm->push(arg); | |||
372 | } | |||
373 | ||||
374 | static void pass_arg2(MacroAssembler* masm, Register arg) { | |||
375 | masm->push(arg); | |||
376 | } | |||
377 | ||||
378 | static void pass_arg3(MacroAssembler* masm, Register arg) { | |||
379 | masm->push(arg); | |||
380 | } | |||
381 | ||||
382 | #ifndef PRODUCT | |||
383 | extern "C" void findpc(intptr_t x); | |||
384 | #endif | |||
385 | ||||
386 | void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { | |||
387 | // In order to get locks to work, we need to fake a in_VM state | |||
388 | JavaThread* thread = JavaThread::current(); | |||
389 | JavaThreadState saved_state = thread->thread_state(); | |||
390 | thread->set_thread_state(_thread_in_vm); | |||
391 | if (ShowMessageBoxOnError) { | |||
392 | JavaThread* thread = JavaThread::current(); | |||
393 | JavaThreadState saved_state = thread->thread_state(); | |||
394 | thread->set_thread_state(_thread_in_vm); | |||
395 | if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { | |||
396 | ttyLocker ttyl; | |||
397 | BytecodeCounter::print(); | |||
398 | } | |||
399 | // To see where a verify_oop failed, get $ebx+40/X for this frame. | |||
400 | // This is the value of eip which points to where verify_oop will return. | |||
401 | if (os::message_box(msg, "Execution stopped, print registers?")) { | |||
402 | print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); | |||
403 | BREAKPOINT::breakpoint(); | |||
404 | } | |||
405 | } | |||
406 | fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 406, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0); | |||
407 | } | |||
408 | ||||
409 | void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { | |||
410 | ttyLocker ttyl; | |||
411 | FlagSetting fs(Debugging, true); | |||
412 | tty->print_cr("eip = 0x%08x", eip); | |||
413 | #ifndef PRODUCT | |||
414 | if ((WizardMode || Verbose) && PrintMiscellaneous) { | |||
415 | tty->cr(); | |||
416 | findpc(eip); | |||
417 | tty->cr(); | |||
418 | } | |||
419 | #endif | |||
420 | #define PRINT_REG(rax) \ | |||
421 | { tty->print("%s = ", #rax); os::print_location(tty, rax); } | |||
422 | PRINT_REG(rax); | |||
423 | PRINT_REG(rbx); | |||
424 | PRINT_REG(rcx); | |||
425 | PRINT_REG(rdx); | |||
426 | PRINT_REG(rdi); | |||
427 | PRINT_REG(rsi); | |||
428 | PRINT_REG(rbp); | |||
429 | PRINT_REG(rsp); | |||
430 | #undef PRINT_REG | |||
431 | // Print some words near top of staack. | |||
432 | int* dump_sp = (int*) rsp; | |||
433 | for (int col1 = 0; col1 < 8; col1++) { | |||
434 | tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |||
435 | os::print_location(tty, *dump_sp++); | |||
436 | } | |||
437 | for (int row = 0; row < 16; row++) { | |||
438 | tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |||
439 | for (int col = 0; col < 8; col++) { | |||
440 | tty->print(" 0x%08x", *dump_sp++); | |||
441 | } | |||
442 | tty->cr(); | |||
443 | } | |||
444 | // Print some instructions around pc: | |||
445 | Disassembler::decode((address)eip-64, (address)eip); | |||
446 | tty->print_cr("--------"); | |||
447 | Disassembler::decode((address)eip, (address)eip+32); | |||
448 | } | |||
449 | ||||
450 | void MacroAssembler::stop(const char* msg) { | |||
451 | ExternalAddress message((address)msg); | |||
452 | // push address of message | |||
453 | pushptr(message.addr()); | |||
454 | { Label L; call(L, relocInfo::none); bind(L); } // push eip | |||
455 | pusha(); // push registers | |||
456 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)((address)((address_word)(MacroAssembler::debug32))))); | |||
457 | hlt(); | |||
458 | } | |||
459 | ||||
460 | void MacroAssembler::warn(const char* msg) { | |||
461 | push_CPU_state(); | |||
462 | ||||
463 | ExternalAddress message((address) msg); | |||
464 | // push address of message | |||
465 | pushptr(message.addr()); | |||
466 | ||||
467 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)((address)((address_word)(warning))))); | |||
468 | addl(rsp, wordSize); // discard argument | |||
469 | pop_CPU_state(); | |||
470 | } | |||
471 | ||||
472 | void MacroAssembler::print_state() { | |||
473 | { Label L; call(L, relocInfo::none); bind(L); } // push eip | |||
474 | pusha(); // push registers | |||
475 | ||||
476 | push_CPU_state(); | |||
477 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)((address)((address_word)(MacroAssembler::print_state32))))); | |||
478 | pop_CPU_state(); | |||
479 | ||||
480 | popa(); | |||
481 | addl(rsp, wordSize); | |||
482 | } | |||
483 | ||||
484 | #else // _LP64 | |||
485 | ||||
486 | // 64 bit versions | |||
487 | ||||
488 | Address MacroAssembler::as_Address(AddressLiteral adr) { | |||
489 | // amd64 always does this as a pc-rel | |||
490 | // we can be absolute or disp based on the instruction type | |||
491 | // jmp/call are displacements others are absolute | |||
492 | assert(!adr.is_lval(), "must be rval")do { if (!(!adr.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 492, "assert(" "!adr.is_lval()" ") failed", "must be rval") ; ::breakpoint(); } } while (0); | |||
493 | assert(reachable(adr), "must be")do { if (!(reachable(adr))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 493, "assert(" "reachable(adr)" ") failed", "must be"); ::breakpoint (); } } while (0); | |||
494 | return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); | |||
495 | ||||
496 | } | |||
497 | ||||
498 | Address MacroAssembler::as_Address(ArrayAddress adr) { | |||
499 | AddressLiteral base = adr.base(); | |||
500 | lea(rscratch1, base); | |||
501 | Address index = adr.index(); | |||
502 | assert(index._disp == 0, "must not have disp")do { if (!(index._disp == 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 502, "assert(" "index._disp == 0" ") failed", "must not have disp" ); ::breakpoint(); } } while (0); // maybe it can? | |||
503 | Address array(rscratch1, index._index, index._scale, index._disp); | |||
504 | return array; | |||
505 | } | |||
506 | ||||
507 | void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { | |||
508 | Label L, E; | |||
509 | ||||
510 | #ifdef _WIN64 | |||
511 | // Windows always allocates space for it's register args | |||
512 | assert(num_args <= 4, "only register arguments supported")do { if (!(num_args <= 4)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 512, "assert(" "num_args <= 4" ") failed", "only register arguments supported" ); ::breakpoint(); } } while (0); | |||
513 | subq(rsp, frame::arg_reg_save_area_bytes); | |||
514 | #endif | |||
515 | ||||
516 | // Align stack if necessary | |||
517 | testl(rsp, 15); | |||
518 | jcc(Assembler::zero, L); | |||
519 | ||||
520 | subq(rsp, 8); | |||
521 | { | |||
522 | call(RuntimeAddress(entry_point)); | |||
523 | } | |||
524 | addq(rsp, 8); | |||
525 | jmp(E); | |||
526 | ||||
527 | bind(L); | |||
528 | { | |||
529 | call(RuntimeAddress(entry_point)); | |||
530 | } | |||
531 | ||||
532 | bind(E); | |||
533 | ||||
534 | #ifdef _WIN64 | |||
535 | // restore stack pointer | |||
536 | addq(rsp, frame::arg_reg_save_area_bytes); | |||
537 | #endif | |||
538 | ||||
539 | } | |||
540 | ||||
541 | void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { | |||
542 | assert(!src2.is_lval(), "should use cmpptr")do { if (!(!src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 542, "assert(" "!src2.is_lval()" ") failed", "should use cmpptr" ); ::breakpoint(); } } while (0); | |||
543 | ||||
544 | if (reachable(src2)) { | |||
545 | cmpq(src1, as_Address(src2)); | |||
546 | } else { | |||
547 | lea(rscratch1, src2); | |||
548 | Assembler::cmpq(src1, Address(rscratch1, 0)); | |||
549 | } | |||
550 | } | |||
551 | ||||
552 | int MacroAssembler::corrected_idivq(Register reg) { | |||
553 | // Full implementation of Java ldiv and lrem; checks for special | |||
554 | // case as described in JVM spec., p.243 & p.271. The function | |||
555 | // returns the (pc) offset of the idivl instruction - may be needed | |||
556 | // for implicit exceptions. | |||
557 | // | |||
558 | // normal case special case | |||
559 | // | |||
560 | // input : rax: dividend min_long | |||
561 | // reg: divisor (may not be eax/edx) -1 | |||
562 | // | |||
563 | // output: rax: quotient (= rax idiv reg) min_long | |||
564 | // rdx: remainder (= rax irem reg) 0 | |||
565 | assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register")do { if (!(reg != rax && reg != rdx)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 565, "assert(" "reg != rax && reg != rdx" ") failed" , "reg cannot be rax or rdx register"); ::breakpoint(); } } while (0); | |||
566 | static const int64_t min_long = 0x8000000000000000; | |||
567 | Label normal_case, special_case; | |||
568 | ||||
569 | // check for special case | |||
570 | cmp64(rax, ExternalAddress((address) &min_long)); | |||
571 | jcc(Assembler::notEqual, normal_case); | |||
572 | xorl(rdx, rdx); // prepare rdx for possible special case (where | |||
573 | // remainder = 0) | |||
574 | cmpq(reg, -1); | |||
575 | jcc(Assembler::equal, special_case); | |||
576 | ||||
577 | // handle normal case | |||
578 | bind(normal_case); | |||
579 | cdqq(); | |||
580 | int idivq_offset = offset(); | |||
581 | idivq(reg); | |||
582 | ||||
583 | // normal and special case exit | |||
584 | bind(special_case); | |||
585 | ||||
586 | return idivq_offset; | |||
587 | } | |||
588 | ||||
589 | void MacroAssembler::decrementq(Register reg, int value) { | |||
590 | if (value == min_jint) { subq(reg, value); return; } | |||
591 | if (value < 0) { incrementq(reg, -value); return; } | |||
592 | if (value == 0) { ; return; } | |||
593 | if (value == 1 && UseIncDec) { decq(reg) ; return; } | |||
594 | /* else */ { subq(reg, value) ; return; } | |||
595 | } | |||
596 | ||||
597 | void MacroAssembler::decrementq(Address dst, int value) { | |||
598 | if (value == min_jint) { subq(dst, value); return; } | |||
599 | if (value < 0) { incrementq(dst, -value); return; } | |||
600 | if (value == 0) { ; return; } | |||
601 | if (value == 1 && UseIncDec) { decq(dst) ; return; } | |||
602 | /* else */ { subq(dst, value) ; return; } | |||
603 | } | |||
604 | ||||
605 | void MacroAssembler::incrementq(AddressLiteral dst) { | |||
606 | if (reachable(dst)) { | |||
607 | incrementq(as_Address(dst)); | |||
608 | } else { | |||
609 | lea(rscratch1, dst); | |||
610 | incrementq(Address(rscratch1, 0)); | |||
611 | } | |||
612 | } | |||
613 | ||||
614 | void MacroAssembler::incrementq(Register reg, int value) { | |||
615 | if (value == min_jint) { addq(reg, value); return; } | |||
616 | if (value < 0) { decrementq(reg, -value); return; } | |||
617 | if (value == 0) { ; return; } | |||
618 | if (value == 1 && UseIncDec) { incq(reg) ; return; } | |||
619 | /* else */ { addq(reg, value) ; return; } | |||
620 | } | |||
621 | ||||
622 | void MacroAssembler::incrementq(Address dst, int value) { | |||
623 | if (value == min_jint) { addq(dst, value); return; } | |||
624 | if (value < 0) { decrementq(dst, -value); return; } | |||
625 | if (value == 0) { ; return; } | |||
626 | if (value == 1 && UseIncDec) { incq(dst) ; return; } | |||
627 | /* else */ { addq(dst, value) ; return; } | |||
628 | } | |||
629 | ||||
630 | // 32bit can do a case table jump in one instruction but we no longer allow the base | |||
631 | // to be installed in the Address class | |||
632 | void MacroAssembler::jump(ArrayAddress entry) { | |||
633 | lea(rscratch1, entry.base()); | |||
634 | Address dispatch = entry.index(); | |||
635 | assert(dispatch._base == noreg, "must be")do { if (!(dispatch._base == noreg)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 635, "assert(" "dispatch._base == noreg" ") failed", "must be" ); ::breakpoint(); } } while (0); | |||
636 | dispatch._base = rscratch1; | |||
637 | jmp(dispatch); | |||
638 | } | |||
639 | ||||
640 | void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { | |||
641 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 641); ::breakpoint(); } while (0); // 64bit doesn't use two regs | |||
642 | cmpq(x_lo, y_lo); | |||
643 | } | |||
644 | ||||
645 | void MacroAssembler::lea(Register dst, AddressLiteral src) { | |||
646 | mov_literal64(dst, (intptr_t)src.target(), src.rspec()); | |||
647 | } | |||
648 | ||||
649 | void MacroAssembler::lea(Address dst, AddressLiteral adr) { | |||
650 | mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); | |||
651 | movptr(dst, rscratch1); | |||
652 | } | |||
653 | ||||
654 | void MacroAssembler::leave() { | |||
655 | // %%% is this really better? Why not on 32bit too? | |||
656 | emit_int8((unsigned char)0xC9); // LEAVE | |||
657 | } | |||
658 | ||||
659 | void MacroAssembler::lneg(Register hi, Register lo) { | |||
660 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 660); ::breakpoint(); } while (0); // 64bit doesn't use two regs | |||
661 | negq(lo); | |||
662 | } | |||
663 | ||||
664 | void MacroAssembler::movoop(Register dst, jobject obj) { | |||
665 | mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); | |||
666 | } | |||
667 | ||||
668 | void MacroAssembler::movoop(Address dst, jobject obj) { | |||
669 | mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); | |||
670 | movq(dst, rscratch1); | |||
671 | } | |||
672 | ||||
673 | void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { | |||
674 | mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); | |||
675 | } | |||
676 | ||||
677 | void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { | |||
678 | mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); | |||
679 | movq(dst, rscratch1); | |||
680 | } | |||
681 | ||||
682 | void MacroAssembler::movptr(Register dst, AddressLiteral src, Register scratch) { | |||
683 | if (src.is_lval()) { | |||
684 | mov_literal64(dst, (intptr_t)src.target(), src.rspec()); | |||
685 | } else { | |||
686 | if (reachable(src)) { | |||
687 | movq(dst, as_Address(src)); | |||
688 | } else { | |||
689 | lea(scratch, src); | |||
690 | movq(dst, Address(scratch, 0)); | |||
691 | } | |||
692 | } | |||
693 | } | |||
694 | ||||
695 | void MacroAssembler::movptr(ArrayAddress dst, Register src) { | |||
696 | movq(as_Address(dst), src); | |||
697 | } | |||
698 | ||||
699 | void MacroAssembler::movptr(Register dst, ArrayAddress src) { | |||
700 | movq(dst, as_Address(src)); | |||
701 | } | |||
702 | ||||
703 | // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |||
704 | void MacroAssembler::movptr(Address dst, intptr_t src) { | |||
705 | if (is_simm32(src)) { | |||
706 | movptr(dst, checked_cast<int32_t>(src)); | |||
707 | } else { | |||
708 | mov64(rscratch1, src); | |||
709 | movq(dst, rscratch1); | |||
710 | } | |||
711 | } | |||
712 | ||||
713 | // These are mostly for initializing NULL | |||
714 | void MacroAssembler::movptr(Address dst, int32_t src) { | |||
715 | movslq(dst, src); | |||
716 | } | |||
717 | ||||
718 | void MacroAssembler::movptr(Register dst, int32_t src) { | |||
719 | mov64(dst, (intptr_t)src); | |||
720 | } | |||
721 | ||||
722 | void MacroAssembler::pushoop(jobject obj) { | |||
723 | movoop(rscratch1, obj); | |||
724 | push(rscratch1); | |||
725 | } | |||
726 | ||||
727 | void MacroAssembler::pushklass(Metadata* obj) { | |||
728 | mov_metadata(rscratch1, obj); | |||
729 | push(rscratch1); | |||
730 | } | |||
731 | ||||
732 | void MacroAssembler::pushptr(AddressLiteral src) { | |||
733 | lea(rscratch1, src); | |||
734 | if (src.is_lval()) { | |||
735 | push(rscratch1); | |||
736 | } else { | |||
737 | pushq(Address(rscratch1, 0)); | |||
738 | } | |||
739 | } | |||
740 | ||||
741 | void MacroAssembler::reset_last_Java_frame(bool clear_fp) { | |||
742 | reset_last_Java_frame(r15_thread, clear_fp); | |||
743 | } | |||
744 | ||||
745 | void MacroAssembler::set_last_Java_frame(Register last_java_sp, | |||
746 | Register last_java_fp, | |||
747 | address last_java_pc) { | |||
748 | vzeroupper(); | |||
749 | // determine last_java_sp register | |||
750 | if (!last_java_sp->is_valid()) { | |||
751 | last_java_sp = rsp; | |||
752 | } | |||
753 | ||||
754 | // last_java_fp is optional | |||
755 | if (last_java_fp->is_valid()) { | |||
756 | movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), | |||
757 | last_java_fp); | |||
758 | } | |||
759 | ||||
760 | // last_java_pc is optional | |||
761 | if (last_java_pc != NULL__null) { | |||
762 | Address java_pc(r15_thread, | |||
763 | JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); | |||
764 | lea(rscratch1, InternalAddress(last_java_pc)); | |||
765 | movptr(java_pc, rscratch1); | |||
766 | } | |||
767 | ||||
768 | movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); | |||
769 | } | |||
770 | ||||
771 | static void pass_arg0(MacroAssembler* masm, Register arg) { | |||
772 | if (c_rarg0 != arg ) { | |||
773 | masm->mov(c_rarg0, arg); | |||
774 | } | |||
775 | } | |||
776 | ||||
777 | static void pass_arg1(MacroAssembler* masm, Register arg) { | |||
778 | if (c_rarg1 != arg ) { | |||
779 | masm->mov(c_rarg1, arg); | |||
780 | } | |||
781 | } | |||
782 | ||||
783 | static void pass_arg2(MacroAssembler* masm, Register arg) { | |||
784 | if (c_rarg2 != arg ) { | |||
785 | masm->mov(c_rarg2, arg); | |||
786 | } | |||
787 | } | |||
788 | ||||
789 | static void pass_arg3(MacroAssembler* masm, Register arg) { | |||
790 | if (c_rarg3 != arg ) { | |||
791 | masm->mov(c_rarg3, arg); | |||
792 | } | |||
793 | } | |||
794 | ||||
795 | void MacroAssembler::stop(const char* msg) { | |||
796 | if (ShowMessageBoxOnError) { | |||
797 | address rip = pc(); | |||
798 | pusha(); // get regs on stack | |||
799 | lea(c_rarg1, InternalAddress(rip)); | |||
800 | movq(c_rarg2, rsp); // pass pointer to regs array | |||
801 | } | |||
802 | lea(c_rarg0, ExternalAddress((address) msg)); | |||
803 | andq(rsp, -16); // align stack as required by ABI | |||
804 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)((address)((address_word)(MacroAssembler::debug64))))); | |||
805 | hlt(); | |||
806 | } | |||
807 | ||||
808 | void MacroAssembler::warn(const char* msg) { | |||
809 | push(rbp); | |||
810 | movq(rbp, rsp); | |||
811 | andq(rsp, -16); // align stack as required by push_CPU_state and call | |||
812 | push_CPU_state(); // keeps alignment at 16 bytes | |||
813 | lea(c_rarg0, ExternalAddress((address) msg)); | |||
814 | lea(rax, ExternalAddress(CAST_FROM_FN_PTR(address, warning)((address)((address_word)(warning))))); | |||
815 | call(rax); | |||
816 | pop_CPU_state(); | |||
817 | mov(rsp, rbp); | |||
818 | pop(rbp); | |||
819 | } | |||
820 | ||||
821 | void MacroAssembler::print_state() { | |||
822 | address rip = pc(); | |||
823 | pusha(); // get regs on stack | |||
824 | push(rbp); | |||
825 | movq(rbp, rsp); | |||
826 | andq(rsp, -16); // align stack as required by push_CPU_state and call | |||
827 | push_CPU_state(); // keeps alignment at 16 bytes | |||
828 | ||||
829 | lea(c_rarg0, InternalAddress(rip)); | |||
830 | lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array | |||
831 | call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64)((address)((address_word)(MacroAssembler::print_state64))), c_rarg0, c_rarg1); | |||
832 | ||||
833 | pop_CPU_state(); | |||
834 | mov(rsp, rbp); | |||
835 | pop(rbp); | |||
836 | popa(); | |||
837 | } | |||
838 | ||||
839 | #ifndef PRODUCT | |||
840 | extern "C" void findpc(intptr_t x); | |||
841 | #endif | |||
842 | ||||
843 | void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { | |||
844 | // In order to get locks to work, we need to fake a in_VM state | |||
845 | if (ShowMessageBoxOnError) { | |||
846 | JavaThread* thread = JavaThread::current(); | |||
847 | JavaThreadState saved_state = thread->thread_state(); | |||
848 | thread->set_thread_state(_thread_in_vm); | |||
849 | #ifndef PRODUCT | |||
850 | if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { | |||
851 | ttyLocker ttyl; | |||
852 | BytecodeCounter::print(); | |||
853 | } | |||
854 | #endif | |||
855 | // To see where a verify_oop failed, get $ebx+40/X for this frame. | |||
856 | // XXX correct this offset for amd64 | |||
857 | // This is the value of eip which points to where verify_oop will return. | |||
858 | if (os::message_box(msg, "Execution stopped, print registers?")) { | |||
859 | print_state64(pc, regs); | |||
860 | BREAKPOINT::breakpoint(); | |||
861 | } | |||
862 | } | |||
863 | fatal("DEBUG MESSAGE: %s", msg)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 863, "DEBUG MESSAGE: %s", msg); ::breakpoint(); } while (0); | |||
864 | } | |||
865 | ||||
866 | void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { | |||
867 | ttyLocker ttyl; | |||
868 | FlagSetting fs(Debugging, true); | |||
869 | tty->print_cr("rip = 0x%016lx", (intptr_t)pc); | |||
870 | #ifndef PRODUCT | |||
871 | tty->cr(); | |||
872 | findpc(pc); | |||
873 | tty->cr(); | |||
874 | #endif | |||
875 | #define PRINT_REG(rax, value) \ | |||
876 | { tty->print("%s = ", #rax); os::print_location(tty, value); } | |||
877 | PRINT_REG(rax, regs[15]); | |||
878 | PRINT_REG(rbx, regs[12]); | |||
879 | PRINT_REG(rcx, regs[14]); | |||
880 | PRINT_REG(rdx, regs[13]); | |||
881 | PRINT_REG(rdi, regs[8]); | |||
882 | PRINT_REG(rsi, regs[9]); | |||
883 | PRINT_REG(rbp, regs[10]); | |||
884 | // rsp is actually not stored by pusha(), compute the old rsp from regs (rsp after pusha): regs + 16 = old rsp | |||
885 | PRINT_REG(rsp, (intptr_t)(®s[16])); | |||
886 | PRINT_REG(r8 , regs[7]); | |||
887 | PRINT_REG(r9 , regs[6]); | |||
888 | PRINT_REG(r10, regs[5]); | |||
889 | PRINT_REG(r11, regs[4]); | |||
890 | PRINT_REG(r12, regs[3]); | |||
891 | PRINT_REG(r13, regs[2]); | |||
892 | PRINT_REG(r14, regs[1]); | |||
893 | PRINT_REG(r15, regs[0]); | |||
894 | #undef PRINT_REG | |||
895 | // Print some words near the top of the stack. | |||
896 | int64_t* rsp = ®s[16]; | |||
897 | int64_t* dump_sp = rsp; | |||
898 | for (int col1 = 0; col1 < 8; col1++) { | |||
899 | tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |||
900 | os::print_location(tty, *dump_sp++); | |||
901 | } | |||
902 | for (int row = 0; row < 25; row++) { | |||
903 | tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); | |||
904 | for (int col = 0; col < 4; col++) { | |||
905 | tty->print(" 0x%016lx", (intptr_t)*dump_sp++); | |||
906 | } | |||
907 | tty->cr(); | |||
908 | } | |||
909 | // Print some instructions around pc: | |||
910 | Disassembler::decode((address)pc-64, (address)pc); | |||
911 | tty->print_cr("--------"); | |||
912 | Disassembler::decode((address)pc, (address)pc+32); | |||
913 | } | |||
914 | ||||
915 | // The java_calling_convention describes stack locations as ideal slots on | |||
916 | // a frame with no abi restrictions. Since we must observe abi restrictions | |||
917 | // (like the placement of the register window) the slots must be biased by | |||
918 | // the following value. | |||
919 | static int reg2offset_in(VMReg r) { | |||
920 | // Account for saved rbp and return address | |||
921 | // This should really be in_preserve_stack_slots | |||
922 | return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; | |||
923 | } | |||
924 | ||||
925 | static int reg2offset_out(VMReg r) { | |||
926 | return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; | |||
927 | } | |||
928 | ||||
929 | // A long move | |||
930 | void MacroAssembler::long_move(VMRegPair src, VMRegPair dst) { | |||
931 | ||||
932 | // The calling conventions assures us that each VMregpair is either | |||
933 | // all really one physical register or adjacent stack slots. | |||
934 | ||||
935 | if (src.is_single_phys_reg() ) { | |||
936 | if (dst.is_single_phys_reg()) { | |||
937 | if (dst.first() != src.first()) { | |||
938 | mov(dst.first()->as_Register(), src.first()->as_Register()); | |||
939 | } | |||
940 | } else { | |||
941 | assert(dst.is_single_reg(), "not a stack pair")do { if (!(dst.is_single_reg())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 941, "assert(" "dst.is_single_reg()" ") failed", "not a stack pair" ); ::breakpoint(); } } while (0); | |||
942 | movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); | |||
943 | } | |||
944 | } else if (dst.is_single_phys_reg()) { | |||
945 | assert(src.is_single_reg(), "not a stack pair")do { if (!(src.is_single_reg())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 945, "assert(" "src.is_single_reg()" ") failed", "not a stack pair" ); ::breakpoint(); } } while (0); | |||
946 | movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first()))); | |||
947 | } else { | |||
948 | assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs")do { if (!(src.is_single_reg() && dst.is_single_reg() )) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 948, "assert(" "src.is_single_reg() && dst.is_single_reg()" ") failed", "not stack pairs"); ::breakpoint(); } } while (0 ); | |||
949 | movq(rax, Address(rbp, reg2offset_in(src.first()))); | |||
950 | movq(Address(rsp, reg2offset_out(dst.first())), rax); | |||
951 | } | |||
952 | } | |||
953 | ||||
954 | // A double move | |||
955 | void MacroAssembler::double_move(VMRegPair src, VMRegPair dst) { | |||
956 | ||||
957 | // The calling conventions assures us that each VMregpair is either | |||
958 | // all really one physical register or adjacent stack slots. | |||
959 | ||||
960 | if (src.is_single_phys_reg() ) { | |||
961 | if (dst.is_single_phys_reg()) { | |||
962 | // In theory these overlap but the ordering is such that this is likely a nop | |||
963 | if ( src.first() != dst.first()) { | |||
964 | movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister()); | |||
965 | } | |||
966 | } else { | |||
967 | assert(dst.is_single_reg(), "not a stack pair")do { if (!(dst.is_single_reg())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 967, "assert(" "dst.is_single_reg()" ") failed", "not a stack pair" ); ::breakpoint(); } } while (0); | |||
968 | movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); | |||
969 | } | |||
970 | } else if (dst.is_single_phys_reg()) { | |||
971 | assert(src.is_single_reg(), "not a stack pair")do { if (!(src.is_single_reg())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 971, "assert(" "src.is_single_reg()" ") failed", "not a stack pair" ); ::breakpoint(); } } while (0); | |||
972 | movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first()))); | |||
973 | } else { | |||
974 | assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs")do { if (!(src.is_single_reg() && dst.is_single_reg() )) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 974, "assert(" "src.is_single_reg() && dst.is_single_reg()" ") failed", "not stack pairs"); ::breakpoint(); } } while (0 ); | |||
975 | movq(rax, Address(rbp, reg2offset_in(src.first()))); | |||
976 | movq(Address(rsp, reg2offset_out(dst.first())), rax); | |||
977 | } | |||
978 | } | |||
979 | ||||
980 | ||||
981 | // A float arg may have to do float reg int reg conversion | |||
982 | void MacroAssembler::float_move(VMRegPair src, VMRegPair dst) { | |||
983 | assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move")do { if (!(!src.second()->is_valid() && !dst.second ()->is_valid())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 983, "assert(" "!src.second()->is_valid() && !dst.second()->is_valid()" ") failed", "bad float_move"); ::breakpoint(); } } while (0); | |||
984 | ||||
985 | // The calling conventions assures us that each VMregpair is either | |||
986 | // all really one physical register or adjacent stack slots. | |||
987 | ||||
988 | if (src.first()->is_stack()) { | |||
989 | if (dst.first()->is_stack()) { | |||
990 | movl(rax, Address(rbp, reg2offset_in(src.first()))); | |||
991 | movptr(Address(rsp, reg2offset_out(dst.first())), rax); | |||
992 | } else { | |||
993 | // stack to reg | |||
994 | assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters")do { if (!(dst.first()->is_XMMRegister())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 994, "assert(" "dst.first()->is_XMMRegister()" ") failed" , "only expect xmm registers as parameters"); ::breakpoint(); } } while (0); | |||
995 | movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first()))); | |||
996 | } | |||
997 | } else if (dst.first()->is_stack()) { | |||
998 | // reg to stack | |||
999 | assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters")do { if (!(src.first()->is_XMMRegister())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 999, "assert(" "src.first()->is_XMMRegister()" ") failed" , "only expect xmm registers as parameters"); ::breakpoint(); } } while (0); | |||
1000 | movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); | |||
1001 | } else { | |||
1002 | // reg to reg | |||
1003 | // In theory these overlap but the ordering is such that this is likely a nop | |||
1004 | if ( src.first() != dst.first()) { | |||
1005 | movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister()); | |||
1006 | } | |||
1007 | } | |||
1008 | } | |||
1009 | ||||
1010 | // On 64 bit we will store integer like items to the stack as | |||
1011 | // 64 bits items (x86_32/64 abi) even though java would only store | |||
1012 | // 32bits for a parameter. On 32bit it will simply be 32 bits | |||
1013 | // So this routine will do 32->32 on 32bit and 32->64 on 64bit | |||
1014 | void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst) { | |||
1015 | if (src.first()->is_stack()) { | |||
1016 | if (dst.first()->is_stack()) { | |||
1017 | // stack to stack | |||
1018 | movslq(rax, Address(rbp, reg2offset_in(src.first()))); | |||
1019 | movq(Address(rsp, reg2offset_out(dst.first())), rax); | |||
1020 | } else { | |||
1021 | // stack to reg | |||
1022 | movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); | |||
1023 | } | |||
1024 | } else if (dst.first()->is_stack()) { | |||
1025 | // reg to stack | |||
1026 | // Do we really have to sign extend??? | |||
1027 | // __ movslq(src.first()->as_Register(), src.first()->as_Register()); | |||
1028 | movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); | |||
1029 | } else { | |||
1030 | // Do we really have to sign extend??? | |||
1031 | // __ movslq(dst.first()->as_Register(), src.first()->as_Register()); | |||
1032 | if (dst.first() != src.first()) { | |||
1033 | movq(dst.first()->as_Register(), src.first()->as_Register()); | |||
1034 | } | |||
1035 | } | |||
1036 | } | |||
1037 | ||||
1038 | void MacroAssembler::move_ptr(VMRegPair src, VMRegPair dst) { | |||
1039 | if (src.first()->is_stack()) { | |||
1040 | if (dst.first()->is_stack()) { | |||
1041 | // stack to stack | |||
1042 | movq(rax, Address(rbp, reg2offset_in(src.first()))); | |||
1043 | movq(Address(rsp, reg2offset_out(dst.first())), rax); | |||
1044 | } else { | |||
1045 | // stack to reg | |||
1046 | movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); | |||
1047 | } | |||
1048 | } else if (dst.first()->is_stack()) { | |||
1049 | // reg to stack | |||
1050 | movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); | |||
1051 | } else { | |||
1052 | if (dst.first() != src.first()) { | |||
1053 | movq(dst.first()->as_Register(), src.first()->as_Register()); | |||
1054 | } | |||
1055 | } | |||
1056 | } | |||
1057 | ||||
1058 | // An oop arg. Must pass a handle not the oop itself | |||
1059 | void MacroAssembler::object_move(OopMap* map, | |||
1060 | int oop_handle_offset, | |||
1061 | int framesize_in_slots, | |||
1062 | VMRegPair src, | |||
1063 | VMRegPair dst, | |||
1064 | bool is_receiver, | |||
1065 | int* receiver_offset) { | |||
1066 | ||||
1067 | // must pass a handle. First figure out the location we use as a handle | |||
1068 | ||||
1069 | Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register(); | |||
1070 | ||||
1071 | // See if oop is NULL if it is we need no handle | |||
1072 | ||||
1073 | if (src.first()->is_stack()) { | |||
1074 | ||||
1075 | // Oop is already on the stack as an argument | |||
1076 | int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); | |||
1077 | map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); | |||
1078 | if (is_receiver) { | |||
1079 | *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; | |||
1080 | } | |||
1081 | ||||
1082 | cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD0L); | |||
1083 | lea(rHandle, Address(rbp, reg2offset_in(src.first()))); | |||
1084 | // conditionally move a NULL | |||
1085 | cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first()))); | |||
1086 | } else { | |||
1087 | ||||
1088 | // Oop is in an a register we must store it to the space we reserve | |||
1089 | // on the stack for oop_handles and pass a handle if oop is non-NULL | |||
1090 | ||||
1091 | const Register rOop = src.first()->as_Register(); | |||
1092 | int oop_slot; | |||
1093 | if (rOop == j_rarg0) | |||
1094 | oop_slot = 0; | |||
1095 | else if (rOop == j_rarg1) | |||
1096 | oop_slot = 1; | |||
1097 | else if (rOop == j_rarg2) | |||
1098 | oop_slot = 2; | |||
1099 | else if (rOop == j_rarg3) | |||
1100 | oop_slot = 3; | |||
1101 | else if (rOop == j_rarg4) | |||
1102 | oop_slot = 4; | |||
1103 | else { | |||
1104 | assert(rOop == j_rarg5, "wrong register")do { if (!(rOop == j_rarg5)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1104, "assert(" "rOop == j_rarg5" ") failed", "wrong register" ); ::breakpoint(); } } while (0); | |||
1105 | oop_slot = 5; | |||
1106 | } | |||
1107 | ||||
1108 | oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; | |||
1109 | int offset = oop_slot*VMRegImpl::stack_slot_size; | |||
1110 | ||||
1111 | map->set_oop(VMRegImpl::stack2reg(oop_slot)); | |||
1112 | // Store oop in handle area, may be NULL | |||
1113 | movptr(Address(rsp, offset), rOop); | |||
1114 | if (is_receiver) { | |||
1115 | *receiver_offset = offset; | |||
1116 | } | |||
1117 | ||||
1118 | cmpptr(rOop, (int32_t)NULL_WORD0L); | |||
1119 | lea(rHandle, Address(rsp, offset)); | |||
1120 | // conditionally move a NULL from the handle area where it was just stored | |||
1121 | cmovptr(Assembler::equal, rHandle, Address(rsp, offset)); | |||
1122 | } | |||
1123 | ||||
1124 | // If arg is on the stack then place it otherwise it is already in correct reg. | |||
1125 | if (dst.first()->is_stack()) { | |||
1126 | movptr(Address(rsp, reg2offset_out(dst.first())), rHandle); | |||
1127 | } | |||
1128 | } | |||
1129 | ||||
1130 | #endif // _LP64 | |||
1131 | ||||
1132 | // Now versions that are common to 32/64 bit | |||
1133 | ||||
1134 | void MacroAssembler::addptr(Register dst, int32_t imm32) { | |||
1135 | LP64_ONLY(addq(dst, imm32))addq(dst, imm32) NOT_LP64(addl(dst, imm32)); | |||
1136 | } | |||
1137 | ||||
1138 | void MacroAssembler::addptr(Register dst, Register src) { | |||
1139 | LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)); | |||
1140 | } | |||
1141 | ||||
1142 | void MacroAssembler::addptr(Address dst, Register src) { | |||
1143 | LP64_ONLY(addq(dst, src))addq(dst, src) NOT_LP64(addl(dst, src)); | |||
1144 | } | |||
1145 | ||||
1146 | void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { | |||
1147 | if (reachable(src)) { | |||
1148 | Assembler::addsd(dst, as_Address(src)); | |||
1149 | } else { | |||
1150 | lea(rscratch1, src); | |||
1151 | Assembler::addsd(dst, Address(rscratch1, 0)); | |||
1152 | } | |||
1153 | } | |||
1154 | ||||
1155 | void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { | |||
1156 | if (reachable(src)) { | |||
1157 | addss(dst, as_Address(src)); | |||
1158 | } else { | |||
1159 | lea(rscratch1, src); | |||
1160 | addss(dst, Address(rscratch1, 0)); | |||
1161 | } | |||
1162 | } | |||
1163 | ||||
1164 | void MacroAssembler::addpd(XMMRegister dst, AddressLiteral src) { | |||
1165 | if (reachable(src)) { | |||
1166 | Assembler::addpd(dst, as_Address(src)); | |||
1167 | } else { | |||
1168 | lea(rscratch1, src); | |||
1169 | Assembler::addpd(dst, Address(rscratch1, 0)); | |||
1170 | } | |||
1171 | } | |||
1172 | ||||
1173 | // See 8273459. Function for ensuring 64-byte alignment, intended for stubs only. | |||
1174 | // Stub code is generated once and never copied. | |||
1175 | // NMethods can't use this because they get copied and we can't force alignment > 32 bytes. | |||
1176 | void MacroAssembler::align64() { | |||
1177 | align(64, (unsigned long long) pc()); | |||
1178 | } | |||
1179 | ||||
1180 | void MacroAssembler::align32() { | |||
1181 | align(32, (unsigned long long) pc()); | |||
1182 | } | |||
1183 | ||||
1184 | void MacroAssembler::align(int modulus) { | |||
1185 | // 8273459: Ensure alignment is possible with current segment alignment | |||
1186 | assert(modulus <= CodeEntryAlignment, "Alignment must be <= CodeEntryAlignment")do { if (!(modulus <= CodeEntryAlignment)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1186, "assert(" "modulus <= CodeEntryAlignment" ") failed" , "Alignment must be <= CodeEntryAlignment"); ::breakpoint (); } } while (0); | |||
1187 | align(modulus, offset()); | |||
1188 | } | |||
1189 | ||||
1190 | void MacroAssembler::align(int modulus, int target) { | |||
1191 | if (target % modulus != 0) { | |||
1192 | nop(modulus - (target % modulus)); | |||
1193 | } | |||
1194 | } | |||
1195 | ||||
1196 | void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) { | |||
1197 | // Used in sign-masking with aligned address. | |||
1198 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15 ) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1198, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)" ") failed", "SSE mode requires address alignment 16 bytes"); ::breakpoint(); } } while (0); | |||
1199 | if (reachable(src)) { | |||
1200 | Assembler::andpd(dst, as_Address(src)); | |||
1201 | } else { | |||
1202 | lea(scratch_reg, src); | |||
1203 | Assembler::andpd(dst, Address(scratch_reg, 0)); | |||
1204 | } | |||
1205 | } | |||
1206 | ||||
1207 | void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) { | |||
1208 | // Used in sign-masking with aligned address. | |||
1209 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15 ) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1209, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)" ") failed", "SSE mode requires address alignment 16 bytes"); ::breakpoint(); } } while (0); | |||
1210 | if (reachable(src)) { | |||
1211 | Assembler::andps(dst, as_Address(src)); | |||
1212 | } else { | |||
1213 | lea(scratch_reg, src); | |||
1214 | Assembler::andps(dst, Address(scratch_reg, 0)); | |||
1215 | } | |||
1216 | } | |||
1217 | ||||
1218 | void MacroAssembler::andptr(Register dst, int32_t imm32) { | |||
1219 | LP64_ONLY(andq(dst, imm32))andq(dst, imm32) NOT_LP64(andl(dst, imm32)); | |||
1220 | } | |||
1221 | ||||
1222 | void MacroAssembler::atomic_incl(Address counter_addr) { | |||
1223 | lock(); | |||
1224 | incrementl(counter_addr); | |||
1225 | } | |||
1226 | ||||
1227 | void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) { | |||
1228 | if (reachable(counter_addr)) { | |||
1229 | atomic_incl(as_Address(counter_addr)); | |||
1230 | } else { | |||
1231 | lea(scr, counter_addr); | |||
1232 | atomic_incl(Address(scr, 0)); | |||
1233 | } | |||
1234 | } | |||
1235 | ||||
1236 | #ifdef _LP641 | |||
1237 | void MacroAssembler::atomic_incq(Address counter_addr) { | |||
1238 | lock(); | |||
1239 | incrementq(counter_addr); | |||
1240 | } | |||
1241 | ||||
1242 | void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) { | |||
1243 | if (reachable(counter_addr)) { | |||
1244 | atomic_incq(as_Address(counter_addr)); | |||
1245 | } else { | |||
1246 | lea(scr, counter_addr); | |||
1247 | atomic_incq(Address(scr, 0)); | |||
1248 | } | |||
1249 | } | |||
1250 | #endif | |||
1251 | ||||
1252 | // Writes to stack successive pages until offset reached to check for | |||
1253 | // stack overflow + shadow pages. This clobbers tmp. | |||
1254 | void MacroAssembler::bang_stack_size(Register size, Register tmp) { | |||
1255 | movptr(tmp, rsp); | |||
1256 | // Bang stack for total size given plus shadow page size. | |||
1257 | // Bang one page at a time because large size can bang beyond yellow and | |||
1258 | // red zones. | |||
1259 | Label loop; | |||
1260 | bind(loop); | |||
1261 | movl(Address(tmp, (-os::vm_page_size())), size ); | |||
1262 | subptr(tmp, os::vm_page_size()); | |||
1263 | subl(size, os::vm_page_size()); | |||
1264 | jcc(Assembler::greater, loop); | |||
1265 | ||||
1266 | // Bang down shadow pages too. | |||
1267 | // At this point, (tmp-0) is the last address touched, so don't | |||
1268 | // touch it again. (It was touched as (tmp-pagesize) but then tmp | |||
1269 | // was post-decremented.) Skip this address by starting at i=1, and | |||
1270 | // touch a few more pages below. N.B. It is important to touch all | |||
1271 | // the way down including all pages in the shadow zone. | |||
1272 | for (int i = 1; i < ((int)StackOverflow::stack_shadow_zone_size() / os::vm_page_size()); i++) { | |||
1273 | // this could be any sized move but this is can be a debugging crumb | |||
1274 | // so the bigger the better. | |||
1275 | movptr(Address(tmp, (-i*os::vm_page_size())), size ); | |||
1276 | } | |||
1277 | } | |||
1278 | ||||
1279 | void MacroAssembler::reserved_stack_check() { | |||
1280 | // testing if reserved zone needs to be enabled | |||
1281 | Label no_reserved_zone_enabling; | |||
1282 | Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread)r15_thread; | |||
1283 | NOT_LP64(get_thread(rsi);) | |||
1284 | ||||
1285 | cmpptr(rsp, Address(thread, JavaThread::reserved_stack_activation_offset())); | |||
1286 | jcc(Assembler::below, no_reserved_zone_enabling); | |||
1287 | ||||
1288 | call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)((address)((address_word)(SharedRuntime::enable_stack_reserved_zone ))), thread); | |||
1289 | jump(RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry())); | |||
1290 | should_not_reach_here(); | |||
1291 | ||||
1292 | bind(no_reserved_zone_enabling); | |||
1293 | } | |||
1294 | ||||
1295 | void MacroAssembler::c2bool(Register x) { | |||
1296 | // implements x == 0 ? 0 : 1 | |||
1297 | // note: must only look at least-significant byte of x | |||
1298 | // since C-style booleans are stored in one byte | |||
1299 | // only! (was bug) | |||
1300 | andl(x, 0xFF); | |||
1301 | setb(Assembler::notZero, x); | |||
1302 | } | |||
1303 | ||||
1304 | // Wouldn't need if AddressLiteral version had new name | |||
1305 | void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { | |||
1306 | Assembler::call(L, rtype); | |||
1307 | } | |||
1308 | ||||
1309 | void MacroAssembler::call(Register entry) { | |||
1310 | Assembler::call(entry); | |||
1311 | } | |||
1312 | ||||
1313 | void MacroAssembler::call(AddressLiteral entry) { | |||
1314 | if (reachable(entry)) { | |||
1315 | Assembler::call_literal(entry.target(), entry.rspec()); | |||
1316 | } else { | |||
1317 | lea(rscratch1, entry); | |||
1318 | Assembler::call(rscratch1); | |||
1319 | } | |||
1320 | } | |||
1321 | ||||
1322 | void MacroAssembler::ic_call(address entry, jint method_index) { | |||
1323 | RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); | |||
1324 | movptr(rax, (intptr_t)Universe::non_oop_word()); | |||
1325 | call(AddressLiteral(entry, rh)); | |||
1326 | } | |||
1327 | ||||
1328 | // Implementation of call_VM versions | |||
1329 | ||||
1330 | void MacroAssembler::call_VM(Register oop_result, | |||
1331 | address entry_point, | |||
1332 | bool check_exceptions) { | |||
1333 | Label C, E; | |||
1334 | call(C, relocInfo::none); | |||
1335 | jmp(E); | |||
1336 | ||||
1337 | bind(C); | |||
1338 | call_VM_helper(oop_result, entry_point, 0, check_exceptions); | |||
1339 | ret(0); | |||
1340 | ||||
1341 | bind(E); | |||
1342 | } | |||
1343 | ||||
1344 | void MacroAssembler::call_VM(Register oop_result, | |||
1345 | address entry_point, | |||
1346 | Register arg_1, | |||
1347 | bool check_exceptions) { | |||
1348 | Label C, E; | |||
1349 | call(C, relocInfo::none); | |||
1350 | jmp(E); | |||
1351 | ||||
1352 | bind(C); | |||
1353 | pass_arg1(this, arg_1); | |||
1354 | call_VM_helper(oop_result, entry_point, 1, check_exceptions); | |||
1355 | ret(0); | |||
1356 | ||||
1357 | bind(E); | |||
1358 | } | |||
1359 | ||||
1360 | void MacroAssembler::call_VM(Register oop_result, | |||
1361 | address entry_point, | |||
1362 | Register arg_1, | |||
1363 | Register arg_2, | |||
1364 | bool check_exceptions) { | |||
1365 | Label C, E; | |||
1366 | call(C, relocInfo::none); | |||
1367 | jmp(E); | |||
1368 | ||||
1369 | bind(C); | |||
1370 | ||||
1371 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1371, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1372 | ||||
1373 | pass_arg2(this, arg_2); | |||
1374 | pass_arg1(this, arg_1); | |||
1375 | call_VM_helper(oop_result, entry_point, 2, check_exceptions); | |||
1376 | ret(0); | |||
1377 | ||||
1378 | bind(E); | |||
1379 | } | |||
1380 | ||||
1381 | void MacroAssembler::call_VM(Register oop_result, | |||
1382 | address entry_point, | |||
1383 | Register arg_1, | |||
1384 | Register arg_2, | |||
1385 | Register arg_3, | |||
1386 | bool check_exceptions) { | |||
1387 | Label C, E; | |||
1388 | call(C, relocInfo::none); | |||
1389 | jmp(E); | |||
1390 | ||||
1391 | bind(C); | |||
1392 | ||||
1393 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1393, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1394 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1394, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1395 | pass_arg3(this, arg_3); | |||
1396 | ||||
1397 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1397, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1398 | pass_arg2(this, arg_2); | |||
1399 | ||||
1400 | pass_arg1(this, arg_1); | |||
1401 | call_VM_helper(oop_result, entry_point, 3, check_exceptions); | |||
1402 | ret(0); | |||
1403 | ||||
1404 | bind(E); | |||
1405 | } | |||
1406 | ||||
1407 | void MacroAssembler::call_VM(Register oop_result, | |||
1408 | Register last_java_sp, | |||
1409 | address entry_point, | |||
1410 | int number_of_arguments, | |||
1411 | bool check_exceptions) { | |||
1412 | Register thread = LP64_ONLY(r15_thread)r15_thread NOT_LP64(noreg); | |||
1413 | call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); | |||
1414 | } | |||
1415 | ||||
1416 | void MacroAssembler::call_VM(Register oop_result, | |||
1417 | Register last_java_sp, | |||
1418 | address entry_point, | |||
1419 | Register arg_1, | |||
1420 | bool check_exceptions) { | |||
1421 | pass_arg1(this, arg_1); | |||
1422 | call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); | |||
1423 | } | |||
1424 | ||||
1425 | void MacroAssembler::call_VM(Register oop_result, | |||
1426 | Register last_java_sp, | |||
1427 | address entry_point, | |||
1428 | Register arg_1, | |||
1429 | Register arg_2, | |||
1430 | bool check_exceptions) { | |||
1431 | ||||
1432 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1432, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1433 | pass_arg2(this, arg_2); | |||
1434 | pass_arg1(this, arg_1); | |||
1435 | call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); | |||
1436 | } | |||
1437 | ||||
1438 | void MacroAssembler::call_VM(Register oop_result, | |||
1439 | Register last_java_sp, | |||
1440 | address entry_point, | |||
1441 | Register arg_1, | |||
1442 | Register arg_2, | |||
1443 | Register arg_3, | |||
1444 | bool check_exceptions) { | |||
1445 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1445, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1446 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1446, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1447 | pass_arg3(this, arg_3); | |||
1448 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1448, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1449 | pass_arg2(this, arg_2); | |||
1450 | pass_arg1(this, arg_1); | |||
1451 | call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); | |||
1452 | } | |||
1453 | ||||
1454 | void MacroAssembler::super_call_VM(Register oop_result, | |||
1455 | Register last_java_sp, | |||
1456 | address entry_point, | |||
1457 | int number_of_arguments, | |||
1458 | bool check_exceptions) { | |||
1459 | Register thread = LP64_ONLY(r15_thread)r15_thread NOT_LP64(noreg); | |||
1460 | MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); | |||
1461 | } | |||
1462 | ||||
1463 | void MacroAssembler::super_call_VM(Register oop_result, | |||
1464 | Register last_java_sp, | |||
1465 | address entry_point, | |||
1466 | Register arg_1, | |||
1467 | bool check_exceptions) { | |||
1468 | pass_arg1(this, arg_1); | |||
1469 | super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); | |||
1470 | } | |||
1471 | ||||
1472 | void MacroAssembler::super_call_VM(Register oop_result, | |||
1473 | Register last_java_sp, | |||
1474 | address entry_point, | |||
1475 | Register arg_1, | |||
1476 | Register arg_2, | |||
1477 | bool check_exceptions) { | |||
1478 | ||||
1479 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1479, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1480 | pass_arg2(this, arg_2); | |||
1481 | pass_arg1(this, arg_1); | |||
1482 | super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); | |||
1483 | } | |||
1484 | ||||
1485 | void MacroAssembler::super_call_VM(Register oop_result, | |||
1486 | Register last_java_sp, | |||
1487 | address entry_point, | |||
1488 | Register arg_1, | |||
1489 | Register arg_2, | |||
1490 | Register arg_3, | |||
1491 | bool check_exceptions) { | |||
1492 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1492, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1493 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1493, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1494 | pass_arg3(this, arg_3); | |||
1495 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1495, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1496 | pass_arg2(this, arg_2); | |||
1497 | pass_arg1(this, arg_1); | |||
1498 | super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); | |||
1499 | } | |||
1500 | ||||
1501 | void MacroAssembler::call_VM_base(Register oop_result, | |||
1502 | Register java_thread, | |||
1503 | Register last_java_sp, | |||
1504 | address entry_point, | |||
1505 | int number_of_arguments, | |||
1506 | bool check_exceptions) { | |||
1507 | // determine java_thread register | |||
1508 | if (!java_thread->is_valid()) { | |||
1509 | #ifdef _LP641 | |||
1510 | java_thread = r15_thread; | |||
1511 | #else | |||
1512 | java_thread = rdi; | |||
1513 | get_thread(java_thread); | |||
1514 | #endif // LP64 | |||
1515 | } | |||
1516 | // determine last_java_sp register | |||
1517 | if (!last_java_sp->is_valid()) { | |||
1518 | last_java_sp = rsp; | |||
1519 | } | |||
1520 | // debugging support | |||
1521 | assert(number_of_arguments >= 0 , "cannot have negative number of arguments")do { if (!(number_of_arguments >= 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1521, "assert(" "number_of_arguments >= 0" ") failed", "cannot have negative number of arguments" ); ::breakpoint(); } } while (0); | |||
1522 | LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"))do { if (!(java_thread == r15_thread)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1522, "assert(" "java_thread == r15_thread" ") failed", "unexpected register" ); ::breakpoint(); } } while (0); | |||
1523 | #ifdef ASSERT1 | |||
1524 | // TraceBytecodes does not use r12 but saves it over the call, so don't verify | |||
1525 | // r12 is the heapbase. | |||
1526 | LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)if (UseCompressedOops && !TraceBytecodes) verify_heapbase ("call_VM_base: heap base corrupted?"); | |||
1527 | #endif // ASSERT | |||
1528 | ||||
1529 | assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result")do { if (!(java_thread != oop_result)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1529, "assert(" "java_thread != oop_result" ") failed", "cannot use the same register for java_thread & oop_result" ); ::breakpoint(); } } while (0); | |||
1530 | assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp")do { if (!(java_thread != last_java_sp)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1530, "assert(" "java_thread != last_java_sp" ") failed", "cannot use the same register for java_thread & last_java_sp" ); ::breakpoint(); } } while (0); | |||
1531 | ||||
1532 | // push java thread (becomes first argument of C function) | |||
1533 | ||||
1534 | NOT_LP64(push(java_thread); number_of_arguments++); | |||
1535 | LP64_ONLY(mov(c_rarg0, r15_thread))mov(c_rarg0, r15_thread); | |||
1536 | ||||
1537 | // set last Java frame before call | |||
1538 | assert(last_java_sp != rbp, "can't use ebp/rbp")do { if (!(last_java_sp != rbp)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1538, "assert(" "last_java_sp != rbp" ") failed", "can't use ebp/rbp" ); ::breakpoint(); } } while (0); | |||
1539 | ||||
1540 | // Only interpreter should have to set fp | |||
1541 | set_last_Java_frame(java_thread, last_java_sp, rbp, NULL__null); | |||
1542 | ||||
1543 | // do the call, remove parameters | |||
1544 | MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); | |||
1545 | ||||
1546 | // restore the thread (cannot use the pushed argument since arguments | |||
1547 | // may be overwritten by C code generated by an optimizing compiler); | |||
1548 | // however can use the register value directly if it is callee saved. | |||
1549 | if (LP64_ONLY(true ||)true || java_thread == rdi || java_thread == rsi) { | |||
1550 | // rdi & rsi (also r15) are callee saved -> nothing to do | |||
1551 | #ifdef ASSERT1 | |||
1552 | guarantee(java_thread != rax, "change this code")do { if (!(java_thread != rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1552, "guarantee(" "java_thread != rax" ") failed", "change this code" ); ::breakpoint(); } } while (0); | |||
1553 | push(rax); | |||
1554 | { Label L; | |||
1555 | get_thread(rax); | |||
1556 | cmpptr(java_thread, rax); | |||
1557 | jcc(Assembler::equal, L); | |||
1558 | STOP("MacroAssembler::call_VM_base: rdi not callee saved?")block_comment("MacroAssembler::call_VM_base: rdi not callee saved?" ); stop("MacroAssembler::call_VM_base: rdi not callee saved?" ); | |||
1559 | bind(L); | |||
1560 | } | |||
1561 | pop(rax); | |||
1562 | #endif | |||
1563 | } else { | |||
1564 | get_thread(java_thread); | |||
1565 | } | |||
1566 | // reset last Java frame | |||
1567 | // Only interpreter should have to clear fp | |||
1568 | reset_last_Java_frame(java_thread, true); | |||
1569 | ||||
1570 | // C++ interp handles this in the interpreter | |||
1571 | check_and_handle_popframe(java_thread); | |||
1572 | check_and_handle_earlyret(java_thread); | |||
1573 | ||||
1574 | if (check_exceptions) { | |||
1575 | // check for pending exceptions (java_thread is set upon return) | |||
1576 | cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD0L); | |||
1577 | #ifndef _LP641 | |||
1578 | jump_cc(Assembler::notEqual, | |||
1579 | RuntimeAddress(StubRoutines::forward_exception_entry())); | |||
1580 | #else | |||
1581 | // This used to conditionally jump to forward_exception however it is | |||
1582 | // possible if we relocate that the branch will not reach. So we must jump | |||
1583 | // around so we can always reach | |||
1584 | ||||
1585 | Label ok; | |||
1586 | jcc(Assembler::equal, ok); | |||
1587 | jump(RuntimeAddress(StubRoutines::forward_exception_entry())); | |||
1588 | bind(ok); | |||
1589 | #endif // LP64 | |||
1590 | } | |||
1591 | ||||
1592 | // get oop result if there is one and reset the value in the thread | |||
1593 | if (oop_result->is_valid()) { | |||
1594 | get_vm_result(oop_result, java_thread); | |||
1595 | } | |||
1596 | } | |||
1597 | ||||
1598 | void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { | |||
1599 | ||||
1600 | // Calculate the value for last_Java_sp | |||
1601 | // somewhat subtle. call_VM does an intermediate call | |||
1602 | // which places a return address on the stack just under the | |||
1603 | // stack pointer as the user finsihed with it. This allows | |||
1604 | // use to retrieve last_Java_pc from last_Java_sp[-1]. | |||
1605 | // On 32bit we then have to push additional args on the stack to accomplish | |||
1606 | // the actual requested call. On 64bit call_VM only can use register args | |||
1607 | // so the only extra space is the return address that call_VM created. | |||
1608 | // This hopefully explains the calculations here. | |||
1609 | ||||
1610 | #ifdef _LP641 | |||
1611 | // We've pushed one address, correct last_Java_sp | |||
1612 | lea(rax, Address(rsp, wordSize)); | |||
1613 | #else | |||
1614 | lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); | |||
1615 | #endif // LP64 | |||
1616 | ||||
1617 | call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); | |||
1618 | ||||
1619 | } | |||
1620 | ||||
1621 | // Use this method when MacroAssembler version of call_VM_leaf_base() should be called from Interpreter. | |||
1622 | void MacroAssembler::call_VM_leaf0(address entry_point) { | |||
1623 | MacroAssembler::call_VM_leaf_base(entry_point, 0); | |||
1624 | } | |||
1625 | ||||
1626 | void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { | |||
1627 | call_VM_leaf_base(entry_point, number_of_arguments); | |||
1628 | } | |||
1629 | ||||
1630 | void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { | |||
1631 | pass_arg0(this, arg_0); | |||
1632 | call_VM_leaf(entry_point, 1); | |||
1633 | } | |||
1634 | ||||
1635 | void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { | |||
1636 | ||||
1637 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1637, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1638 | pass_arg1(this, arg_1); | |||
1639 | pass_arg0(this, arg_0); | |||
1640 | call_VM_leaf(entry_point, 2); | |||
1641 | } | |||
1642 | ||||
1643 | void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { | |||
1644 | LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1644, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1645 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1645, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1646 | pass_arg2(this, arg_2); | |||
1647 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1647, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1648 | pass_arg1(this, arg_1); | |||
1649 | pass_arg0(this, arg_0); | |||
1650 | call_VM_leaf(entry_point, 3); | |||
1651 | } | |||
1652 | ||||
1653 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { | |||
1654 | pass_arg0(this, arg_0); | |||
1655 | MacroAssembler::call_VM_leaf_base(entry_point, 1); | |||
1656 | } | |||
1657 | ||||
1658 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { | |||
1659 | ||||
1660 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1660, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1661 | pass_arg1(this, arg_1); | |||
1662 | pass_arg0(this, arg_0); | |||
1663 | MacroAssembler::call_VM_leaf_base(entry_point, 2); | |||
1664 | } | |||
1665 | ||||
1666 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { | |||
1667 | LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1667, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1668 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1668, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1669 | pass_arg2(this, arg_2); | |||
1670 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1670, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1671 | pass_arg1(this, arg_1); | |||
1672 | pass_arg0(this, arg_0); | |||
1673 | MacroAssembler::call_VM_leaf_base(entry_point, 3); | |||
1674 | } | |||
1675 | ||||
1676 | void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { | |||
1677 | LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"))do { if (!(arg_0 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1677, "assert(" "arg_0 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1678 | LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"))do { if (!(arg_1 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1678, "assert(" "arg_1 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1679 | LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"))do { if (!(arg_2 != c_rarg3)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1679, "assert(" "arg_2 != c_rarg3" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1680 | pass_arg3(this, arg_3); | |||
1681 | LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"))do { if (!(arg_0 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1681, "assert(" "arg_0 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1682 | LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"))do { if (!(arg_1 != c_rarg2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1682, "assert(" "arg_1 != c_rarg2" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1683 | pass_arg2(this, arg_2); | |||
1684 | LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"))do { if (!(arg_0 != c_rarg1)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1684, "assert(" "arg_0 != c_rarg1" ") failed", "smashed arg" ); ::breakpoint(); } } while (0); | |||
1685 | pass_arg1(this, arg_1); | |||
1686 | pass_arg0(this, arg_0); | |||
1687 | MacroAssembler::call_VM_leaf_base(entry_point, 4); | |||
1688 | } | |||
1689 | ||||
1690 | void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { | |||
1691 | movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); | |||
1692 | movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD0L); | |||
1693 | verify_oop_msg(oop_result, "broken oop in call_VM_base")_verify_oop_checked(oop_result, "broken oop " "oop_result" ", " "\"broken oop in call_VM_base\"", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1693); | |||
1694 | } | |||
1695 | ||||
1696 | void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { | |||
1697 | movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); | |||
1698 | movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD0L); | |||
1699 | } | |||
1700 | ||||
1701 | void MacroAssembler::check_and_handle_earlyret(Register java_thread) { | |||
1702 | } | |||
1703 | ||||
1704 | void MacroAssembler::check_and_handle_popframe(Register java_thread) { | |||
1705 | } | |||
1706 | ||||
1707 | void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { | |||
1708 | if (reachable(src1)) { | |||
1709 | cmpl(as_Address(src1), imm); | |||
1710 | } else { | |||
1711 | lea(rscratch1, src1); | |||
1712 | cmpl(Address(rscratch1, 0), imm); | |||
1713 | } | |||
1714 | } | |||
1715 | ||||
1716 | void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { | |||
1717 | assert(!src2.is_lval(), "use cmpptr")do { if (!(!src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1717, "assert(" "!src2.is_lval()" ") failed", "use cmpptr") ; ::breakpoint(); } } while (0); | |||
1718 | if (reachable(src2)) { | |||
1719 | cmpl(src1, as_Address(src2)); | |||
1720 | } else { | |||
1721 | lea(rscratch1, src2); | |||
1722 | cmpl(src1, Address(rscratch1, 0)); | |||
1723 | } | |||
1724 | } | |||
1725 | ||||
1726 | void MacroAssembler::cmp32(Register src1, int32_t imm) { | |||
1727 | Assembler::cmpl(src1, imm); | |||
1728 | } | |||
1729 | ||||
1730 | void MacroAssembler::cmp32(Register src1, Address src2) { | |||
1731 | Assembler::cmpl(src1, src2); | |||
1732 | } | |||
1733 | ||||
1734 | void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { | |||
1735 | ucomisd(opr1, opr2); | |||
1736 | ||||
1737 | Label L; | |||
1738 | if (unordered_is_less) { | |||
1739 | movl(dst, -1); | |||
1740 | jcc(Assembler::parity, L); | |||
1741 | jcc(Assembler::below , L); | |||
1742 | movl(dst, 0); | |||
1743 | jcc(Assembler::equal , L); | |||
1744 | increment(dst); | |||
1745 | } else { // unordered is greater | |||
1746 | movl(dst, 1); | |||
1747 | jcc(Assembler::parity, L); | |||
1748 | jcc(Assembler::above , L); | |||
1749 | movl(dst, 0); | |||
1750 | jcc(Assembler::equal , L); | |||
1751 | decrementl(dst); | |||
1752 | } | |||
1753 | bind(L); | |||
1754 | } | |||
1755 | ||||
1756 | void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { | |||
1757 | ucomiss(opr1, opr2); | |||
1758 | ||||
1759 | Label L; | |||
1760 | if (unordered_is_less) { | |||
1761 | movl(dst, -1); | |||
1762 | jcc(Assembler::parity, L); | |||
1763 | jcc(Assembler::below , L); | |||
1764 | movl(dst, 0); | |||
1765 | jcc(Assembler::equal , L); | |||
1766 | increment(dst); | |||
1767 | } else { // unordered is greater | |||
1768 | movl(dst, 1); | |||
1769 | jcc(Assembler::parity, L); | |||
1770 | jcc(Assembler::above , L); | |||
1771 | movl(dst, 0); | |||
1772 | jcc(Assembler::equal , L); | |||
1773 | decrementl(dst); | |||
1774 | } | |||
1775 | bind(L); | |||
1776 | } | |||
1777 | ||||
1778 | ||||
1779 | void MacroAssembler::cmp8(AddressLiteral src1, int imm) { | |||
1780 | if (reachable(src1)) { | |||
1781 | cmpb(as_Address(src1), imm); | |||
1782 | } else { | |||
1783 | lea(rscratch1, src1); | |||
1784 | cmpb(Address(rscratch1, 0), imm); | |||
1785 | } | |||
1786 | } | |||
1787 | ||||
1788 | void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { | |||
1789 | #ifdef _LP641 | |||
1790 | if (src2.is_lval()) { | |||
1791 | movptr(rscratch1, src2); | |||
1792 | Assembler::cmpq(src1, rscratch1); | |||
1793 | } else if (reachable(src2)) { | |||
1794 | cmpq(src1, as_Address(src2)); | |||
1795 | } else { | |||
1796 | lea(rscratch1, src2); | |||
1797 | Assembler::cmpq(src1, Address(rscratch1, 0)); | |||
1798 | } | |||
1799 | #else | |||
1800 | if (src2.is_lval()) { | |||
1801 | cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); | |||
1802 | } else { | |||
1803 | cmpl(src1, as_Address(src2)); | |||
1804 | } | |||
1805 | #endif // _LP64 | |||
1806 | } | |||
1807 | ||||
1808 | void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { | |||
1809 | assert(src2.is_lval(), "not a mem-mem compare")do { if (!(src2.is_lval())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1809, "assert(" "src2.is_lval()" ") failed", "not a mem-mem compare" ); ::breakpoint(); } } while (0); | |||
1810 | #ifdef _LP641 | |||
1811 | // moves src2's literal address | |||
1812 | movptr(rscratch1, src2); | |||
1813 | Assembler::cmpq(src1, rscratch1); | |||
1814 | #else | |||
1815 | cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); | |||
1816 | #endif // _LP64 | |||
1817 | } | |||
1818 | ||||
1819 | void MacroAssembler::cmpoop(Register src1, Register src2) { | |||
1820 | cmpptr(src1, src2); | |||
1821 | } | |||
1822 | ||||
1823 | void MacroAssembler::cmpoop(Register src1, Address src2) { | |||
1824 | cmpptr(src1, src2); | |||
1825 | } | |||
1826 | ||||
1827 | #ifdef _LP641 | |||
1828 | void MacroAssembler::cmpoop(Register src1, jobject src2) { | |||
1829 | movoop(rscratch1, src2); | |||
1830 | cmpptr(src1, rscratch1); | |||
1831 | } | |||
1832 | #endif | |||
1833 | ||||
1834 | void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { | |||
1835 | if (reachable(adr)) { | |||
1836 | lock(); | |||
1837 | cmpxchgptr(reg, as_Address(adr)); | |||
1838 | } else { | |||
1839 | lea(rscratch1, adr); | |||
1840 | lock(); | |||
1841 | cmpxchgptr(reg, Address(rscratch1, 0)); | |||
1842 | } | |||
1843 | } | |||
1844 | ||||
1845 | void MacroAssembler::cmpxchgptr(Register reg, Address adr) { | |||
1846 | LP64_ONLY(cmpxchgq(reg, adr))cmpxchgq(reg, adr) NOT_LP64(cmpxchgl(reg, adr)); | |||
1847 | } | |||
1848 | ||||
1849 | void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { | |||
1850 | if (reachable(src)) { | |||
1851 | Assembler::comisd(dst, as_Address(src)); | |||
1852 | } else { | |||
1853 | lea(rscratch1, src); | |||
1854 | Assembler::comisd(dst, Address(rscratch1, 0)); | |||
1855 | } | |||
1856 | } | |||
1857 | ||||
1858 | void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { | |||
1859 | if (reachable(src)) { | |||
1860 | Assembler::comiss(dst, as_Address(src)); | |||
1861 | } else { | |||
1862 | lea(rscratch1, src); | |||
1863 | Assembler::comiss(dst, Address(rscratch1, 0)); | |||
1864 | } | |||
1865 | } | |||
1866 | ||||
1867 | ||||
1868 | void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { | |||
1869 | Condition negated_cond = negate_condition(cond); | |||
1870 | Label L; | |||
1871 | jcc(negated_cond, L); | |||
1872 | pushf(); // Preserve flags | |||
1873 | atomic_incl(counter_addr); | |||
1874 | popf(); | |||
1875 | bind(L); | |||
1876 | } | |||
1877 | ||||
1878 | int MacroAssembler::corrected_idivl(Register reg) { | |||
1879 | // Full implementation of Java idiv and irem; checks for | |||
1880 | // special case as described in JVM spec., p.243 & p.271. | |||
1881 | // The function returns the (pc) offset of the idivl | |||
1882 | // instruction - may be needed for implicit exceptions. | |||
1883 | // | |||
1884 | // normal case special case | |||
1885 | // | |||
1886 | // input : rax,: dividend min_int | |||
1887 | // reg: divisor (may not be rax,/rdx) -1 | |||
1888 | // | |||
1889 | // output: rax,: quotient (= rax, idiv reg) min_int | |||
1890 | // rdx: remainder (= rax, irem reg) 0 | |||
1891 | assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register")do { if (!(reg != rax && reg != rdx)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1891, "assert(" "reg != rax && reg != rdx" ") failed" , "reg cannot be rax, or rdx register"); ::breakpoint(); } } while (0); | |||
1892 | const int min_int = 0x80000000; | |||
1893 | Label normal_case, special_case; | |||
1894 | ||||
1895 | // check for special case | |||
1896 | cmpl(rax, min_int); | |||
1897 | jcc(Assembler::notEqual, normal_case); | |||
1898 | xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) | |||
1899 | cmpl(reg, -1); | |||
1900 | jcc(Assembler::equal, special_case); | |||
1901 | ||||
1902 | // handle normal case | |||
1903 | bind(normal_case); | |||
1904 | cdql(); | |||
1905 | int idivl_offset = offset(); | |||
1906 | idivl(reg); | |||
1907 | ||||
1908 | // normal and special case exit | |||
1909 | bind(special_case); | |||
1910 | ||||
1911 | return idivl_offset; | |||
1912 | } | |||
1913 | ||||
1914 | ||||
1915 | ||||
1916 | void MacroAssembler::decrementl(Register reg, int value) { | |||
1917 | if (value == min_jint) {subl(reg, value) ; return; } | |||
1918 | if (value < 0) { incrementl(reg, -value); return; } | |||
1919 | if (value == 0) { ; return; } | |||
1920 | if (value == 1 && UseIncDec) { decl(reg) ; return; } | |||
1921 | /* else */ { subl(reg, value) ; return; } | |||
1922 | } | |||
1923 | ||||
1924 | void MacroAssembler::decrementl(Address dst, int value) { | |||
1925 | if (value == min_jint) {subl(dst, value) ; return; } | |||
1926 | if (value < 0) { incrementl(dst, -value); return; } | |||
1927 | if (value == 0) { ; return; } | |||
1928 | if (value == 1 && UseIncDec) { decl(dst) ; return; } | |||
1929 | /* else */ { subl(dst, value) ; return; } | |||
1930 | } | |||
1931 | ||||
1932 | void MacroAssembler::division_with_shift (Register reg, int shift_value) { | |||
1933 | assert (shift_value > 0, "illegal shift value")do { if (!(shift_value > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1933, "assert(" "shift_value > 0" ") failed", "illegal shift value" ); ::breakpoint(); } } while (0); | |||
1934 | Label _is_positive; | |||
1935 | testl (reg, reg); | |||
1936 | jcc (Assembler::positive, _is_positive); | |||
1937 | int offset = (1 << shift_value) - 1 ; | |||
1938 | ||||
1939 | if (offset == 1) { | |||
1940 | incrementl(reg); | |||
1941 | } else { | |||
1942 | addl(reg, offset); | |||
1943 | } | |||
1944 | ||||
1945 | bind (_is_positive); | |||
1946 | sarl(reg, shift_value); | |||
1947 | } | |||
1948 | ||||
1949 | void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { | |||
1950 | if (reachable(src)) { | |||
1951 | Assembler::divsd(dst, as_Address(src)); | |||
1952 | } else { | |||
1953 | lea(rscratch1, src); | |||
1954 | Assembler::divsd(dst, Address(rscratch1, 0)); | |||
1955 | } | |||
1956 | } | |||
1957 | ||||
1958 | void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { | |||
1959 | if (reachable(src)) { | |||
1960 | Assembler::divss(dst, as_Address(src)); | |||
1961 | } else { | |||
1962 | lea(rscratch1, src); | |||
1963 | Assembler::divss(dst, Address(rscratch1, 0)); | |||
1964 | } | |||
1965 | } | |||
1966 | ||||
1967 | void MacroAssembler::enter() { | |||
1968 | push(rbp); | |||
1969 | mov(rbp, rsp); | |||
1970 | } | |||
1971 | ||||
1972 | // A 5 byte nop that is safe for patching (see patch_verified_entry) | |||
1973 | void MacroAssembler::fat_nop() { | |||
1974 | if (UseAddressNop) { | |||
1975 | addr_nop_5(); | |||
1976 | } else { | |||
1977 | emit_int8(0x26); // es: | |||
1978 | emit_int8(0x2e); // cs: | |||
1979 | emit_int8(0x64); // fs: | |||
1980 | emit_int8(0x65); // gs: | |||
1981 | emit_int8((unsigned char)0x90); | |||
1982 | } | |||
1983 | } | |||
1984 | ||||
1985 | #ifndef _LP641 | |||
1986 | void MacroAssembler::fcmp(Register tmp) { | |||
1987 | fcmp(tmp, 1, true, true); | |||
1988 | } | |||
1989 | ||||
1990 | void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { | |||
1991 | assert(!pop_right || pop_left, "usage error")do { if (!(!pop_right || pop_left)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1991, "assert(" "!pop_right || pop_left" ") failed", "usage error" ); ::breakpoint(); } } while (0); | |||
1992 | if (VM_Version::supports_cmov()) { | |||
1993 | assert(tmp == noreg, "unneeded temp")do { if (!(tmp == noreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 1993, "assert(" "tmp == noreg" ") failed", "unneeded temp") ; ::breakpoint(); } } while (0); | |||
1994 | if (pop_left) { | |||
1995 | fucomip(index); | |||
1996 | } else { | |||
1997 | fucomi(index); | |||
1998 | } | |||
1999 | if (pop_right) { | |||
2000 | fpop(); | |||
2001 | } | |||
2002 | } else { | |||
2003 | assert(tmp != noreg, "need temp")do { if (!(tmp != noreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2003, "assert(" "tmp != noreg" ") failed", "need temp"); :: breakpoint(); } } while (0); | |||
2004 | if (pop_left) { | |||
2005 | if (pop_right) { | |||
2006 | fcompp(); | |||
2007 | } else { | |||
2008 | fcomp(index); | |||
2009 | } | |||
2010 | } else { | |||
2011 | fcom(index); | |||
2012 | } | |||
2013 | // convert FPU condition into eflags condition via rax, | |||
2014 | save_rax(tmp); | |||
2015 | fwait(); fnstsw_ax(); | |||
2016 | sahf(); | |||
2017 | restore_rax(tmp); | |||
2018 | } | |||
2019 | // condition codes set as follows: | |||
2020 | // | |||
2021 | // CF (corresponds to C0) if x < y | |||
2022 | // PF (corresponds to C2) if unordered | |||
2023 | // ZF (corresponds to C3) if x = y | |||
2024 | } | |||
2025 | ||||
2026 | void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { | |||
2027 | fcmp2int(dst, unordered_is_less, 1, true, true); | |||
2028 | } | |||
2029 | ||||
2030 | void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { | |||
2031 | fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); | |||
2032 | Label L; | |||
2033 | if (unordered_is_less) { | |||
2034 | movl(dst, -1); | |||
2035 | jcc(Assembler::parity, L); | |||
2036 | jcc(Assembler::below , L); | |||
2037 | movl(dst, 0); | |||
2038 | jcc(Assembler::equal , L); | |||
2039 | increment(dst); | |||
2040 | } else { // unordered is greater | |||
2041 | movl(dst, 1); | |||
2042 | jcc(Assembler::parity, L); | |||
2043 | jcc(Assembler::above , L); | |||
2044 | movl(dst, 0); | |||
2045 | jcc(Assembler::equal , L); | |||
2046 | decrementl(dst); | |||
2047 | } | |||
2048 | bind(L); | |||
2049 | } | |||
2050 | ||||
2051 | void MacroAssembler::fld_d(AddressLiteral src) { | |||
2052 | fld_d(as_Address(src)); | |||
2053 | } | |||
2054 | ||||
2055 | void MacroAssembler::fld_s(AddressLiteral src) { | |||
2056 | fld_s(as_Address(src)); | |||
2057 | } | |||
2058 | ||||
2059 | void MacroAssembler::fldcw(AddressLiteral src) { | |||
2060 | Assembler::fldcw(as_Address(src)); | |||
2061 | } | |||
2062 | ||||
2063 | void MacroAssembler::fpop() { | |||
2064 | ffree(); | |||
2065 | fincstp(); | |||
2066 | } | |||
2067 | ||||
2068 | void MacroAssembler::fremr(Register tmp) { | |||
2069 | save_rax(tmp); | |||
2070 | { Label L; | |||
2071 | bind(L); | |||
2072 | fprem(); | |||
2073 | fwait(); fnstsw_ax(); | |||
2074 | sahf(); | |||
2075 | jcc(Assembler::parity, L); | |||
2076 | } | |||
2077 | restore_rax(tmp); | |||
2078 | // Result is in ST0. | |||
2079 | // Note: fxch & fpop to get rid of ST1 | |||
2080 | // (otherwise FPU stack could overflow eventually) | |||
2081 | fxch(1); | |||
2082 | fpop(); | |||
2083 | } | |||
2084 | ||||
2085 | void MacroAssembler::empty_FPU_stack() { | |||
2086 | if (VM_Version::supports_mmx()) { | |||
2087 | emms(); | |||
2088 | } else { | |||
2089 | for (int i = 8; i-- > 0; ) ffree(i); | |||
2090 | } | |||
2091 | } | |||
2092 | #endif // !LP64 | |||
2093 | ||||
2094 | void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) { | |||
2095 | if (reachable(src)) { | |||
2096 | Assembler::mulpd(dst, as_Address(src)); | |||
2097 | } else { | |||
2098 | lea(rscratch1, src); | |||
2099 | Assembler::mulpd(dst, Address(rscratch1, 0)); | |||
2100 | } | |||
2101 | } | |||
2102 | ||||
2103 | void MacroAssembler::load_float(Address src) { | |||
2104 | #ifdef _LP641 | |||
2105 | movflt(xmm0, src); | |||
2106 | #else | |||
2107 | if (UseSSE >= 1) { | |||
2108 | movflt(xmm0, src); | |||
2109 | } else { | |||
2110 | fld_s(src); | |||
2111 | } | |||
2112 | #endif // LP64 | |||
2113 | } | |||
2114 | ||||
2115 | void MacroAssembler::store_float(Address dst) { | |||
2116 | #ifdef _LP641 | |||
2117 | movflt(dst, xmm0); | |||
2118 | #else | |||
2119 | if (UseSSE >= 1) { | |||
2120 | movflt(dst, xmm0); | |||
2121 | } else { | |||
2122 | fstp_s(dst); | |||
2123 | } | |||
2124 | #endif // LP64 | |||
2125 | } | |||
2126 | ||||
2127 | void MacroAssembler::load_double(Address src) { | |||
2128 | #ifdef _LP641 | |||
2129 | movdbl(xmm0, src); | |||
2130 | #else | |||
2131 | if (UseSSE >= 2) { | |||
2132 | movdbl(xmm0, src); | |||
2133 | } else { | |||
2134 | fld_d(src); | |||
2135 | } | |||
2136 | #endif // LP64 | |||
2137 | } | |||
2138 | ||||
2139 | void MacroAssembler::store_double(Address dst) { | |||
2140 | #ifdef _LP641 | |||
2141 | movdbl(dst, xmm0); | |||
2142 | #else | |||
2143 | if (UseSSE >= 2) { | |||
2144 | movdbl(dst, xmm0); | |||
2145 | } else { | |||
2146 | fstp_d(dst); | |||
2147 | } | |||
2148 | #endif // LP64 | |||
2149 | } | |||
2150 | ||||
2151 | // dst = c = a * b + c | |||
2152 | void MacroAssembler::fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) { | |||
2153 | Assembler::vfmadd231sd(c, a, b); | |||
2154 | if (dst != c) { | |||
2155 | movdbl(dst, c); | |||
2156 | } | |||
2157 | } | |||
2158 | ||||
2159 | // dst = c = a * b + c | |||
2160 | void MacroAssembler::fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c) { | |||
2161 | Assembler::vfmadd231ss(c, a, b); | |||
2162 | if (dst != c) { | |||
2163 | movflt(dst, c); | |||
2164 | } | |||
2165 | } | |||
2166 | ||||
2167 | // dst = c = a * b + c | |||
2168 | void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) { | |||
2169 | Assembler::vfmadd231pd(c, a, b, vector_len); | |||
2170 | if (dst != c) { | |||
2171 | vmovdqu(dst, c); | |||
2172 | } | |||
2173 | } | |||
2174 | ||||
2175 | // dst = c = a * b + c | |||
2176 | void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c, int vector_len) { | |||
2177 | Assembler::vfmadd231ps(c, a, b, vector_len); | |||
2178 | if (dst != c) { | |||
2179 | vmovdqu(dst, c); | |||
2180 | } | |||
2181 | } | |||
2182 | ||||
2183 | // dst = c = a * b + c | |||
2184 | void MacroAssembler::vfmad(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) { | |||
2185 | Assembler::vfmadd231pd(c, a, b, vector_len); | |||
2186 | if (dst != c) { | |||
2187 | vmovdqu(dst, c); | |||
2188 | } | |||
2189 | } | |||
2190 | ||||
2191 | // dst = c = a * b + c | |||
2192 | void MacroAssembler::vfmaf(XMMRegister dst, XMMRegister a, Address b, XMMRegister c, int vector_len) { | |||
2193 | Assembler::vfmadd231ps(c, a, b, vector_len); | |||
2194 | if (dst != c) { | |||
2195 | vmovdqu(dst, c); | |||
2196 | } | |||
2197 | } | |||
2198 | ||||
2199 | void MacroAssembler::incrementl(AddressLiteral dst) { | |||
2200 | if (reachable(dst)) { | |||
2201 | incrementl(as_Address(dst)); | |||
2202 | } else { | |||
2203 | lea(rscratch1, dst); | |||
2204 | incrementl(Address(rscratch1, 0)); | |||
2205 | } | |||
2206 | } | |||
2207 | ||||
2208 | void MacroAssembler::incrementl(ArrayAddress dst) { | |||
2209 | incrementl(as_Address(dst)); | |||
2210 | } | |||
2211 | ||||
2212 | void MacroAssembler::incrementl(Register reg, int value) { | |||
2213 | if (value == min_jint) {addl(reg, value) ; return; } | |||
2214 | if (value < 0) { decrementl(reg, -value); return; } | |||
2215 | if (value == 0) { ; return; } | |||
2216 | if (value == 1 && UseIncDec) { incl(reg) ; return; } | |||
2217 | /* else */ { addl(reg, value) ; return; } | |||
2218 | } | |||
2219 | ||||
2220 | void MacroAssembler::incrementl(Address dst, int value) { | |||
2221 | if (value == min_jint) {addl(dst, value) ; return; } | |||
2222 | if (value < 0) { decrementl(dst, -value); return; } | |||
2223 | if (value == 0) { ; return; } | |||
2224 | if (value == 1 && UseIncDec) { incl(dst) ; return; } | |||
2225 | /* else */ { addl(dst, value) ; return; } | |||
2226 | } | |||
2227 | ||||
2228 | void MacroAssembler::jump(AddressLiteral dst) { | |||
2229 | if (reachable(dst)) { | |||
2230 | jmp_literal(dst.target(), dst.rspec()); | |||
2231 | } else { | |||
2232 | lea(rscratch1, dst); | |||
2233 | jmp(rscratch1); | |||
2234 | } | |||
2235 | } | |||
2236 | ||||
2237 | void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { | |||
2238 | if (reachable(dst)) { | |||
2239 | InstructionMark im(this); | |||
2240 | relocate(dst.reloc()); | |||
2241 | const int short_size = 2; | |||
2242 | const int long_size = 6; | |||
2243 | int offs = (intptr_t)dst.target() - ((intptr_t)pc()); | |||
2244 | if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { | |||
2245 | // 0111 tttn #8-bit disp | |||
2246 | emit_int8(0x70 | cc); | |||
2247 | emit_int8((offs - short_size) & 0xFF); | |||
2248 | } else { | |||
2249 | // 0000 1111 1000 tttn #32-bit disp | |||
2250 | emit_int8(0x0F); | |||
2251 | emit_int8((unsigned char)(0x80 | cc)); | |||
2252 | emit_int32(offs - long_size); | |||
2253 | } | |||
2254 | } else { | |||
2255 | #ifdef ASSERT1 | |||
2256 | warning("reversing conditional branch"); | |||
2257 | #endif /* ASSERT */ | |||
2258 | Label skip; | |||
2259 | jccb(reverse[cc], skip)jccb_0(reverse[cc], skip, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2259); | |||
2260 | lea(rscratch1, dst); | |||
2261 | Assembler::jmp(rscratch1); | |||
2262 | bind(skip); | |||
2263 | } | |||
2264 | } | |||
2265 | ||||
2266 | void MacroAssembler::fld_x(AddressLiteral src) { | |||
2267 | Assembler::fld_x(as_Address(src)); | |||
2268 | } | |||
2269 | ||||
2270 | void MacroAssembler::ldmxcsr(AddressLiteral src) { | |||
2271 | if (reachable(src)) { | |||
2272 | Assembler::ldmxcsr(as_Address(src)); | |||
2273 | } else { | |||
2274 | lea(rscratch1, src); | |||
2275 | Assembler::ldmxcsr(Address(rscratch1, 0)); | |||
2276 | } | |||
2277 | } | |||
2278 | ||||
2279 | int MacroAssembler::load_signed_byte(Register dst, Address src) { | |||
2280 | int off; | |||
2281 | if (LP64_ONLY(true ||)true || VM_Version::is_P6()) { | |||
2282 | off = offset(); | |||
2283 | movsbl(dst, src); // movsxb | |||
2284 | } else { | |||
2285 | off = load_unsigned_byte(dst, src); | |||
2286 | shll(dst, 24); | |||
2287 | sarl(dst, 24); | |||
2288 | } | |||
2289 | return off; | |||
2290 | } | |||
2291 | ||||
2292 | // Note: load_signed_short used to be called load_signed_word. | |||
2293 | // Although the 'w' in x86 opcodes refers to the term "word" in the assembler | |||
2294 | // manual, which means 16 bits, that usage is found nowhere in HotSpot code. | |||
2295 | // The term "word" in HotSpot means a 32- or 64-bit machine word. | |||
2296 | int MacroAssembler::load_signed_short(Register dst, Address src) { | |||
2297 | int off; | |||
2298 | if (LP64_ONLY(true ||)true || VM_Version::is_P6()) { | |||
2299 | // This is dubious to me since it seems safe to do a signed 16 => 64 bit | |||
2300 | // version but this is what 64bit has always done. This seems to imply | |||
2301 | // that users are only using 32bits worth. | |||
2302 | off = offset(); | |||
2303 | movswl(dst, src); // movsxw | |||
2304 | } else { | |||
2305 | off = load_unsigned_short(dst, src); | |||
2306 | shll(dst, 16); | |||
2307 | sarl(dst, 16); | |||
2308 | } | |||
2309 | return off; | |||
2310 | } | |||
2311 | ||||
2312 | int MacroAssembler::load_unsigned_byte(Register dst, Address src) { | |||
2313 | // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, | |||
2314 | // and "3.9 Partial Register Penalties", p. 22). | |||
2315 | int off; | |||
2316 | if (LP64_ONLY(true || )true || VM_Version::is_P6() || src.uses(dst)) { | |||
2317 | off = offset(); | |||
2318 | movzbl(dst, src); // movzxb | |||
2319 | } else { | |||
2320 | xorl(dst, dst); | |||
2321 | off = offset(); | |||
2322 | movb(dst, src); | |||
2323 | } | |||
2324 | return off; | |||
2325 | } | |||
2326 | ||||
2327 | // Note: load_unsigned_short used to be called load_unsigned_word. | |||
2328 | int MacroAssembler::load_unsigned_short(Register dst, Address src) { | |||
2329 | // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, | |||
2330 | // and "3.9 Partial Register Penalties", p. 22). | |||
2331 | int off; | |||
2332 | if (LP64_ONLY(true ||)true || VM_Version::is_P6() || src.uses(dst)) { | |||
2333 | off = offset(); | |||
2334 | movzwl(dst, src); // movzxw | |||
2335 | } else { | |||
2336 | xorl(dst, dst); | |||
2337 | off = offset(); | |||
2338 | movw(dst, src); | |||
2339 | } | |||
2340 | return off; | |||
2341 | } | |||
2342 | ||||
2343 | void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { | |||
2344 | switch (size_in_bytes) { | |||
2345 | #ifndef _LP641 | |||
2346 | case 8: | |||
2347 | assert(dst2 != noreg, "second dest register required")do { if (!(dst2 != noreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2347, "assert(" "dst2 != noreg" ") failed", "second dest register required" ); ::breakpoint(); } } while (0); | |||
2348 | movl(dst, src); | |||
2349 | movl(dst2, src.plus_disp(BytesPerInt)); | |||
2350 | break; | |||
2351 | #else | |||
2352 | case 8: movq(dst, src); break; | |||
2353 | #endif | |||
2354 | case 4: movl(dst, src); break; | |||
2355 | case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; | |||
2356 | case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; | |||
2357 | default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2357); ::breakpoint(); } while (0); | |||
2358 | } | |||
2359 | } | |||
2360 | ||||
2361 | void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { | |||
2362 | switch (size_in_bytes) { | |||
2363 | #ifndef _LP641 | |||
2364 | case 8: | |||
2365 | assert(src2 != noreg, "second source register required")do { if (!(src2 != noreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2365, "assert(" "src2 != noreg" ") failed", "second source register required" ); ::breakpoint(); } } while (0); | |||
2366 | movl(dst, src); | |||
2367 | movl(dst.plus_disp(BytesPerInt), src2); | |||
2368 | break; | |||
2369 | #else | |||
2370 | case 8: movq(dst, src); break; | |||
2371 | #endif | |||
2372 | case 4: movl(dst, src); break; | |||
2373 | case 2: movw(dst, src); break; | |||
2374 | case 1: movb(dst, src); break; | |||
2375 | default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2375); ::breakpoint(); } while (0); | |||
2376 | } | |||
2377 | } | |||
2378 | ||||
2379 | void MacroAssembler::mov32(AddressLiteral dst, Register src) { | |||
2380 | if (reachable(dst)) { | |||
2381 | movl(as_Address(dst), src); | |||
2382 | } else { | |||
2383 | lea(rscratch1, dst); | |||
2384 | movl(Address(rscratch1, 0), src); | |||
2385 | } | |||
2386 | } | |||
2387 | ||||
2388 | void MacroAssembler::mov32(Register dst, AddressLiteral src) { | |||
2389 | if (reachable(src)) { | |||
2390 | movl(dst, as_Address(src)); | |||
2391 | } else { | |||
2392 | lea(rscratch1, src); | |||
2393 | movl(dst, Address(rscratch1, 0)); | |||
2394 | } | |||
2395 | } | |||
2396 | ||||
2397 | // C++ bool manipulation | |||
2398 | ||||
2399 | void MacroAssembler::movbool(Register dst, Address src) { | |||
2400 | if(sizeof(bool) == 1) | |||
2401 | movb(dst, src); | |||
2402 | else if(sizeof(bool) == 2) | |||
2403 | movw(dst, src); | |||
2404 | else if(sizeof(bool) == 4) | |||
2405 | movl(dst, src); | |||
2406 | else | |||
2407 | // unsupported | |||
2408 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2408); ::breakpoint(); } while (0); | |||
2409 | } | |||
2410 | ||||
2411 | void MacroAssembler::movbool(Address dst, bool boolconst) { | |||
2412 | if(sizeof(bool) == 1) | |||
2413 | movb(dst, (int) boolconst); | |||
2414 | else if(sizeof(bool) == 2) | |||
2415 | movw(dst, (int) boolconst); | |||
2416 | else if(sizeof(bool) == 4) | |||
2417 | movl(dst, (int) boolconst); | |||
2418 | else | |||
2419 | // unsupported | |||
2420 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2420); ::breakpoint(); } while (0); | |||
2421 | } | |||
2422 | ||||
2423 | void MacroAssembler::movbool(Address dst, Register src) { | |||
2424 | if(sizeof(bool) == 1) | |||
2425 | movb(dst, src); | |||
2426 | else if(sizeof(bool) == 2) | |||
2427 | movw(dst, src); | |||
2428 | else if(sizeof(bool) == 4) | |||
2429 | movl(dst, src); | |||
2430 | else | |||
2431 | // unsupported | |||
2432 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2432); ::breakpoint(); } while (0); | |||
2433 | } | |||
2434 | ||||
2435 | void MacroAssembler::movbyte(ArrayAddress dst, int src) { | |||
2436 | movb(as_Address(dst), src); | |||
2437 | } | |||
2438 | ||||
2439 | void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { | |||
2440 | if (reachable(src)) { | |||
2441 | movdl(dst, as_Address(src)); | |||
2442 | } else { | |||
2443 | lea(rscratch1, src); | |||
2444 | movdl(dst, Address(rscratch1, 0)); | |||
2445 | } | |||
2446 | } | |||
2447 | ||||
2448 | void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { | |||
2449 | if (reachable(src)) { | |||
2450 | movq(dst, as_Address(src)); | |||
2451 | } else { | |||
2452 | lea(rscratch1, src); | |||
2453 | movq(dst, Address(rscratch1, 0)); | |||
2454 | } | |||
2455 | } | |||
2456 | ||||
2457 | void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { | |||
2458 | if (reachable(src)) { | |||
2459 | if (UseXmmLoadAndClearUpper) { | |||
2460 | movsd (dst, as_Address(src)); | |||
2461 | } else { | |||
2462 | movlpd(dst, as_Address(src)); | |||
2463 | } | |||
2464 | } else { | |||
2465 | lea(rscratch1, src); | |||
2466 | if (UseXmmLoadAndClearUpper) { | |||
2467 | movsd (dst, Address(rscratch1, 0)); | |||
2468 | } else { | |||
2469 | movlpd(dst, Address(rscratch1, 0)); | |||
2470 | } | |||
2471 | } | |||
2472 | } | |||
2473 | ||||
2474 | void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { | |||
2475 | if (reachable(src)) { | |||
2476 | movss(dst, as_Address(src)); | |||
2477 | } else { | |||
2478 | lea(rscratch1, src); | |||
2479 | movss(dst, Address(rscratch1, 0)); | |||
2480 | } | |||
2481 | } | |||
2482 | ||||
2483 | void MacroAssembler::movptr(Register dst, Register src) { | |||
2484 | LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src)); | |||
2485 | } | |||
2486 | ||||
2487 | void MacroAssembler::movptr(Register dst, Address src) { | |||
2488 | LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src)); | |||
2489 | } | |||
2490 | ||||
2491 | // src should NEVER be a real pointer. Use AddressLiteral for true pointers | |||
2492 | void MacroAssembler::movptr(Register dst, intptr_t src) { | |||
2493 | LP64_ONLY(mov64(dst, src))mov64(dst, src) NOT_LP64(movl(dst, src)); | |||
2494 | } | |||
2495 | ||||
2496 | void MacroAssembler::movptr(Address dst, Register src) { | |||
2497 | LP64_ONLY(movq(dst, src))movq(dst, src) NOT_LP64(movl(dst, src)); | |||
2498 | } | |||
2499 | ||||
2500 | void MacroAssembler::movdqu(Address dst, XMMRegister src) { | |||
2501 | assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((src->encoding() < 16) || VM_Version::supports_avx512vl ()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2501, "assert(" "((src->encoding() < 16) || VM_Version::supports_avx512vl())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2502 | Assembler::movdqu(dst, src); | |||
2503 | } | |||
2504 | ||||
2505 | void MacroAssembler::movdqu(XMMRegister dst, Address src) { | |||
2506 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl ()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2506, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
| ||||
2507 | Assembler::movdqu(dst, src); | |||
2508 | } | |||
2509 | ||||
2510 | void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) { | |||
2511 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vl()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2511, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2512 | Assembler::movdqu(dst, src); | |||
2513 | } | |||
2514 | ||||
2515 | void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) { | |||
2516 | if (reachable(src)) { | |||
2517 | movdqu(dst, as_Address(src)); | |||
2518 | } else { | |||
2519 | lea(scratchReg, src); | |||
2520 | movdqu(dst, Address(scratchReg, 0)); | |||
2521 | } | |||
2522 | } | |||
2523 | ||||
2524 | void MacroAssembler::vmovdqu(Address dst, XMMRegister src) { | |||
2525 | assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((src->encoding() < 16) || VM_Version::supports_avx512vl ()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2525, "assert(" "((src->encoding() < 16) || VM_Version::supports_avx512vl())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2526 | Assembler::vmovdqu(dst, src); | |||
2527 | } | |||
2528 | ||||
2529 | void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { | |||
2530 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl ()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2530, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2531 | Assembler::vmovdqu(dst, src); | |||
2532 | } | |||
2533 | ||||
2534 | void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) { | |||
2535 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vl()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2535, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2536 | Assembler::vmovdqu(dst, src); | |||
2537 | } | |||
2538 | ||||
2539 | void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) { | |||
2540 | if (reachable(src)) { | |||
2541 | vmovdqu(dst, as_Address(src)); | |||
2542 | } | |||
2543 | else { | |||
2544 | lea(scratch_reg, src); | |||
2545 | vmovdqu(dst, Address(scratch_reg, 0)); | |||
2546 | } | |||
2547 | } | |||
2548 | ||||
2549 | void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len) { | |||
2550 | assert(vector_len <= AVX_256bit, "AVX2 vector length")do { if (!(vector_len <= AVX_256bit)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2550, "assert(" "vector_len <= AVX_256bit" ") failed", "AVX2 vector length" ); ::breakpoint(); } } while (0); | |||
2551 | if (vector_len == AVX_256bit) { | |||
2552 | vmovdqu(dst, src, scratch_reg); | |||
2553 | } else { | |||
2554 | movdqu(dst, src, scratch_reg); | |||
2555 | } | |||
2556 | } | |||
2557 | ||||
2558 | void MacroAssembler::kmov(KRegister dst, Address src) { | |||
2559 | if (VM_Version::supports_avx512bw()) { | |||
2560 | kmovql(dst, src); | |||
2561 | } else { | |||
2562 | assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2562, "assert(" "VM_Version::supports_evex()" ") failed", "" ); ::breakpoint(); } } while (0); | |||
2563 | kmovwl(dst, src); | |||
2564 | } | |||
2565 | } | |||
2566 | ||||
2567 | void MacroAssembler::kmov(Address dst, KRegister src) { | |||
2568 | if (VM_Version::supports_avx512bw()) { | |||
2569 | kmovql(dst, src); | |||
2570 | } else { | |||
2571 | assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2571, "assert(" "VM_Version::supports_evex()" ") failed", "" ); ::breakpoint(); } } while (0); | |||
2572 | kmovwl(dst, src); | |||
2573 | } | |||
2574 | } | |||
2575 | ||||
2576 | void MacroAssembler::kmov(KRegister dst, KRegister src) { | |||
2577 | if (VM_Version::supports_avx512bw()) { | |||
2578 | kmovql(dst, src); | |||
2579 | } else { | |||
2580 | assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2580, "assert(" "VM_Version::supports_evex()" ") failed", "" ); ::breakpoint(); } } while (0); | |||
2581 | kmovwl(dst, src); | |||
2582 | } | |||
2583 | } | |||
2584 | ||||
2585 | void MacroAssembler::kmov(Register dst, KRegister src) { | |||
2586 | if (VM_Version::supports_avx512bw()) { | |||
2587 | kmovql(dst, src); | |||
2588 | } else { | |||
2589 | assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2589, "assert(" "VM_Version::supports_evex()" ") failed", "" ); ::breakpoint(); } } while (0); | |||
2590 | kmovwl(dst, src); | |||
2591 | } | |||
2592 | } | |||
2593 | ||||
2594 | void MacroAssembler::kmov(KRegister dst, Register src) { | |||
2595 | if (VM_Version::supports_avx512bw()) { | |||
2596 | kmovql(dst, src); | |||
2597 | } else { | |||
2598 | assert(VM_Version::supports_evex(), "")do { if (!(VM_Version::supports_evex())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2598, "assert(" "VM_Version::supports_evex()" ") failed", "" ); ::breakpoint(); } } while (0); | |||
2599 | kmovwl(dst, src); | |||
2600 | } | |||
2601 | } | |||
2602 | ||||
2603 | void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_reg) { | |||
2604 | if (reachable(src)) { | |||
2605 | kmovql(dst, as_Address(src)); | |||
2606 | } else { | |||
2607 | lea(scratch_reg, src); | |||
2608 | kmovql(dst, Address(scratch_reg, 0)); | |||
2609 | } | |||
2610 | } | |||
2611 | ||||
2612 | void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) { | |||
2613 | if (reachable(src)) { | |||
2614 | kmovwl(dst, as_Address(src)); | |||
2615 | } else { | |||
2616 | lea(scratch_reg, src); | |||
2617 | kmovwl(dst, Address(scratch_reg, 0)); | |||
2618 | } | |||
2619 | } | |||
2620 | ||||
2621 | void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, | |||
2622 | int vector_len, Register scratch_reg) { | |||
2623 | if (reachable(src)) { | |||
2624 | if (mask == k0) { | |||
2625 | Assembler::evmovdqub(dst, as_Address(src), merge, vector_len); | |||
2626 | } else { | |||
2627 | Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len); | |||
2628 | } | |||
2629 | } else { | |||
2630 | lea(scratch_reg, src); | |||
2631 | if (mask == k0) { | |||
2632 | Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len); | |||
2633 | } else { | |||
2634 | Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len); | |||
2635 | } | |||
2636 | } | |||
2637 | } | |||
2638 | ||||
2639 | void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, | |||
2640 | int vector_len, Register scratch_reg) { | |||
2641 | if (reachable(src)) { | |||
2642 | Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len); | |||
2643 | } else { | |||
2644 | lea(scratch_reg, src); | |||
2645 | Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len); | |||
2646 | } | |||
2647 | } | |||
2648 | ||||
2649 | void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, | |||
2650 | int vector_len, Register scratch_reg) { | |||
2651 | if (reachable(src)) { | |||
2652 | Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len); | |||
2653 | } else { | |||
2654 | lea(scratch_reg, src); | |||
2655 | Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len); | |||
2656 | } | |||
2657 | } | |||
2658 | ||||
2659 | void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, | |||
2660 | int vector_len, Register scratch_reg) { | |||
2661 | if (reachable(src)) { | |||
2662 | Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len); | |||
2663 | } else { | |||
2664 | lea(scratch_reg, src); | |||
2665 | Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len); | |||
2666 | } | |||
2667 | } | |||
2668 | ||||
2669 | void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { | |||
2670 | if (reachable(src)) { | |||
2671 | Assembler::evmovdquq(dst, as_Address(src), vector_len); | |||
2672 | } else { | |||
2673 | lea(rscratch, src); | |||
2674 | Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len); | |||
2675 | } | |||
2676 | } | |||
2677 | ||||
2678 | void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) { | |||
2679 | if (reachable(src)) { | |||
2680 | Assembler::movdqa(dst, as_Address(src)); | |||
2681 | } else { | |||
2682 | lea(rscratch1, src); | |||
2683 | Assembler::movdqa(dst, Address(rscratch1, 0)); | |||
2684 | } | |||
2685 | } | |||
2686 | ||||
2687 | void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { | |||
2688 | if (reachable(src)) { | |||
2689 | Assembler::movsd(dst, as_Address(src)); | |||
2690 | } else { | |||
2691 | lea(rscratch1, src); | |||
2692 | Assembler::movsd(dst, Address(rscratch1, 0)); | |||
2693 | } | |||
2694 | } | |||
2695 | ||||
2696 | void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { | |||
2697 | if (reachable(src)) { | |||
2698 | Assembler::movss(dst, as_Address(src)); | |||
2699 | } else { | |||
2700 | lea(rscratch1, src); | |||
2701 | Assembler::movss(dst, Address(rscratch1, 0)); | |||
2702 | } | |||
2703 | } | |||
2704 | ||||
2705 | void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { | |||
2706 | if (reachable(src)) { | |||
2707 | Assembler::mulsd(dst, as_Address(src)); | |||
2708 | } else { | |||
2709 | lea(rscratch1, src); | |||
2710 | Assembler::mulsd(dst, Address(rscratch1, 0)); | |||
2711 | } | |||
2712 | } | |||
2713 | ||||
2714 | void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { | |||
2715 | if (reachable(src)) { | |||
2716 | Assembler::mulss(dst, as_Address(src)); | |||
2717 | } else { | |||
2718 | lea(rscratch1, src); | |||
2719 | Assembler::mulss(dst, Address(rscratch1, 0)); | |||
2720 | } | |||
2721 | } | |||
2722 | ||||
2723 | void MacroAssembler::null_check(Register reg, int offset) { | |||
2724 | if (needs_explicit_null_check(offset)) { | |||
2725 | // provoke OS NULL exception if reg = NULL by | |||
2726 | // accessing M[reg] w/o changing any (non-CC) registers | |||
2727 | // NOTE: cmpl is plenty here to provoke a segv | |||
2728 | cmpptr(rax, Address(reg, 0)); | |||
2729 | // Note: should probably use testl(rax, Address(reg, 0)); | |||
2730 | // may be shorter code (however, this version of | |||
2731 | // testl needs to be implemented first) | |||
2732 | } else { | |||
2733 | // nothing to do, (later) access of M[reg + offset] | |||
2734 | // will provoke OS NULL exception if reg = NULL | |||
2735 | } | |||
2736 | } | |||
2737 | ||||
2738 | void MacroAssembler::os_breakpoint() { | |||
2739 | // instead of directly emitting a breakpoint, call os:breakpoint for better debugability | |||
2740 | // (e.g., MSVC can't call ps() otherwise) | |||
2741 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)((address)((address_word)(os::breakpoint))))); | |||
2742 | } | |||
2743 | ||||
2744 | void MacroAssembler::unimplemented(const char* what) { | |||
2745 | const char* buf = NULL__null; | |||
2746 | { | |||
2747 | ResourceMark rm; | |||
2748 | stringStream ss; | |||
2749 | ss.print("unimplemented: %s", what); | |||
2750 | buf = code_string(ss.as_string()); | |||
2751 | } | |||
2752 | stop(buf); | |||
2753 | } | |||
2754 | ||||
// XSTATE_BV is defined (as 0x200) for _LP64 builds only.
// NOTE(review): presumably the byte offset of the XSTATE_BV header field
// within an XSAVE area - confirm against the code that uses this macro.
2755 | #ifdef _LP641 | |||
2756 | #define XSTATE_BV0x200 0x200 | |||
2757 | #endif | |||
2758 | ||||
2759 | void MacroAssembler::pop_CPU_state() { | |||
2760 | pop_FPU_state(); | |||
2761 | pop_IU_state(); | |||
2762 | } | |||
2763 | ||||
2764 | void MacroAssembler::pop_FPU_state() { | |||
2765 | #ifndef _LP641 | |||
2766 | frstor(Address(rsp, 0)); | |||
2767 | #else | |||
2768 | fxrstor(Address(rsp, 0)); | |||
2769 | #endif | |||
2770 | addptr(rsp, FPUStateSizeInWords * wordSize); | |||
2771 | } | |||
2772 | ||||
2773 | void MacroAssembler::pop_IU_state() { | |||
2774 | popa(); | |||
2775 | LP64_ONLY(addq(rsp, 8))addq(rsp, 8); | |||
2776 | popf(); | |||
2777 | } | |||
2778 | ||||
2779 | // Save Integer and Float state | |||
2780 | // Warning: Stack must be 16 byte aligned (64bit) | |||
2781 | void MacroAssembler::push_CPU_state() { | |||
2782 | push_IU_state(); | |||
2783 | push_FPU_state(); | |||
2784 | } | |||
2785 | ||||
2786 | void MacroAssembler::push_FPU_state() { | |||
2787 | subptr(rsp, FPUStateSizeInWords * wordSize); | |||
2788 | #ifndef _LP641 | |||
2789 | fnsave(Address(rsp, 0)); | |||
2790 | fwait(); | |||
2791 | #else | |||
2792 | fxsave(Address(rsp, 0)); | |||
2793 | #endif // LP64 | |||
2794 | } | |||
2795 | ||||
2796 | void MacroAssembler::push_IU_state() { | |||
2797 | // Push flags first because pusha kills them | |||
2798 | pushf(); | |||
2799 | // Make sure rsp stays 16-byte aligned | |||
2800 | LP64_ONLY(subq(rsp, 8))subq(rsp, 8); | |||
2801 | pusha(); | |||
2802 | } | |||
2803 | ||||
2804 | void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { // determine java_thread register | |||
2805 | if (!java_thread->is_valid()) { | |||
2806 | java_thread = rdi; | |||
2807 | get_thread(java_thread); | |||
2808 | } | |||
2809 | // we must set sp to zero to clear frame | |||
2810 | movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD0L); | |||
2811 | // must clear fp, so that compiled frames are not confused; it is | |||
2812 | // possible that we need it only for debugging | |||
2813 | if (clear_fp) { | |||
2814 | movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD0L); | |||
2815 | } | |||
2816 | // Always clear the pc because it could have been set by make_walkable() | |||
2817 | movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD0L); | |||
2818 | vzeroupper(); | |||
2819 | } | |||
2820 | ||||
2821 | void MacroAssembler::restore_rax(Register tmp) { | |||
2822 | if (tmp == noreg) pop(rax); | |||
2823 | else if (tmp != rax) mov(rax, tmp); | |||
2824 | } | |||
2825 | ||||
2826 | void MacroAssembler::round_to(Register reg, int modulus) { | |||
2827 | addptr(reg, modulus - 1); | |||
2828 | andptr(reg, -modulus); | |||
2829 | } | |||
2830 | ||||
2831 | void MacroAssembler::save_rax(Register tmp) { | |||
2832 | if (tmp == noreg) push(rax); | |||
2833 | else if (tmp != rax) mov(tmp, rax); | |||
2834 | } | |||
2835 | ||||
2836 | void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod) { | |||
2837 | if (at_return) { | |||
2838 | // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore, | |||
2839 | // we may safely use rsp instead to perform the stack watermark check. | |||
2840 | cmpptr(in_nmethod ? rsp : rbp, Address(thread_reg, JavaThread::polling_word_offset())); | |||
2841 | jcc(Assembler::above, slow_path); | |||
2842 | return; | |||
2843 | } | |||
2844 | testb(Address(thread_reg, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); | |||
2845 | jcc(Assembler::notZero, slow_path); // handshake bit set implies poll | |||
2846 | } | |||
2847 | ||||
2848 | // Calls to C land | |||
2849 | // | |||
2850 | // When entering C land, the rbp, & rsp of the last Java frame have to be recorded | |||
2851 | // in the (thread-local) JavaThread object. When leaving C land, the last Java fp | |||
2852 | // has to be reset to 0. This is required to allow proper stack traversal. | |||
2853 | void MacroAssembler::set_last_Java_frame(Register java_thread, | |||
2854 | Register last_java_sp, | |||
2855 | Register last_java_fp, | |||
2856 | address last_java_pc) { | |||
2857 | vzeroupper(); | |||
2858 | // determine java_thread register | |||
2859 | if (!java_thread->is_valid()) { | |||
2860 | java_thread = rdi; | |||
2861 | get_thread(java_thread); | |||
2862 | } | |||
2863 | // determine last_java_sp register | |||
2864 | if (!last_java_sp->is_valid()) { | |||
2865 | last_java_sp = rsp; | |||
2866 | } | |||
2867 | ||||
2868 | // last_java_fp is optional | |||
2869 | ||||
2870 | if (last_java_fp->is_valid()) { | |||
2871 | movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); | |||
2872 | } | |||
2873 | ||||
2874 | // last_java_pc is optional | |||
2875 | ||||
2876 | if (last_java_pc != NULL__null) { | |||
2877 | lea(Address(java_thread, | |||
2878 | JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), | |||
2879 | InternalAddress(last_java_pc)); | |||
2880 | ||||
2881 | } | |||
2882 | movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); | |||
2883 | } | |||
2884 | ||||
2885 | void MacroAssembler::shlptr(Register dst, int imm8) { | |||
2886 | LP64_ONLY(shlq(dst, imm8))shlq(dst, imm8) NOT_LP64(shll(dst, imm8)); | |||
2887 | } | |||
2888 | ||||
2889 | void MacroAssembler::shrptr(Register dst, int imm8) { | |||
2890 | LP64_ONLY(shrq(dst, imm8))shrq(dst, imm8) NOT_LP64(shrl(dst, imm8)); | |||
2891 | } | |||
2892 | ||||
2893 | void MacroAssembler::sign_extend_byte(Register reg) { | |||
2894 | if (LP64_ONLY(true ||)true || (VM_Version::is_P6() && reg->has_byte_register())) { | |||
2895 | movsbl(reg, reg); // movsxb | |||
2896 | } else { | |||
2897 | shll(reg, 24); | |||
2898 | sarl(reg, 24); | |||
2899 | } | |||
2900 | } | |||
2901 | ||||
2902 | void MacroAssembler::sign_extend_short(Register reg) { | |||
2903 | if (LP64_ONLY(true ||)true || VM_Version::is_P6()) { | |||
2904 | movswl(reg, reg); // movsxw | |||
2905 | } else { | |||
2906 | shll(reg, 16); | |||
2907 | sarl(reg, 16); | |||
2908 | } | |||
2909 | } | |||
2910 | ||||
2911 | void MacroAssembler::testl(Register dst, AddressLiteral src) { | |||
2912 | assert(reachable(src), "Address should be reachable")do { if (!(reachable(src))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2912, "assert(" "reachable(src)" ") failed", "Address should be reachable" ); ::breakpoint(); } } while (0); | |||
2913 | testl(dst, as_Address(src)); | |||
2914 | } | |||
2915 | ||||
2916 | void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) { | |||
2917 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2917, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2918 | Assembler::pcmpeqb(dst, src); | |||
2919 | } | |||
2920 | ||||
2921 | void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) { | |||
2922 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2922, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2923 | Assembler::pcmpeqw(dst, src); | |||
2924 | } | |||
2925 | ||||
2926 | void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) { | |||
2927 | assert((dst->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2927, "assert(" "(dst->encoding() < 16)" ") failed", "XMM register should be 0-15" ); ::breakpoint(); } } while (0); | |||
2928 | Assembler::pcmpestri(dst, src, imm8); | |||
2929 | } | |||
2930 | ||||
2931 | void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { | |||
2932 | assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding () < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2932, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2933 | Assembler::pcmpestri(dst, src, imm8); | |||
2934 | } | |||
2935 | ||||
2936 | void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) { | |||
2937 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2937, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2938 | Assembler::pmovzxbw(dst, src); | |||
2939 | } | |||
2940 | ||||
2941 | void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) { | |||
2942 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw ()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2942, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2943 | Assembler::pmovzxbw(dst, src); | |||
2944 | } | |||
2945 | ||||
2946 | void MacroAssembler::pmovmskb(Register dst, XMMRegister src) { | |||
2947 | assert((src->encoding() < 16),"XMM register should be 0-15")do { if (!((src->encoding() < 16))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2947, "assert(" "(src->encoding() < 16)" ") failed", "XMM register should be 0-15" ); ::breakpoint(); } } while (0); | |||
2948 | Assembler::pmovmskb(dst, src); | |||
2949 | } | |||
2950 | ||||
2951 | void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) { | |||
2952 | assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding () < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 2952, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
2953 | Assembler::ptest(dst, src); | |||
2954 | } | |||
2955 | ||||
2956 | void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { | |||
2957 | if (reachable(src)) { | |||
2958 | Assembler::sqrtsd(dst, as_Address(src)); | |||
2959 | } else { | |||
2960 | lea(rscratch1, src); | |||
2961 | Assembler::sqrtsd(dst, Address(rscratch1, 0)); | |||
2962 | } | |||
2963 | } | |||
2964 | ||||
2965 | void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { | |||
2966 | if (reachable(src)) { | |||
2967 | Assembler::sqrtss(dst, as_Address(src)); | |||
2968 | } else { | |||
2969 | lea(rscratch1, src); | |||
2970 | Assembler::sqrtss(dst, Address(rscratch1, 0)); | |||
2971 | } | |||
2972 | } | |||
2973 | ||||
2974 | void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { | |||
2975 | if (reachable(src)) { | |||
2976 | Assembler::subsd(dst, as_Address(src)); | |||
2977 | } else { | |||
2978 | lea(rscratch1, src); | |||
2979 | Assembler::subsd(dst, Address(rscratch1, 0)); | |||
2980 | } | |||
2981 | } | |||
2982 | ||||
2983 | void MacroAssembler::roundsd(XMMRegister dst, AddressLiteral src, int32_t rmode, Register scratch_reg) { | |||
2984 | if (reachable(src)) { | |||
2985 | Assembler::roundsd(dst, as_Address(src), rmode); | |||
2986 | } else { | |||
2987 | lea(scratch_reg, src); | |||
2988 | Assembler::roundsd(dst, Address(scratch_reg, 0), rmode); | |||
2989 | } | |||
2990 | } | |||
2991 | ||||
2992 | void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { | |||
2993 | if (reachable(src)) { | |||
2994 | Assembler::subss(dst, as_Address(src)); | |||
2995 | } else { | |||
2996 | lea(rscratch1, src); | |||
2997 | Assembler::subss(dst, Address(rscratch1, 0)); | |||
2998 | } | |||
2999 | } | |||
3000 | ||||
3001 | void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { | |||
3002 | if (reachable(src)) { | |||
3003 | Assembler::ucomisd(dst, as_Address(src)); | |||
3004 | } else { | |||
3005 | lea(rscratch1, src); | |||
3006 | Assembler::ucomisd(dst, Address(rscratch1, 0)); | |||
3007 | } | |||
3008 | } | |||
3009 | ||||
3010 | void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { | |||
3011 | if (reachable(src)) { | |||
3012 | Assembler::ucomiss(dst, as_Address(src)); | |||
3013 | } else { | |||
3014 | lea(rscratch1, src); | |||
3015 | Assembler::ucomiss(dst, Address(rscratch1, 0)); | |||
3016 | } | |||
3017 | } | |||
3018 | ||||
3019 | void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) { | |||
3020 | // Used in sign-bit flipping with aligned address. | |||
3021 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15 ) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3021, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)" ") failed", "SSE mode requires address alignment 16 bytes"); ::breakpoint(); } } while (0); | |||
3022 | if (reachable(src)) { | |||
3023 | Assembler::xorpd(dst, as_Address(src)); | |||
3024 | } else { | |||
3025 | lea(scratch_reg, src); | |||
3026 | Assembler::xorpd(dst, Address(scratch_reg, 0)); | |||
3027 | } | |||
3028 | } | |||
3029 | ||||
3030 | void MacroAssembler::xorpd(XMMRegister dst, XMMRegister src) { | |||
3031 | if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { | |||
3032 | Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); | |||
3033 | } | |||
3034 | else { | |||
3035 | Assembler::xorpd(dst, src); | |||
3036 | } | |||
3037 | } | |||
3038 | ||||
3039 | void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) { | |||
3040 | if (UseAVX > 2 && !VM_Version::supports_avx512dq() && (dst->encoding() == src->encoding())) { | |||
3041 | Assembler::vpxor(dst, dst, src, Assembler::AVX_512bit); | |||
3042 | } else { | |||
3043 | Assembler::xorps(dst, src); | |||
3044 | } | |||
3045 | } | |||
3046 | ||||
3047 | void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) { | |||
3048 | // Used in sign-bit flipping with aligned address. | |||
3049 | assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || (((intptr_t)src.target() & 15 ) == 0))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3049, "assert(" "(UseAVX > 0) || (((intptr_t)src.target() & 15) == 0)" ") failed", "SSE mode requires address alignment 16 bytes"); ::breakpoint(); } } while (0); | |||
3050 | if (reachable(src)) { | |||
3051 | Assembler::xorps(dst, as_Address(src)); | |||
3052 | } else { | |||
3053 | lea(scratch_reg, src); | |||
3054 | Assembler::xorps(dst, Address(scratch_reg, 0)); | |||
3055 | } | |||
3056 | } | |||
3057 | ||||
3058 | void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { | |||
3059 | // Used in sign-bit flipping with aligned address. | |||
3060 | bool aligned_adr = (((intptr_t)src.target() & 15) == 0); | |||
3061 | assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes")do { if (!((UseAVX > 0) || aligned_adr)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3061, "assert(" "(UseAVX > 0) || aligned_adr" ") failed" , "SSE mode requires address alignment 16 bytes"); ::breakpoint (); } } while (0); | |||
3062 | if (reachable(src)) { | |||
3063 | Assembler::pshufb(dst, as_Address(src)); | |||
3064 | } else { | |||
3065 | lea(rscratch1, src); | |||
3066 | Assembler::pshufb(dst, Address(rscratch1, 0)); | |||
3067 | } | |||
3068 | } | |||
3069 | ||||
3070 | // AVX 3-operands instructions | |||
3071 | ||||
3072 | void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3073 | if (reachable(src)) { | |||
3074 | vaddsd(dst, nds, as_Address(src)); | |||
3075 | } else { | |||
3076 | lea(rscratch1, src); | |||
3077 | vaddsd(dst, nds, Address(rscratch1, 0)); | |||
3078 | } | |||
3079 | } | |||
3080 | ||||
3081 | void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3082 | if (reachable(src)) { | |||
3083 | vaddss(dst, nds, as_Address(src)); | |||
3084 | } else { | |||
3085 | lea(rscratch1, src); | |||
3086 | vaddss(dst, nds, Address(rscratch1, 0)); | |||
3087 | } | |||
3088 | } | |||
3089 | ||||
3090 | void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { | |||
3091 | assert(UseAVX > 0, "requires some form of AVX")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3091, "assert(" "UseAVX > 0" ") failed", "requires some form of AVX" ); ::breakpoint(); } } while (0); | |||
3092 | if (reachable(src)) { | |||
3093 | Assembler::vpaddb(dst, nds, as_Address(src), vector_len); | |||
3094 | } else { | |||
3095 | lea(rscratch, src); | |||
3096 | Assembler::vpaddb(dst, nds, Address(rscratch, 0), vector_len); | |||
3097 | } | |||
3098 | } | |||
3099 | ||||
3100 | void MacroAssembler::vpaddd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) { | |||
3101 | assert(UseAVX > 0, "requires some form of AVX")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3101, "assert(" "UseAVX > 0" ") failed", "requires some form of AVX" ); ::breakpoint(); } } while (0); | |||
3102 | if (reachable(src)) { | |||
3103 | Assembler::vpaddd(dst, nds, as_Address(src), vector_len); | |||
3104 | } else { | |||
3105 | lea(rscratch, src); | |||
3106 | Assembler::vpaddd(dst, nds, Address(rscratch, 0), vector_len); | |||
3107 | } | |||
3108 | } | |||
3109 | ||||
3110 | void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { | |||
3111 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vldq()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3111, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3112 | vandps(dst, nds, negate_field, vector_len); | |||
3113 | } | |||
3114 | ||||
3115 | void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) { | |||
3116 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vldq()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3116, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3117 | vandpd(dst, nds, negate_field, vector_len); | |||
3118 | } | |||
3119 | ||||
3120 | void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
3121 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3121, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3122 | Assembler::vpaddb(dst, nds, src, vector_len); | |||
3123 | } | |||
3124 | ||||
3125 | void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { | |||
3126 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3126, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3127 | Assembler::vpaddb(dst, nds, src, vector_len); | |||
3128 | } | |||
3129 | ||||
3130 | void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
3131 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3131, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3132 | Assembler::vpaddw(dst, nds, src, vector_len); | |||
3133 | } | |||
3134 | ||||
3135 | void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { | |||
3136 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3136, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3137 | Assembler::vpaddw(dst, nds, src, vector_len); | |||
3138 | } | |||
3139 | ||||
3140 | void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3141 | if (reachable(src)) { | |||
3142 | Assembler::vpand(dst, nds, as_Address(src), vector_len); | |||
3143 | } else { | |||
3144 | lea(scratch_reg, src); | |||
3145 | Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3146 | } | |||
3147 | } | |||
3148 | ||||
3149 | void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) { | |||
3150 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3150, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3151 | Assembler::vpbroadcastw(dst, src, vector_len); | |||
3152 | } | |||
3153 | ||||
3154 | void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
3155 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3155, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3156 | Assembler::vpcmpeqb(dst, nds, src, vector_len); | |||
3157 | } | |||
3158 | ||||
3159 | void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
3160 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3160, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3161 | Assembler::vpcmpeqw(dst, nds, src, vector_len); | |||
3162 | } | |||
3163 | ||||
3164 | void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, | |||
3165 | AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3166 | if (reachable(src)) { | |||
3167 | Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len); | |||
3168 | } else { | |||
3169 | lea(scratch_reg, src); | |||
3170 | Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len); | |||
3171 | } | |||
3172 | } | |||
3173 | ||||
3174 | void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, | |||
3175 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { | |||
3176 | if (reachable(src)) { | |||
3177 | Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); | |||
3178 | } else { | |||
3179 | lea(scratch_reg, src); | |||
3180 | Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); | |||
3181 | } | |||
3182 | } | |||
3183 | ||||
3184 | void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, | |||
3185 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { | |||
3186 | if (reachable(src)) { | |||
3187 | Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); | |||
3188 | } else { | |||
3189 | lea(scratch_reg, src); | |||
3190 | Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); | |||
3191 | } | |||
3192 | } | |||
3193 | ||||
3194 | void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, | |||
3195 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { | |||
3196 | if (reachable(src)) { | |||
3197 | Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); | |||
3198 | } else { | |||
3199 | lea(scratch_reg, src); | |||
3200 | Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); | |||
3201 | } | |||
3202 | } | |||
3203 | ||||
3204 | void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, | |||
3205 | int comparison, bool is_signed, int vector_len, Register scratch_reg) { | |||
3206 | if (reachable(src)) { | |||
3207 | Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, is_signed, vector_len); | |||
3208 | } else { | |||
3209 | lea(scratch_reg, src); | |||
3210 | Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, is_signed, vector_len); | |||
3211 | } | |||
3212 | } | |||
3213 | ||||
3214 | void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) { | |||
3215 | if (width == Assembler::Q) { | |||
3216 | Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len); | |||
3217 | } else { | |||
3218 | Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len); | |||
3219 | } | |||
3220 | } | |||
3221 | ||||
3222 | void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) { | |||
3223 | int eq_cond_enc = 0x29; | |||
3224 | int gt_cond_enc = 0x37; | |||
3225 | if (width != Assembler::Q) { | |||
3226 | eq_cond_enc = 0x74 + width; | |||
3227 | gt_cond_enc = 0x64 + width; | |||
3228 | } | |||
3229 | switch (cond) { | |||
3230 | case eq: | |||
3231 | vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len); | |||
3232 | break; | |||
3233 | case neq: | |||
3234 | vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len); | |||
3235 | vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); | |||
3236 | break; | |||
3237 | case le: | |||
3238 | vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); | |||
3239 | vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); | |||
3240 | break; | |||
3241 | case nlt: | |||
3242 | vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); | |||
3243 | vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); | |||
3244 | break; | |||
3245 | case lt: | |||
3246 | vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); | |||
3247 | break; | |||
3248 | case nle: | |||
3249 | vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); | |||
3250 | break; | |||
3251 | default: | |||
3252 | assert(false, "Should not reach here")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3252, "assert(" "false" ") failed", "Should not reach here" ); ::breakpoint(); } } while (0); | |||
3253 | } | |||
3254 | } | |||
3255 | ||||
3256 | void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { | |||
3257 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw ()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3257, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3258 | Assembler::vpmovzxbw(dst, src, vector_len); | |||
3259 | } | |||
3260 | ||||
3261 | void MacroAssembler::vpmovmskb(Register dst, XMMRegister src, int vector_len) { | |||
3262 | assert((src->encoding() < 16),"XMM register should be 0-15")do { if (!((src->encoding() < 16))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3262, "assert(" "(src->encoding() < 16)" ") failed", "XMM register should be 0-15" ); ::breakpoint(); } } while (0); | |||
3263 | Assembler::vpmovmskb(dst, src, vector_len); | |||
3264 | } | |||
3265 | ||||
3266 | void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
3267 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3267, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3268 | Assembler::vpmullw(dst, nds, src, vector_len); | |||
3269 | } | |||
3270 | ||||
3271 | void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { | |||
3272 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3272, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3273 | Assembler::vpmullw(dst, nds, src, vector_len); | |||
3274 | } | |||
3275 | ||||
3276 | void MacroAssembler::vpmulld(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3277 | assert((UseAVX > 0), "AVX support is needed")do { if (!((UseAVX > 0))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3277, "assert(" "(UseAVX > 0)" ") failed", "AVX support is needed" ); ::breakpoint(); } } while (0); | |||
3278 | if (reachable(src)) { | |||
3279 | Assembler::vpmulld(dst, nds, as_Address(src), vector_len); | |||
3280 | } else { | |||
3281 | lea(scratch_reg, src); | |||
3282 | Assembler::vpmulld(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3283 | } | |||
3284 | } | |||
3285 | ||||
3286 | void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
3287 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3287, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3288 | Assembler::vpsubb(dst, nds, src, vector_len); | |||
3289 | } | |||
3290 | ||||
3291 | void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { | |||
3292 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3292, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3293 | Assembler::vpsubb(dst, nds, src, vector_len); | |||
3294 | } | |||
3295 | ||||
3296 | void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { | |||
3297 | assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3297, "assert(" "((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3298 | Assembler::vpsubw(dst, nds, src, vector_len); | |||
3299 | } | |||
3300 | ||||
3301 | void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { | |||
3302 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3302, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3303 | Assembler::vpsubw(dst, nds, src, vector_len); | |||
3304 | } | |||
3305 | ||||
3306 | void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { | |||
3307 | assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift-> encoding() < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3307, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3308 | Assembler::vpsraw(dst, nds, shift, vector_len); | |||
3309 | } | |||
3310 | ||||
3311 | void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { | |||
3312 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3312, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3313 | Assembler::vpsraw(dst, nds, shift, vector_len); | |||
3314 | } | |||
3315 | ||||
3316 | void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { | |||
3317 | assert(UseAVX > 2,"")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3317, "assert(" "UseAVX > 2" ") failed", ""); ::breakpoint (); } } while (0); | |||
3318 | if (!VM_Version::supports_avx512vl() && vector_len < 2) { | |||
3319 | vector_len = 2; | |||
3320 | } | |||
3321 | Assembler::evpsraq(dst, nds, shift, vector_len); | |||
3322 | } | |||
3323 | ||||
3324 | void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { | |||
3325 | assert(UseAVX > 2,"")do { if (!(UseAVX > 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3325, "assert(" "UseAVX > 2" ") failed", ""); ::breakpoint (); } } while (0); | |||
3326 | if (!VM_Version::supports_avx512vl() && vector_len < 2) { | |||
3327 | vector_len = 2; | |||
3328 | } | |||
3329 | Assembler::evpsraq(dst, nds, shift, vector_len); | |||
3330 | } | |||
3331 | ||||
3332 | void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { | |||
3333 | assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift-> encoding() < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3333, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3334 | Assembler::vpsrlw(dst, nds, shift, vector_len); | |||
3335 | } | |||
3336 | ||||
3337 | void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { | |||
3338 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3338, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3339 | Assembler::vpsrlw(dst, nds, shift, vector_len); | |||
3340 | } | |||
3341 | ||||
3342 | void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) { | |||
3343 | assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && shift-> encoding() < 16 && nds->encoding() < 16) || VM_Version ::supports_avx512vlbw()))) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3343, "assert(" "((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3344 | Assembler::vpsllw(dst, nds, shift, vector_len); | |||
3345 | } | |||
3346 | ||||
3347 | void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) { | |||
3348 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3348, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3349 | Assembler::vpsllw(dst, nds, shift, vector_len); | |||
3350 | } | |||
3351 | ||||
3352 | void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) { | |||
3353 | assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15")do { if (!((dst->encoding() < 16 && src->encoding () < 16))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3353, "assert(" "(dst->encoding() < 16 && src->encoding() < 16)" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3354 | Assembler::vptest(dst, src); | |||
3355 | } | |||
3356 | ||||
3357 | void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) { | |||
3358 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3358, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3359 | Assembler::punpcklbw(dst, src); | |||
3360 | } | |||
3361 | ||||
3362 | void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) { | |||
3363 | assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16) || VM_Version::supports_avx512vl ()))) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3363, "assert(" "((dst->encoding() < 16) || VM_Version::supports_avx512vl())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3364 | Assembler::pshufd(dst, src, mode); | |||
3365 | } | |||
3366 | ||||
3367 | void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { | |||
3368 | assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && src->encoding () < 16) || VM_Version::supports_avx512vlbw()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3368, "assert(" "((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3369 | Assembler::pshuflw(dst, src, mode); | |||
3370 | } | |||
3371 | ||||
3372 | void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3373 | if (reachable(src)) { | |||
3374 | vandpd(dst, nds, as_Address(src), vector_len); | |||
3375 | } else { | |||
3376 | lea(scratch_reg, src); | |||
3377 | vandpd(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3378 | } | |||
3379 | } | |||
3380 | ||||
3381 | void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3382 | if (reachable(src)) { | |||
3383 | vandps(dst, nds, as_Address(src), vector_len); | |||
3384 | } else { | |||
3385 | lea(scratch_reg, src); | |||
3386 | vandps(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3387 | } | |||
3388 | } | |||
3389 | ||||
3390 | void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, | |||
3391 | bool merge, int vector_len, Register scratch_reg) { | |||
3392 | if (reachable(src)) { | |||
3393 | Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len); | |||
3394 | } else { | |||
3395 | lea(scratch_reg, src); | |||
3396 | Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len); | |||
3397 | } | |||
3398 | } | |||
3399 | ||||
3400 | void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3401 | if (reachable(src)) { | |||
3402 | vdivsd(dst, nds, as_Address(src)); | |||
3403 | } else { | |||
3404 | lea(rscratch1, src); | |||
3405 | vdivsd(dst, nds, Address(rscratch1, 0)); | |||
3406 | } | |||
3407 | } | |||
3408 | ||||
3409 | void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3410 | if (reachable(src)) { | |||
3411 | vdivss(dst, nds, as_Address(src)); | |||
3412 | } else { | |||
3413 | lea(rscratch1, src); | |||
3414 | vdivss(dst, nds, Address(rscratch1, 0)); | |||
3415 | } | |||
3416 | } | |||
3417 | ||||
3418 | void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3419 | if (reachable(src)) { | |||
3420 | vmulsd(dst, nds, as_Address(src)); | |||
3421 | } else { | |||
3422 | lea(rscratch1, src); | |||
3423 | vmulsd(dst, nds, Address(rscratch1, 0)); | |||
3424 | } | |||
3425 | } | |||
3426 | ||||
3427 | void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3428 | if (reachable(src)) { | |||
3429 | vmulss(dst, nds, as_Address(src)); | |||
3430 | } else { | |||
3431 | lea(rscratch1, src); | |||
3432 | vmulss(dst, nds, Address(rscratch1, 0)); | |||
3433 | } | |||
3434 | } | |||
3435 | ||||
3436 | void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3437 | if (reachable(src)) { | |||
3438 | vsubsd(dst, nds, as_Address(src)); | |||
3439 | } else { | |||
3440 | lea(rscratch1, src); | |||
3441 | vsubsd(dst, nds, Address(rscratch1, 0)); | |||
3442 | } | |||
3443 | } | |||
3444 | ||||
3445 | void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3446 | if (reachable(src)) { | |||
3447 | vsubss(dst, nds, as_Address(src)); | |||
3448 | } else { | |||
3449 | lea(rscratch1, src); | |||
3450 | vsubss(dst, nds, Address(rscratch1, 0)); | |||
3451 | } | |||
3452 | } | |||
3453 | ||||
3454 | void MacroAssembler::vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3455 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vldq()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3455, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3456 | vxorps(dst, nds, src, Assembler::AVX_128bit); | |||
3457 | } | |||
3458 | ||||
3459 | void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { | |||
3460 | assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15")do { if (!(((dst->encoding() < 16 && nds->encoding () < 16) || VM_Version::supports_avx512vldq()))) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3460, "assert(" "((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq())" ") failed", "XMM register should be 0-15"); ::breakpoint(); } } while (0); | |||
3461 | vxorpd(dst, nds, src, Assembler::AVX_128bit); | |||
3462 | } | |||
3463 | ||||
3464 | void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3465 | if (reachable(src)) { | |||
3466 | vxorpd(dst, nds, as_Address(src), vector_len); | |||
3467 | } else { | |||
3468 | lea(scratch_reg, src); | |||
3469 | vxorpd(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3470 | } | |||
3471 | } | |||
3472 | ||||
3473 | void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3474 | if (reachable(src)) { | |||
3475 | vxorps(dst, nds, as_Address(src), vector_len); | |||
3476 | } else { | |||
3477 | lea(scratch_reg, src); | |||
3478 | vxorps(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3479 | } | |||
3480 | } | |||
3481 | ||||
3482 | void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3483 | if (UseAVX > 1 || (vector_len < 1)) { | |||
3484 | if (reachable(src)) { | |||
3485 | Assembler::vpxor(dst, nds, as_Address(src), vector_len); | |||
3486 | } else { | |||
3487 | lea(scratch_reg, src); | |||
3488 | Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3489 | } | |||
3490 | } | |||
3491 | else { | |||
3492 | MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg); | |||
3493 | } | |||
3494 | } | |||
3495 | ||||
3496 | void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { | |||
3497 | if (reachable(src)) { | |||
3498 | Assembler::vpermd(dst, nds, as_Address(src), vector_len); | |||
3499 | } else { | |||
3500 | lea(scratch_reg, src); | |||
3501 | Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len); | |||
3502 | } | |||
3503 | } | |||
3504 | ||||
3505 | void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { | |||
3506 | const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask); | |||
3507 | STATIC_ASSERT(inverted_jweak_mask == -2)static_assert((inverted_jweak_mask == -2), "inverted_jweak_mask == -2" ); // otherwise check this code | |||
3508 | // The inverted mask is sign-extended | |||
3509 | andptr(possibly_jweak, inverted_jweak_mask); | |||
3510 | } | |||
3511 | ||||
3512 | void MacroAssembler::resolve_jobject(Register value, | |||
3513 | Register thread, | |||
3514 | Register tmp) { | |||
3515 | assert_different_registers(value, thread, tmp); | |||
3516 | Label done, not_weak; | |||
3517 | testptr(value, value); | |||
3518 | jcc(Assembler::zero, done); // Use NULL as-is. | |||
3519 | testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag. | |||
3520 | jcc(Assembler::zero, not_weak); | |||
3521 | // Resolve jweak. | |||
3522 | access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, | |||
3523 | value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); | |||
3524 | verify_oop(value)_verify_oop_checked(value, "broken oop " "value", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3524); | |||
3525 | jmp(done); | |||
3526 | bind(not_weak); | |||
3527 | // Resolve (untagged) jobject. | |||
3528 | access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); | |||
3529 | verify_oop(value)_verify_oop_checked(value, "broken oop " "value", "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3529); | |||
3530 | bind(done); | |||
3531 | } | |||
3532 | ||||
3533 | void MacroAssembler::subptr(Register dst, int32_t imm32) { | |||
3534 | LP64_ONLY(subq(dst, imm32))subq(dst, imm32) NOT_LP64(subl(dst, imm32)); | |||
3535 | } | |||
3536 | ||||
3537 | // Force generation of a 4 byte immediate value even if it fits into 8bit | |||
3538 | void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { | |||
3539 | LP64_ONLY(subq_imm32(dst, imm32))subq_imm32(dst, imm32) NOT_LP64(subl_imm32(dst, imm32)); | |||
3540 | } | |||
3541 | ||||
3542 | void MacroAssembler::subptr(Register dst, Register src) { | |||
3543 | LP64_ONLY(subq(dst, src))subq(dst, src) NOT_LP64(subl(dst, src)); | |||
3544 | } | |||
3545 | ||||
3546 | // C++ bool manipulation | |||
3547 | void MacroAssembler::testbool(Register dst) { | |||
3548 | if(sizeof(bool) == 1) | |||
3549 | testb(dst, 0xff); | |||
3550 | else if(sizeof(bool) == 2) { | |||
3551 | // testw implementation needed for two byte bools | |||
3552 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3552); ::breakpoint(); } while (0); | |||
3553 | } else if(sizeof(bool) == 4) | |||
3554 | testl(dst, dst); | |||
3555 | else | |||
3556 | // unsupported | |||
3557 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3557); ::breakpoint(); } while (0); | |||
3558 | } | |||
3559 | ||||
3560 | void MacroAssembler::testptr(Register dst, Register src) { | |||
3561 | LP64_ONLY(testq(dst, src))testq(dst, src) NOT_LP64(testl(dst, src)); | |||
3562 | } | |||
3563 | ||||
3564 | // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. | |||
3565 | void MacroAssembler::tlab_allocate(Register thread, Register obj, | |||
3566 | Register var_size_in_bytes, | |||
3567 | int con_size_in_bytes, | |||
3568 | Register t1, | |||
3569 | Register t2, | |||
3570 | Label& slow_case) { | |||
3571 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); | |||
3572 | bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); | |||
3573 | } | |||
3574 | ||||
3575 | // Defines obj, preserves var_size_in_bytes | |||
3576 | void MacroAssembler::eden_allocate(Register thread, Register obj, | |||
3577 | Register var_size_in_bytes, | |||
3578 | int con_size_in_bytes, | |||
3579 | Register t1, | |||
3580 | Label& slow_case) { | |||
3581 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); | |||
3582 | bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); | |||
3583 | } | |||
3584 | ||||
3585 | // Preserves the contents of address, destroys the contents length_in_bytes and temp. | |||
3586 | void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) { | |||
3587 | assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different")do { if (!(address != length_in_bytes && address != temp && temp != length_in_bytes)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3587, "assert(" "address != length_in_bytes && address != temp && temp != length_in_bytes" ") failed", "registers must be different"); ::breakpoint(); } } while (0); | |||
3588 | assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord")do { if (!((offset_in_bytes & (BytesPerWord - 1)) == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3588, "assert(" "(offset_in_bytes & (BytesPerWord - 1)) == 0" ") failed", "offset must be a multiple of BytesPerWord"); :: breakpoint(); } } while (0); | |||
3589 | Label done; | |||
3590 | ||||
3591 | testptr(length_in_bytes, length_in_bytes); | |||
3592 | jcc(Assembler::zero, done); | |||
3593 | ||||
3594 | // initialize topmost word, divide index by 2, check if odd and test if zero | |||
3595 | // note: for the remaining code to work, index must be a multiple of BytesPerWord | |||
3596 | #ifdef ASSERT1 | |||
3597 | { | |||
3598 | Label L; | |||
3599 | testptr(length_in_bytes, BytesPerWord - 1); | |||
3600 | jcc(Assembler::zero, L); | |||
3601 | stop("length must be a multiple of BytesPerWord"); | |||
3602 | bind(L); | |||
3603 | } | |||
3604 | #endif | |||
3605 | Register index = length_in_bytes; | |||
3606 | xorptr(temp, temp); // use _zero reg to clear memory (shorter code) | |||
3607 | if (UseIncDec) { | |||
3608 | shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set | |||
3609 | } else { | |||
3610 | shrptr(index, 2); // use 2 instructions to avoid partial flag stall | |||
3611 | shrptr(index, 1); | |||
3612 | } | |||
3613 | #ifndef _LP641 | |||
3614 | // index could have not been a multiple of 8 (i.e., bit 2 was set) | |||
3615 | { | |||
3616 | Label even; | |||
3617 | // note: if index was a multiple of 8, then it cannot | |||
3618 | // be 0 now otherwise it must have been 0 before | |||
3619 | // => if it is even, we don't need to check for 0 again | |||
3620 | jcc(Assembler::carryClear, even); | |||
3621 | // clear topmost word (no jump would be needed if conditional assignment worked here) | |||
3622 | movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp); | |||
3623 | // index could be 0 now, must check again | |||
3624 | jcc(Assembler::zero, done); | |||
3625 | bind(even); | |||
3626 | } | |||
3627 | #endif // !_LP64 | |||
3628 | // initialize remaining object fields: index is a multiple of 2 now | |||
3629 | { | |||
3630 | Label loop; | |||
3631 | bind(loop); | |||
3632 | movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp); | |||
3633 | NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);) | |||
3634 | decrement(index); | |||
3635 | jcc(Assembler::notZero, loop); | |||
3636 | } | |||
3637 | ||||
3638 | bind(done); | |||
3639 | } | |||
3640 | ||||
3641 | // Look up the method for a megamorphic invokeinterface call. | |||
3642 | // The target method is determined by <intf_klass, itable_index>. | |||
3643 | // The receiver klass is in recv_klass. | |||
3644 | // On success, the result will be in method_result, and execution falls through. | |||
3645 | // On failure, execution transfers to the given label. | |||
3646 | void MacroAssembler::lookup_interface_method(Register recv_klass, | |||
3647 | Register intf_klass, | |||
3648 | RegisterOrConstant itable_index, | |||
3649 | Register method_result, | |||
3650 | Register scan_temp, | |||
3651 | Label& L_no_such_interface, | |||
3652 | bool return_method) { | |||
3653 | assert_different_registers(recv_klass, intf_klass, scan_temp); | |||
3654 | assert_different_registers(method_result, intf_klass, scan_temp); | |||
3655 | assert(recv_klass != method_result || !return_method,do { if (!(recv_klass != method_result || !return_method)) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3656, "assert(" "recv_klass != method_result || !return_method" ") failed", "recv_klass can be destroyed when method isn't needed" ); ::breakpoint(); } } while (0) | |||
3656 | "recv_klass can be destroyed when method isn't needed")do { if (!(recv_klass != method_result || !return_method)) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3656, "assert(" "recv_klass != method_result || !return_method" ") failed", "recv_klass can be destroyed when method isn't needed" ); ::breakpoint(); } } while (0); | |||
3657 | ||||
3658 | assert(itable_index.is_constant() || itable_index.as_register() == method_result,do { if (!(itable_index.is_constant() || itable_index.as_register () == method_result)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3659, "assert(" "itable_index.is_constant() || itable_index.as_register() == method_result" ") failed", "caller must use same register for non-constant itable index as for method" ); ::breakpoint(); } } while (0) | |||
3659 | "caller must use same register for non-constant itable index as for method")do { if (!(itable_index.is_constant() || itable_index.as_register () == method_result)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3659, "assert(" "itable_index.is_constant() || itable_index.as_register() == method_result" ") failed", "caller must use same register for non-constant itable index as for method" ); ::breakpoint(); } } while (0); | |||
3660 | ||||
3661 | // Compute start of first itableOffsetEntry (which is at the end of the vtable) | |||
3662 | int vtable_base = in_bytes(Klass::vtable_start_offset()); | |||
3663 | int itentry_off = itableMethodEntry::method_offset_in_bytes(); | |||
3664 | int scan_step = itableOffsetEntry::size() * wordSize; | |||
3665 | int vte_size = vtableEntry::size_in_bytes(); | |||
3666 | Address::ScaleFactor times_vte_scale = Address::times_ptr; | |||
3667 | assert(vte_size == wordSize, "else adjust times_vte_scale")do { if (!(vte_size == wordSize)) { (*g_assert_poison) = 'X'; ; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3667, "assert(" "vte_size == wordSize" ") failed", "else adjust times_vte_scale" ); ::breakpoint(); } } while (0); | |||
3668 | ||||
3669 | movl(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); | |||
3670 | ||||
3671 | // %%% Could store the aligned, prescaled offset in the klassoop. | |||
3672 | lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); | |||
3673 | ||||
3674 | if (return_method) { | |||
3675 | // Adjust recv_klass by scaled itable_index, so we can free itable_index. | |||
3676 | assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below")do { if (!(itableMethodEntry::size() * wordSize == wordSize)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3676, "assert(" "itableMethodEntry::size() * wordSize == wordSize" ") failed", "adjust the scaling in the code below"); ::breakpoint (); } } while (0); | |||
3677 | lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); | |||
3678 | } | |||
3679 | ||||
3680 | // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { | |||
3681 | // if (scan->interface() == intf) { | |||
3682 | // result = (klass + scan->offset() + itable_index); | |||
3683 | // } | |||
3684 | // } | |||
3685 | Label search, found_method; | |||
3686 | ||||
3687 | for (int peel = 1; peel >= 0; peel--) { | |||
3688 | movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); | |||
3689 | cmpptr(intf_klass, method_result); | |||
3690 | ||||
3691 | if (peel) { | |||
3692 | jccb(Assembler::equal, found_method)jccb_0(Assembler::equal, found_method, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3692); | |||
3693 | } else { | |||
3694 | jccb(Assembler::notEqual, search)jccb_0(Assembler::notEqual, search, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3694); | |||
3695 | // (invert the test to fall through to found_method...) | |||
3696 | } | |||
3697 | ||||
3698 | if (!peel) break; | |||
3699 | ||||
3700 | bind(search); | |||
3701 | ||||
3702 | // Check that the previous entry is non-null. A null entry means that | |||
3703 | // the receiver class doesn't implement the interface, and wasn't the | |||
3704 | // same as when the caller was compiled. | |||
3705 | testptr(method_result, method_result); | |||
3706 | jcc(Assembler::zero, L_no_such_interface); | |||
3707 | addptr(scan_temp, scan_step); | |||
3708 | } | |||
3709 | ||||
3710 | bind(found_method); | |||
3711 | ||||
3712 | if (return_method) { | |||
3713 | // Got a hit. | |||
3714 | movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); | |||
3715 | movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); | |||
3716 | } | |||
3717 | } | |||
3718 | ||||
3719 | ||||
3720 | // virtual method calling | |||
3721 | void MacroAssembler::lookup_virtual_method(Register recv_klass, | |||
3722 | RegisterOrConstant vtable_index, | |||
3723 | Register method_result) { | |||
3724 | const int base = in_bytes(Klass::vtable_start_offset()); | |||
3725 | assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below")do { if (!(vtableEntry::size() * wordSize == wordSize)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3725, "assert(" "vtableEntry::size() * wordSize == wordSize" ") failed", "else adjust the scaling in the code below"); :: breakpoint(); } } while (0); | |||
3726 | Address vtable_entry_addr(recv_klass, | |||
3727 | vtable_index, Address::times_ptr, | |||
3728 | base + vtableEntry::method_offset_in_bytes()); | |||
3729 | movptr(method_result, vtable_entry_addr); | |||
3730 | } | |||
3731 | ||||
3732 | ||||
3733 | void MacroAssembler::check_klass_subtype(Register sub_klass, | |||
3734 | Register super_klass, | |||
3735 | Register temp_reg, | |||
3736 | Label& L_success) { | |||
3737 | Label L_failure; | |||
3738 | check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL__null); | |||
3739 | check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL__null); | |||
3740 | bind(L_failure); | |||
3741 | } | |||
3742 | ||||
3743 | ||||
3744 | void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, | |||
3745 | Register super_klass, | |||
3746 | Register temp_reg, | |||
3747 | Label* L_success, | |||
3748 | Label* L_failure, | |||
3749 | Label* L_slow_path, | |||
3750 | RegisterOrConstant super_check_offset) { | |||
3751 | assert_different_registers(sub_klass, super_klass, temp_reg); | |||
3752 | bool must_load_sco = (super_check_offset.constant_or_zero() == -1); | |||
3753 | if (super_check_offset.is_register()) { | |||
3754 | assert_different_registers(sub_klass, super_klass, | |||
3755 | super_check_offset.as_register()); | |||
3756 | } else if (must_load_sco) { | |||
3757 | assert(temp_reg != noreg, "supply either a temp or a register offset")do { if (!(temp_reg != noreg)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3757, "assert(" "temp_reg != noreg" ") failed", "supply either a temp or a register offset" ); ::breakpoint(); } } while (0); | |||
3758 | } | |||
3759 | ||||
3760 | Label L_fallthrough; | |||
3761 | int label_nulls = 0; | |||
3762 | if (L_success == NULL__null) { L_success = &L_fallthrough; label_nulls++; } | |||
3763 | if (L_failure == NULL__null) { L_failure = &L_fallthrough; label_nulls++; } | |||
3764 | if (L_slow_path == NULL__null) { L_slow_path = &L_fallthrough; label_nulls++; } | |||
3765 | assert(label_nulls <= 1, "at most one NULL in the batch")do { if (!(label_nulls <= 1)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3765, "assert(" "label_nulls <= 1" ") failed", "at most one NULL in the batch" ); ::breakpoint(); } } while (0); | |||
3766 | ||||
3767 | int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); | |||
3768 | int sco_offset = in_bytes(Klass::super_check_offset_offset()); | |||
3769 | Address super_check_offset_addr(super_klass, sco_offset); | |||
3770 | ||||
3771 | // Hacked jcc, which "knows" that L_fallthrough, at least, is in | |||
3772 | // range of a jccb. If this routine grows larger, reconsider at | |||
3773 | // least some of these. | |||
3774 | #define local_jcc(assembler_cond, label) \ | |||
3775 | if (&(label) == &L_fallthrough) jccb(assembler_cond, label)jccb_0(assembler_cond, label, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3775); \ | |||
3776 | else jcc( assembler_cond, label) /*omit semi*/ | |||
3777 | ||||
3778 | // Hacked jmp, which may only be used just before L_fallthrough. | |||
3779 | #define final_jmp(label) \ | |||
3780 | if (&(label) == &L_fallthrough) { /*do nothing*/ } \ | |||
3781 | else jmp(label) /*omit semi*/ | |||
3782 | ||||
3783 | // If the pointers are equal, we are done (e.g., String[] elements). | |||
3784 | // This self-check enables sharing of secondary supertype arrays among | |||
3785 | // non-primary types such as array-of-interface. Otherwise, each such | |||
3786 | // type would need its own customized SSA. | |||
3787 | // We move this check to the front of the fast path because many | |||
3788 | // type checks are in fact trivially successful in this manner, | |||
3789 | // so we get a nicely predicted branch right at the start of the check. | |||
3790 | cmpptr(sub_klass, super_klass); | |||
3791 | local_jcc(Assembler::equal, *L_success); | |||
3792 | ||||
3793 | // Check the supertype display: | |||
3794 | if (must_load_sco) { | |||
3795 | // Positive movl does right thing on LP64. | |||
3796 | movl(temp_reg, super_check_offset_addr); | |||
3797 | super_check_offset = RegisterOrConstant(temp_reg); | |||
3798 | } | |||
3799 | Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); | |||
3800 | cmpptr(super_klass, super_check_addr); // load displayed supertype | |||
3801 | ||||
3802 | // This check has worked decisively for primary supers. | |||
3803 | // Secondary supers are sought in the super_cache ('super_cache_addr'). | |||
3804 | // (Secondary supers are interfaces and very deeply nested subtypes.) | |||
3805 | // This works in the same check above because of a tricky aliasing | |||
3806 | // between the super_cache and the primary super display elements. | |||
3807 | // (The 'super_check_addr' can address either, as the case requires.) | |||
3808 | // Note that the cache is updated below if it does not help us find | |||
3809 | // what we need immediately. | |||
3810 | // So if it was a primary super, we can just fail immediately. | |||
3811 | // Otherwise, it's the slow path for us (no success at this point). | |||
3812 | ||||
3813 | if (super_check_offset.is_register()) { | |||
3814 | local_jcc(Assembler::equal, *L_success); | |||
3815 | cmpl(super_check_offset.as_register(), sc_offset); | |||
3816 | if (L_failure == &L_fallthrough) { | |||
3817 | local_jcc(Assembler::equal, *L_slow_path); | |||
3818 | } else { | |||
3819 | local_jcc(Assembler::notEqual, *L_failure); | |||
3820 | final_jmp(*L_slow_path); | |||
3821 | } | |||
3822 | } else if (super_check_offset.as_constant() == sc_offset) { | |||
3823 | // Need a slow path; fast failure is impossible. | |||
3824 | if (L_slow_path == &L_fallthrough) { | |||
3825 | local_jcc(Assembler::equal, *L_success); | |||
3826 | } else { | |||
3827 | local_jcc(Assembler::notEqual, *L_slow_path); | |||
3828 | final_jmp(*L_success); | |||
3829 | } | |||
3830 | } else { | |||
3831 | // No slow path; it's a fast decision. | |||
3832 | if (L_failure == &L_fallthrough) { | |||
3833 | local_jcc(Assembler::equal, *L_success); | |||
3834 | } else { | |||
3835 | local_jcc(Assembler::notEqual, *L_failure); | |||
3836 | final_jmp(*L_success); | |||
3837 | } | |||
3838 | } | |||
3839 | ||||
3840 | bind(L_fallthrough); | |||
3841 | ||||
3842 | #undef local_jcc | |||
3843 | #undef final_jmp | |||
3844 | } | |||
3845 | ||||
3846 | ||||
3847 | void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, | |||
3848 | Register super_klass, | |||
3849 | Register temp_reg, | |||
3850 | Register temp2_reg, | |||
3851 | Label* L_success, | |||
3852 | Label* L_failure, | |||
3853 | bool set_cond_codes) { | |||
3854 | assert_different_registers(sub_klass, super_klass, temp_reg); | |||
3855 | if (temp2_reg != noreg) | |||
3856 | assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); | |||
3857 | #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) | |||
3858 | ||||
3859 | Label L_fallthrough; | |||
3860 | int label_nulls = 0; | |||
3861 | if (L_success == NULL__null) { L_success = &L_fallthrough; label_nulls++; } | |||
3862 | if (L_failure == NULL__null) { L_failure = &L_fallthrough; label_nulls++; } | |||
3863 | assert(label_nulls <= 1, "at most one NULL in the batch")do { if (!(label_nulls <= 1)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3863, "assert(" "label_nulls <= 1" ") failed", "at most one NULL in the batch" ); ::breakpoint(); } } while (0); | |||
3864 | ||||
3865 | // a couple of useful fields in sub_klass: | |||
3866 | int ss_offset = in_bytes(Klass::secondary_supers_offset()); | |||
3867 | int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); | |||
3868 | Address secondary_supers_addr(sub_klass, ss_offset); | |||
3869 | Address super_cache_addr( sub_klass, sc_offset); | |||
3870 | ||||
3871 | // Do a linear scan of the secondary super-klass chain. | |||
3872 | // This code is rarely used, so simplicity is a virtue here. | |||
3873 | // The repne_scan instruction uses fixed registers, which we must spill. | |||
3874 | // Don't worry too much about pre-existing connections with the input regs. | |||
3875 | ||||
3876 | assert(sub_klass != rax, "killed reg")do { if (!(sub_klass != rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3876, "assert(" "sub_klass != rax" ") failed", "killed reg" ); ::breakpoint(); } } while (0); // killed by mov(rax, super) | |||
3877 | assert(sub_klass != rcx, "killed reg")do { if (!(sub_klass != rcx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3877, "assert(" "sub_klass != rcx" ") failed", "killed reg" ); ::breakpoint(); } } while (0); // killed by lea(rcx, &pst_counter) | |||
3878 | ||||
3879 | // Get super_klass value into rax (even if it was in rdi or rcx). | |||
3880 | bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; | |||
3881 | if (super_klass != rax || UseCompressedOops) { | |||
3882 | if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } | |||
3883 | mov(rax, super_klass); | |||
3884 | } | |||
3885 | if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } | |||
3886 | if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } | |||
3887 | ||||
3888 | #ifndef PRODUCT | |||
3889 | int* pst_counter = &SharedRuntime::_partial_subtype_ctr; | |||
3890 | ExternalAddress pst_counter_addr((address) pst_counter); | |||
3891 | NOT_LP64( incrementl(pst_counter_addr) ); | |||
3892 | LP64_ONLY( lea(rcx, pst_counter_addr) )lea(rcx, pst_counter_addr); | |||
3893 | LP64_ONLY( incrementl(Address(rcx, 0)) )incrementl(Address(rcx, 0)); | |||
3894 | #endif //PRODUCT | |||
3895 | ||||
3896 | // We will consult the secondary-super array. | |||
3897 | movptr(rdi, secondary_supers_addr); | |||
3898 | // Load the array length. (Positive movl does right thing on LP64.) | |||
3899 | movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); | |||
3900 | // Skip to start of data. | |||
3901 | addptr(rdi, Array<Klass*>::base_offset_in_bytes()); | |||
3902 | ||||
3903 | // Scan RCX words at [RDI] for an occurrence of RAX. | |||
3904 | // Set NZ/Z based on last compare. | |||
3905 | // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does | |||
3906 | // not change flags (only scas instruction which is repeated sets flags). | |||
3907 | // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. | |||
3908 | ||||
3909 | testptr(rax,rax); // Set Z = 0 | |||
3910 | repne_scan(); | |||
3911 | ||||
3912 | // Unspill the temp. registers: | |||
3913 | if (pushed_rdi) pop(rdi); | |||
3914 | if (pushed_rcx) pop(rcx); | |||
3915 | if (pushed_rax) pop(rax); | |||
3916 | ||||
3917 | if (set_cond_codes) { | |||
3918 | // Special hack for the AD files: rdi is guaranteed non-zero. | |||
3919 | assert(!pushed_rdi, "rdi must be left non-NULL")do { if (!(!pushed_rdi)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3919, "assert(" "!pushed_rdi" ") failed", "rdi must be left non-NULL" ); ::breakpoint(); } } while (0); | |||
3920 | // Also, the condition codes are properly set Z/NZ on succeed/failure. | |||
3921 | } | |||
3922 | ||||
3923 | if (L_failure == &L_fallthrough) | |||
3924 | jccb(Assembler::notEqual, *L_failure)jccb_0(Assembler::notEqual, *L_failure, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3924); | |||
3925 | else jcc(Assembler::notEqual, *L_failure); | |||
3926 | ||||
3927 | // Success. Cache the super we found and proceed in triumph. | |||
3928 | movptr(super_cache_addr, super_klass); | |||
3929 | ||||
3930 | if (L_success != &L_fallthrough) { | |||
3931 | jmp(*L_success); | |||
3932 | } | |||
3933 | ||||
3934 | #undef IS_A_TEMP | |||
3935 | ||||
3936 | bind(L_fallthrough); | |||
3937 | } | |||
3938 | ||||
3939 | void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { | |||
3940 | assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required")do { if (!(L_fast_path != __null || L_slow_path != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3940, "assert(" "L_fast_path != __null || L_slow_path != __null" ") failed", "at least one is required"); ::breakpoint(); } } while (0); | |||
3941 | ||||
3942 | Label L_fallthrough; | |||
3943 | if (L_fast_path == NULL__null) { | |||
3944 | L_fast_path = &L_fallthrough; | |||
3945 | } else if (L_slow_path == NULL__null) { | |||
3946 | L_slow_path = &L_fallthrough; | |||
3947 | } | |||
3948 | ||||
3949 | // Fast path check: class is fully initialized | |||
3950 | cmpb(Address(klass, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); | |||
3951 | jcc(Assembler::equal, *L_fast_path); | |||
3952 | ||||
3953 | // Fast path check: current thread is initializer thread | |||
3954 | cmpptr(thread, Address(klass, InstanceKlass::init_thread_offset())); | |||
3955 | if (L_slow_path == &L_fallthrough) { | |||
3956 | jcc(Assembler::equal, *L_fast_path); | |||
3957 | bind(*L_slow_path); | |||
3958 | } else if (L_fast_path == &L_fallthrough) { | |||
3959 | jcc(Assembler::notEqual, *L_slow_path); | |||
3960 | bind(*L_fast_path); | |||
3961 | } else { | |||
3962 | Unimplemented()do { (*g_assert_poison) = 'X';; report_unimplemented("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3962); ::breakpoint(); } while (0); | |||
3963 | } | |||
3964 | } | |||
3965 | ||||
3966 | void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { | |||
3967 | if (VM_Version::supports_cmov()) { | |||
3968 | cmovl(cc, dst, src); | |||
3969 | } else { | |||
3970 | Label L; | |||
3971 | jccb(negate_condition(cc), L)jccb_0(negate_condition(cc), L, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3971); | |||
3972 | movl(dst, src); | |||
3973 | bind(L); | |||
3974 | } | |||
3975 | } | |||
3976 | ||||
3977 | void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { | |||
3978 | if (VM_Version::supports_cmov()) { | |||
3979 | cmovl(cc, dst, src); | |||
3980 | } else { | |||
3981 | Label L; | |||
3982 | jccb(negate_condition(cc), L)jccb_0(negate_condition(cc), L, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 3982); | |||
3983 | movl(dst, src); | |||
3984 | bind(L); | |||
3985 | } | |||
3986 | } | |||
3987 | ||||
3988 | void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) { | |||
3989 | if (!VerifyOops) return; | |||
3990 | ||||
3991 | // Pass register number to verify_oop_subroutine | |||
3992 | const char* b = NULL__null; | |||
3993 | { | |||
3994 | ResourceMark rm; | |||
3995 | stringStream ss; | |||
3996 | ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line); | |||
3997 | b = code_string(ss.as_string()); | |||
3998 | } | |||
3999 | BLOCK_COMMENT("verify_oop {"); | |||
4000 | #ifdef _LP641 | |||
4001 | push(rscratch1); // save r10, trashed by movptr() | |||
4002 | #endif | |||
4003 | push(rax); // save rax, | |||
4004 | push(reg); // pass register argument | |||
4005 | ExternalAddress buffer((address) b); | |||
4006 | // avoid using pushptr, as it modifies scratch registers | |||
4007 | // and our contract is not to modify anything | |||
4008 | movptr(rax, buffer.addr()); | |||
4009 | push(rax); | |||
4010 | // call indirectly to solve generation ordering problem | |||
4011 | movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); | |||
4012 | call(rax); | |||
4013 | // Caller pops the arguments (oop, message) and restores rax, r10 | |||
4014 | BLOCK_COMMENT("} verify_oop"); | |||
4015 | } | |||
4016 | ||||
4017 | void MacroAssembler::vallones(XMMRegister dst, int vector_len) { | |||
4018 | if (UseAVX > 2 && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { | |||
4019 | vpternlogd(dst, 0xFF, dst, dst, vector_len); | |||
4020 | } else { | |||
4021 | assert(UseAVX > 0, "")do { if (!(UseAVX > 0)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4021, "assert(" "UseAVX > 0" ") failed", ""); ::breakpoint (); } } while (0); | |||
4022 | vpcmpeqb(dst, dst, dst, vector_len); | |||
4023 | } | |||
4024 | } | |||
4025 | ||||
4026 | Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, | |||
4027 | int extra_slot_offset) { | |||
4028 | // cf. TemplateTable::prepare_invoke(), if (load_receiver). | |||
4029 | int stackElementSize = Interpreter::stackElementSize; | |||
4030 | int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); | |||
4031 | #ifdef ASSERT1 | |||
4032 | int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); | |||
4033 | assert(offset1 - offset == stackElementSize, "correct arithmetic")do { if (!(offset1 - offset == stackElementSize)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4033, "assert(" "offset1 - offset == stackElementSize" ") failed" , "correct arithmetic"); ::breakpoint(); } } while (0); | |||
4034 | #endif | |||
4035 | Register scale_reg = noreg; | |||
4036 | Address::ScaleFactor scale_factor = Address::no_scale; | |||
4037 | if (arg_slot.is_constant()) { | |||
4038 | offset += arg_slot.as_constant() * stackElementSize; | |||
4039 | } else { | |||
4040 | scale_reg = arg_slot.as_register(); | |||
4041 | scale_factor = Address::times(stackElementSize); | |||
4042 | } | |||
4043 | offset += wordSize; // return PC is on stack | |||
4044 | return Address(rsp, scale_reg, scale_factor, offset); | |||
4045 | } | |||
4046 | ||||
4047 | void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { | |||
4048 | if (!VerifyOops) return; | |||
4049 | ||||
4050 | // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); | |||
4051 | // Pass register number to verify_oop_subroutine | |||
4052 | const char* b = NULL__null; | |||
4053 | { | |||
4054 | ResourceMark rm; | |||
4055 | stringStream ss; | |||
4056 | ss.print("verify_oop_addr: %s (%s:%d)", s, file, line); | |||
4057 | b = code_string(ss.as_string()); | |||
4058 | } | |||
4059 | #ifdef _LP641 | |||
4060 | push(rscratch1); // save r10, trashed by movptr() | |||
4061 | #endif | |||
4062 | push(rax); // save rax, | |||
4063 | // addr may contain rsp so we will have to adjust it based on the push | |||
4064 | // we just did (and on 64 bit we do two pushes) | |||
4065 | // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which | |||
4066 | // stores rax into addr which is backwards of what was intended. | |||
4067 | if (addr.uses(rsp)) { | |||
4068 | lea(rax, addr); | |||
4069 | pushptr(Address(rax, LP64_ONLY(2 *)2 * BytesPerWord)); | |||
4070 | } else { | |||
4071 | pushptr(addr); | |||
4072 | } | |||
4073 | ||||
4074 | ExternalAddress buffer((address) b); | |||
4075 | // pass msg argument | |||
4076 | // avoid using pushptr, as it modifies scratch registers | |||
4077 | // and our contract is not to modify anything | |||
4078 | movptr(rax, buffer.addr()); | |||
4079 | push(rax); | |||
4080 | ||||
4081 | // call indirectly to solve generation ordering problem | |||
4082 | movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); | |||
4083 | call(rax); | |||
4084 | // Caller pops the arguments (addr, message) and restores rax, r10. | |||
4085 | } | |||
4086 | ||||
4087 | void MacroAssembler::verify_tlab() { | |||
4088 | #ifdef ASSERT1 | |||
4089 | if (UseTLAB && VerifyOops) { | |||
4090 | Label next, ok; | |||
4091 | Register t1 = rsi; | |||
4092 | Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread)r15_thread; | |||
4093 | ||||
4094 | push(t1); | |||
4095 | NOT_LP64(push(thread_reg)); | |||
4096 | NOT_LP64(get_thread(thread_reg)); | |||
4097 | ||||
4098 | movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); | |||
4099 | cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); | |||
4100 | jcc(Assembler::aboveEqual, next); | |||
4101 | STOP("assert(top >= start)")block_comment("assert(top >= start)"); stop("assert(top >= start)" ); | |||
4102 | should_not_reach_here(); | |||
4103 | ||||
4104 | bind(next); | |||
4105 | movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); | |||
4106 | cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); | |||
4107 | jcc(Assembler::aboveEqual, ok); | |||
4108 | STOP("assert(top <= end)")block_comment("assert(top <= end)"); stop("assert(top <= end)" ); | |||
4109 | should_not_reach_here(); | |||
4110 | ||||
4111 | bind(ok); | |||
4112 | NOT_LP64(pop(thread_reg)); | |||
4113 | pop(t1); | |||
4114 | } | |||
4115 | #endif | |||
4116 | } | |||
4117 | ||||
// Decoded view of a control word kept in the low 16 bits of _value.
// NOTE(review): the field layout (exception mask bits 0..5, precision
// control in bits 8..9, rounding control in bits 10..11) looks like the x87
// FPU control word -- confirm against the users of this class.
class ControlWord {
 public:
  int32_t _value;

  // Two-bit fields; both accessors always return a value in 0..3.
  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  int  precision_control() const       { return  (_value >>  8) & 3      ; }

  // Single-bit flags, bits 5 down to 0.
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  // Prints the low 16 bits in hex followed by the decoded flags, rounding
  // mode and precision mode.  Each flag is shown as an upper-case letter
  // when its bit is set and lower-case otherwise.  No newline is written.
  void print() const {
    // Both selectors are masked with '& 3', so every possible value has a
    // table entry and no "unknown value" handling is needed.
    static const char* const rounding_names[4] = {
      "round near", "round down", "round up ", "chop "
    };
    static const char* const precision_names[4] = {
      "24 bits ", "reserved", "53 bits ", "64 bits "
    };
    const char* rc = rounding_names[rounding_control()];
    const char* pc = precision_names[precision_control()];

    // flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';

    // output; %x expects an unsigned argument
    printf("%04x masks = %s, %s, %s", static_cast<unsigned int>(_value & 0xFFFF), f, rc, pc);
  }

};
4170 | ||||
// Bit-field view of a status word held in the low 16 bits of _value.
// Each accessor tests one bit; top() extracts the 3-bit field at bits 11..13.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const         { return (_value & (1 << 15)) != 0; }
  bool C3() const           { return (_value & (1 << 14)) != 0; }
  bool C2() const           { return (_value & (1 << 10)) != 0; }
  bool C1() const           { return (_value & (1 <<  9)) != 0; }
  bool C0() const           { return (_value & (1 <<  8)) != 0; }
  int  top() const          { return (_value >> 11) & 7; }
  bool error_status() const { return (_value & (1 <<  7)) != 0; }
  bool stack_fault() const  { return (_value & (1 <<  6)) != 0; }
  bool precision() const    { return (_value & (1 <<  5)) != 0; }
  bool underflow() const    { return (_value & (1 <<  4)) != 0; }
  bool overflow() const     { return (_value & (1 <<  3)) != 0; }
  bool zero_divide() const  { return (_value & (1 <<  2)) != 0; }
  bool denormalized() const { return (_value & (1 <<  1)) != 0; }
  bool invalid() const      { return (_value & (1 <<  0)) != 0; }

  // Prints the low 16 bits in hex, followed by one character per flag
  // ('-' when the flag is clear), the condition codes and the top field.
  void print() const {
    // condition codes, C3 first
    const char cc[5] = {
      C3() ? '3' : '-',
      C2() ? '2' : '-',
      C1() ? '1' : '-',
      C0() ? '0' : '-',
      '\x0'
    };
    // flags, from bit 7 down to bit 0
    const char flags[9] = {
      error_status() ? 'E' : '-',
      stack_fault()  ? 'S' : '-',
      precision()    ? 'P' : '-',
      underflow()    ? 'U' : '-',
      overflow()     ? 'O' : '-',
      zero_divide()  ? 'Z' : '-',
      denormalized() ? 'D' : '-',
      invalid()      ? 'I' : '-',
      '\x0'
    };
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, flags, cc, top());
  }

};
4214 | ||||
// Tag word: a sequence of 2-bit tags packed into _value, tag 0 in the
// lowest two bits.
class TagWord {
 public:
  int32_t _value;

  // Returns the 2-bit tag number i.
  int tag_at(int i) const {
    const int shift = 2 * i;
    return (_value >> shift) & 3;
  }

  // Prints the low 16 bits as four hex digits.
  void print() const {
    const int low16 = _value & 0xFFFF;
    printf("%04x", low16);
  }

};
4226 | ||||
// One saved FPU register: two 32-bit mantissa halves (_m0 low, _m1 high as
// printed) and a 16-bit sign/exponent word.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True for the single bit pattern _ex == 0xFFFF, _m1 == 0xC0000000, _m0 == 0.
  bool is_indefinite() const {
    if (_ex != -1) {
      return false;
    }
    return _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  // Prints sign, the raw exponent word and mantissa in hex; appends "NaN"
  // when the low 15 bits of the exponent word are all ones.
  void print() const {
    const char sign_char = (_ex < 0) ? '-' : '+';
    // all-ones in the low 15 bits means _ex is either 0x7FFF or -1
    const bool exponent_all_ones = (_ex & 0x7FFF) == 0x7FFF;
    const char* kind = exponent_all_ones ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign_char, _ex, _m1, _m0, kind);
  }

};
4244 | ||||
4245 | class FPU_State { | |||
4246 | public: | |||
4247 | enum { | |||
4248 | register_size = 10, | |||
4249 | number_of_registers = 8, | |||
4250 | register_mask = 7 | |||
4251 | }; | |||
4252 | ||||
4253 | ControlWord _control_word; | |||
4254 | StatusWord _status_word; | |||
4255 | TagWord _tag_word; | |||
4256 | int32_t _error_offset; | |||
4257 | int32_t _error_selector; | |||
4258 | int32_t _data_offset; | |||
4259 | int32_t _data_selector; | |||
4260 | int8_t _register[register_size * number_of_registers]; | |||
4261 | ||||
4262 | int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } | |||
4263 | FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } | |||
4264 | ||||
4265 | const char* tag_as_string(int tag) const { | |||
4266 | switch (tag) { | |||
4267 | case 0: return "valid"; | |||
4268 | case 1: return "zero"; | |||
4269 | case 2: return "special"; | |||
4270 | case 3: return "empty"; | |||
4271 | } | |||
4272 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4272); ::breakpoint(); } while (0); | |||
4273 | return NULL__null; | |||
4274 | } | |||
4275 | ||||
4276 | void print() const { | |||
4277 | // print computation registers | |||
4278 | { int t = _status_word.top(); | |||
4279 | for (int i = 0; i < number_of_registers; i++) { | |||
4280 | int j = (i - t) & register_mask; | |||
4281 | printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); | |||
4282 | st(j)->print(); | |||
4283 | printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); | |||
4284 | } | |||
4285 | } | |||
4286 | printf("\n"); | |||
4287 | // print control registers | |||
4288 | printf("ctrl = "); _control_word.print(); printf("\n"); | |||
4289 | printf("stat = "); _status_word .print(); printf("\n"); | |||
4290 | printf("tags = "); _tag_word .print(); printf("\n"); | |||
4291 | } | |||
4292 | ||||
4293 | }; | |||
4294 | ||||
// Bit-field view of a flags register value; each accessor tests one bit.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const        { return (_value & (1 << 11)) != 0; }
  bool direction() const       { return (_value & (1 << 10)) != 0; }
  bool sign() const            { return (_value & (1 <<  7)) != 0; }
  bool zero() const            { return (_value & (1 <<  6)) != 0; }
  bool auxiliary_carry() const { return (_value & (1 <<  4)) != 0; }
  bool parity() const          { return (_value & (1 <<  2)) != 0; }
  bool carry() const           { return (_value & (1 <<  0)) != 0; }

  // Prints the raw value in hex followed by one letter per set flag
  // ('-' for a clear flag), in the order O D S Z A P C.
  void print() const {
    const char flags[8] = {
      overflow()        ? 'O' : '-',
      direction()       ? 'D' : '-',
      sign()            ? 'S' : '-',
      zero()            ? 'Z' : '-',
      auxiliary_carry() ? 'A' : '-',
      parity()          ? 'P' : '-',
      carry()           ? 'C' : '-',
      '\x0'
    };
    printf("%08x flags = %s", _value, flags);
  }

};
4323 | ||||
// A single saved 32-bit integer register value.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value as 8 hex digits followed by its signed decimal form.
  void print() const {
    const int32_t v = _value;
    printf("%08x %11d", v, v);
  }

};
4333 | ||||
4334 | class IU_State { | |||
4335 | public: | |||
4336 | Flag_Register _eflags; | |||
4337 | IU_Register _rdi; | |||
4338 | IU_Register _rsi; | |||
4339 | IU_Register _rbp; | |||
4340 | IU_Register _rsp; | |||
4341 | IU_Register _rbx; | |||
4342 | IU_Register _rdx; | |||
4343 | IU_Register _rcx; | |||
4344 | IU_Register _rax; | |||
4345 | ||||
4346 | void print() const { | |||
4347 | // computation registers | |||
4348 | printf("rax, = "); _rax.print(); printf("\n"); | |||
4349 | printf("rbx, = "); _rbx.print(); printf("\n"); | |||
4350 | printf("rcx = "); _rcx.print(); printf("\n"); | |||
4351 | printf("rdx = "); _rdx.print(); printf("\n"); | |||
4352 | printf("rdi = "); _rdi.print(); printf("\n"); | |||
4353 | printf("rsi = "); _rsi.print(); printf("\n"); | |||
4354 | printf("rbp, = "); _rbp.print(); printf("\n"); | |||
4355 | printf("rsp = "); _rsp.print(); printf("\n"); | |||
4356 | printf("\n"); | |||
4357 | // control registers | |||
4358 | printf("flgs = "); _eflags.print(); printf("\n"); | |||
4359 | } | |||
4360 | }; | |||
4361 | ||||
4362 | ||||
4363 | class CPU_State { | |||
4364 | public: | |||
4365 | FPU_State _fpu_state; | |||
4366 | IU_State _iu_state; | |||
4367 | ||||
4368 | void print() const { | |||
4369 | printf("--------------------------------------------------\n"); | |||
4370 | _iu_state .print(); | |||
4371 | printf("\n"); | |||
4372 | _fpu_state.print(); | |||
4373 | printf("--------------------------------------------------\n"); | |||
4374 | } | |||
4375 | ||||
4376 | }; | |||
4377 | ||||
4378 | ||||
4379 | static void _print_CPU_state(CPU_State* state) { | |||
4380 | state->print(); | |||
4381 | }; | |||
4382 | ||||
4383 | ||||
4384 | void MacroAssembler::print_CPU_state() { | |||
4385 | push_CPU_state(); | |||
4386 | push(rsp); // pass CPU state | |||
4387 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)((address)((address_word)(_print_CPU_state))))); | |||
4388 | addptr(rsp, wordSize); // discard argument | |||
4389 | pop_CPU_state(); | |||
4390 | } | |||
4391 | ||||
4392 | ||||
4393 | #ifndef _LP641 | |||
4394 | static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { | |||
4395 | static int counter = 0; | |||
4396 | FPU_State* fs = &state->_fpu_state; | |||
4397 | counter++; | |||
4398 | // For leaf calls, only verify that the top few elements remain empty. | |||
4399 | // We only need 1 empty at the top for C2 code. | |||
4400 | if( stack_depth < 0 ) { | |||
4401 | if( fs->tag_for_st(7) != 3 ) { | |||
4402 | printf("FPR7 not empty\n"); | |||
4403 | state->print(); | |||
4404 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4404, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); | |||
4405 | return false; | |||
4406 | } | |||
4407 | return true; // All other stack states do not matter | |||
4408 | } | |||
4409 | ||||
4410 | assert((fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std(),do { if (!((fs->_control_word._value & 0xffff) == StubRoutines ::x86::fpu_cntrl_wrd_std())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4411, "assert(" "(fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std()" ") failed", "bad FPU control word"); ::breakpoint(); } } while (0) | |||
4411 | "bad FPU control word")do { if (!((fs->_control_word._value & 0xffff) == StubRoutines ::x86::fpu_cntrl_wrd_std())) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4411, "assert(" "(fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std()" ") failed", "bad FPU control word"); ::breakpoint(); } } while (0); | |||
4412 | ||||
4413 | // compute stack depth | |||
4414 | int i = 0; | |||
4415 | while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; | |||
4416 | int d = i; | |||
4417 | while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; | |||
4418 | // verify findings | |||
4419 | if (i != FPU_State::number_of_registers) { | |||
4420 | // stack not contiguous | |||
4421 | printf("%s: stack not contiguous at ST%d\n", s, i); | |||
4422 | state->print(); | |||
4423 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4423, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); | |||
4424 | return false; | |||
4425 | } | |||
4426 | // check if computed stack depth corresponds to expected stack depth | |||
4427 | if (stack_depth < 0) { | |||
4428 | // expected stack depth is -stack_depth or less | |||
4429 | if (d > -stack_depth) { | |||
4430 | // too many elements on the stack | |||
4431 | printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); | |||
4432 | state->print(); | |||
4433 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4433, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); | |||
4434 | return false; | |||
4435 | } | |||
4436 | } else { | |||
4437 | // expected stack depth is stack_depth | |||
4438 | if (d != stack_depth) { | |||
4439 | // wrong stack depth | |||
4440 | printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); | |||
4441 | state->print(); | |||
4442 | assert(false, "error")do { if (!(false)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4442, "assert(" "false" ") failed", "error"); ::breakpoint( ); } } while (0); | |||
4443 | return false; | |||
4444 | } | |||
4445 | } | |||
4446 | // everything is cool | |||
4447 | return true; | |||
4448 | } | |||
4449 | ||||
4450 | void MacroAssembler::verify_FPU(int stack_depth, const char* s) { | |||
4451 | if (!VerifyFPU) return; | |||
4452 | push_CPU_state(); | |||
4453 | push(rsp); // pass CPU state | |||
4454 | ExternalAddress msg((address) s); | |||
4455 | // pass message string s | |||
4456 | pushptr(msg.addr()); | |||
4457 | push(stack_depth); // pass stack depth | |||
4458 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)((address)((address_word)(_verify_FPU))))); | |||
4459 | addptr(rsp, 3 * wordSize); // discard arguments | |||
4460 | // check for error | |||
4461 | { Label L; | |||
4462 | testl(rax, rax); | |||
4463 | jcc(Assembler::notZero, L); | |||
4464 | int3(); // break if error condition | |||
4465 | bind(L); | |||
4466 | } | |||
4467 | pop_CPU_state(); | |||
4468 | } | |||
4469 | #endif // _LP64 | |||
4470 | ||||
4471 | void MacroAssembler::restore_cpu_control_state_after_jni() { | |||
4472 | // Either restore the MXCSR register after returning from the JNI Call | |||
4473 | // or verify that it wasn't changed (with -Xcheck:jni flag). | |||
4474 | if (VM_Version::supports_sse()) { | |||
4475 | if (RestoreMXCSROnJNICalls) { | |||
4476 | ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std())); | |||
4477 | } else if (CheckJNICalls) { | |||
4478 | call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry())); | |||
4479 | } | |||
4480 | } | |||
4481 | // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. | |||
4482 | vzeroupper(); | |||
4483 | // Reset k1 to 0xffff. | |||
4484 | ||||
4485 | #ifdef COMPILER21 | |||
4486 | if (PostLoopMultiversioning && VM_Version::supports_evex()) { | |||
4487 | push(rcx); | |||
4488 | movl(rcx, 0xffff); | |||
4489 | kmovwl(k1, rcx); | |||
4490 | pop(rcx); | |||
4491 | } | |||
4492 | #endif // COMPILER2 | |||
4493 | ||||
4494 | #ifndef _LP641 | |||
4495 | // Either restore the x87 floating pointer control word after returning | |||
4496 | // from the JNI call or verify that it wasn't changed. | |||
4497 | if (CheckJNICalls) { | |||
4498 | call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); | |||
4499 | } | |||
4500 | #endif // _LP64 | |||
4501 | } | |||
4502 | ||||
4503 | // ((OopHandle)result).resolve(); | |||
4504 | void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { | |||
4505 | assert_different_registers(result, tmp); | |||
4506 | ||||
4507 | // Only 64 bit platforms support GCs that require a tmp register | |||
4508 | // Only IN_HEAP loads require a thread_tmp register | |||
4509 | // OopHandle::resolve is an indirection like jobject. | |||
4510 | access_load_at(T_OBJECT, IN_NATIVE, | |||
4511 | result, Address(result, 0), tmp, /*tmp_thread*/noreg); | |||
4512 | } | |||
4513 | ||||
4514 | // ((WeakHandle)result).resolve(); | |||
4515 | void MacroAssembler::resolve_weak_handle(Register rresult, Register rtmp) { | |||
4516 | assert_different_registers(rresult, rtmp); | |||
4517 | Label resolved; | |||
4518 | ||||
4519 | // A null weak handle resolves to null. | |||
4520 | cmpptr(rresult, 0); | |||
4521 | jcc(Assembler::equal, resolved); | |||
4522 | ||||
4523 | // Only 64 bit platforms support GCs that require a tmp register | |||
4524 | // Only IN_HEAP loads require a thread_tmp register | |||
4525 | // WeakHandle::resolve is an indirection like jweak. | |||
4526 | access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, | |||
4527 | rresult, Address(rresult, 0), rtmp, /*tmp_thread*/noreg); | |||
4528 | bind(resolved); | |||
4529 | } | |||
4530 | ||||
4531 | void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { | |||
4532 | // get mirror | |||
4533 | const int mirror_offset = in_bytes(Klass::java_mirror_offset()); | |||
4534 | load_method_holder(mirror, method); | |||
4535 | movptr(mirror, Address(mirror, mirror_offset)); | |||
4536 | resolve_oop_handle(mirror, tmp); | |||
4537 | } | |||
4538 | ||||
4539 | void MacroAssembler::load_method_holder_cld(Register rresult, Register rmethod) { | |||
4540 | load_method_holder(rresult, rmethod); | |||
4541 | movptr(rresult, Address(rresult, InstanceKlass::class_loader_data_offset())); | |||
4542 | } | |||
4543 | ||||
4544 | void MacroAssembler::load_method_holder(Register holder, Register method) { | |||
4545 | movptr(holder, Address(method, Method::const_offset())); // ConstMethod* | |||
4546 | movptr(holder, Address(holder, ConstMethod::constants_offset())); // ConstantPool* | |||
4547 | movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* | |||
4548 | } | |||
4549 | ||||
4550 | void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { | |||
4551 | assert_different_registers(src, tmp); | |||
4552 | assert_different_registers(dst, tmp); | |||
4553 | #ifdef _LP641 | |||
4554 | if (UseCompressedClassPointers) { | |||
4555 | movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); | |||
4556 | decode_klass_not_null(dst, tmp); | |||
4557 | } else | |||
4558 | #endif | |||
4559 | movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); | |||
4560 | } | |||
4561 | ||||
4562 | void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { | |||
4563 | assert_different_registers(src, tmp); | |||
4564 | assert_different_registers(dst, tmp); | |||
4565 | #ifdef _LP641 | |||
4566 | if (UseCompressedClassPointers) { | |||
4567 | encode_klass_not_null(src, tmp); | |||
4568 | movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); | |||
4569 | } else | |||
4570 | #endif | |||
4571 | movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); | |||
4572 | } | |||
4573 | ||||
4574 | void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, | |||
4575 | Register tmp1, Register thread_tmp) { | |||
4576 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); | |||
4577 | decorators = AccessInternal::decorator_fixup(decorators); | |||
4578 | bool as_raw = (decorators & AS_RAW) != 0; | |||
4579 | if (as_raw) { | |||
4580 | bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); | |||
4581 | } else { | |||
4582 | bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); | |||
4583 | } | |||
4584 | } | |||
4585 | ||||
4586 | void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, | |||
4587 | Register tmp1, Register tmp2) { | |||
4588 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); | |||
4589 | decorators = AccessInternal::decorator_fixup(decorators); | |||
4590 | bool as_raw = (decorators & AS_RAW) != 0; | |||
4591 | if (as_raw) { | |||
4592 | bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); | |||
4593 | } else { | |||
4594 | bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); | |||
4595 | } | |||
4596 | } | |||
4597 | ||||
4598 | void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, | |||
4599 | Register thread_tmp, DecoratorSet decorators) { | |||
4600 | access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); | |||
4601 | } | |||
4602 | ||||
4603 | // Doesn't do verfication, generates fixed size code | |||
4604 | void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, | |||
4605 | Register thread_tmp, DecoratorSet decorators) { | |||
4606 | access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); | |||
4607 | } | |||
4608 | ||||
4609 | void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, | |||
4610 | Register tmp2, DecoratorSet decorators) { | |||
4611 | access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); | |||
4612 | } | |||
4613 | ||||
4614 | // Used for storing NULLs. | |||
4615 | void MacroAssembler::store_heap_oop_null(Address dst) { | |||
4616 | access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); | |||
4617 | } | |||
4618 | ||||
4619 | #ifdef _LP641 | |||
4620 | void MacroAssembler::store_klass_gap(Register dst, Register src) { | |||
4621 | if (UseCompressedClassPointers) { | |||
4622 | // Store to klass gap in destination | |||
4623 | movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); | |||
4624 | } | |||
4625 | } | |||
4626 | ||||
4627 | #ifdef ASSERT1 | |||
4628 | void MacroAssembler::verify_heapbase(const char* msg) { | |||
4629 | assert (UseCompressedOops, "should be compressed")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4629, "assert(" "UseCompressedOops" ") failed", "should be compressed" ); ::breakpoint(); } } while (0); | |||
4630 | assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4630, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized" ); ::breakpoint(); } } while (0); | |||
4631 | if (CheckCompressedOops) { | |||
4632 | Label ok; | |||
4633 | push(rscratch1); // cmpptr trashes rscratch1 | |||
4634 | cmpptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr())); | |||
4635 | jcc(Assembler::equal, ok); | |||
4636 | STOP(msg)block_comment(msg); stop(msg); | |||
4637 | bind(ok); | |||
4638 | pop(rscratch1); | |||
4639 | } | |||
4640 | } | |||
4641 | #endif | |||
4642 | ||||
4643 | // Algorithm must match oop.inline.hpp encode_heap_oop. | |||
4644 | void MacroAssembler::encode_heap_oop(Register r) { | |||
4645 | #ifdef ASSERT1 | |||
4646 | verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); | |||
4647 | #endif | |||
4648 | verify_oop_msg(r, "broken oop in encode_heap_oop")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in encode_heap_oop\"" , "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4648); | |||
4649 | if (CompressedOops::base() == NULL__null) { | |||
4650 | if (CompressedOops::shift() != 0) { | |||
4651 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4651, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4652 | shrq(r, LogMinObjAlignmentInBytes); | |||
4653 | } | |||
4654 | return; | |||
4655 | } | |||
4656 | testq(r, r); | |||
4657 | cmovq(Assembler::equal, r, r12_heapbase); | |||
4658 | subq(r, r12_heapbase); | |||
4659 | shrq(r, LogMinObjAlignmentInBytes); | |||
4660 | } | |||
4661 | ||||
4662 | void MacroAssembler::encode_heap_oop_not_null(Register r) { | |||
4663 | #ifdef ASSERT1 | |||
4664 | verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); | |||
4665 | if (CheckCompressedOops) { | |||
4666 | Label ok; | |||
4667 | testq(r, r); | |||
4668 | jcc(Assembler::notEqual, ok); | |||
4669 | STOP("null oop passed to encode_heap_oop_not_null")block_comment("null oop passed to encode_heap_oop_not_null"); stop("null oop passed to encode_heap_oop_not_null"); | |||
4670 | bind(ok); | |||
4671 | } | |||
4672 | #endif | |||
4673 | verify_oop_msg(r, "broken oop in encode_heap_oop_not_null")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in encode_heap_oop_not_null\"" , "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4673); | |||
4674 | if (CompressedOops::base() != NULL__null) { | |||
4675 | subq(r, r12_heapbase); | |||
4676 | } | |||
4677 | if (CompressedOops::shift() != 0) { | |||
4678 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4678, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4679 | shrq(r, LogMinObjAlignmentInBytes); | |||
4680 | } | |||
4681 | } | |||
4682 | ||||
4683 | void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { | |||
4684 | #ifdef ASSERT1 | |||
4685 | verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); | |||
4686 | if (CheckCompressedOops) { | |||
4687 | Label ok; | |||
4688 | testq(src, src); | |||
4689 | jcc(Assembler::notEqual, ok); | |||
4690 | STOP("null oop passed to encode_heap_oop_not_null2")block_comment("null oop passed to encode_heap_oop_not_null2") ; stop("null oop passed to encode_heap_oop_not_null2"); | |||
4691 | bind(ok); | |||
4692 | } | |||
4693 | #endif | |||
4694 | verify_oop_msg(src, "broken oop in encode_heap_oop_not_null2")_verify_oop_checked(src, "broken oop " "src" ", " "\"broken oop in encode_heap_oop_not_null2\"" , "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4694); | |||
4695 | if (dst != src) { | |||
4696 | movq(dst, src); | |||
4697 | } | |||
4698 | if (CompressedOops::base() != NULL__null) { | |||
4699 | subq(dst, r12_heapbase); | |||
4700 | } | |||
4701 | if (CompressedOops::shift() != 0) { | |||
4702 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4702, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4703 | shrq(dst, LogMinObjAlignmentInBytes); | |||
4704 | } | |||
4705 | } | |||
4706 | ||||
4707 | void MacroAssembler::decode_heap_oop(Register r) { | |||
4708 | #ifdef ASSERT1 | |||
4709 | verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); | |||
4710 | #endif | |||
4711 | if (CompressedOops::base() == NULL__null) { | |||
4712 | if (CompressedOops::shift() != 0) { | |||
4713 | assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4713, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4714 | shlq(r, LogMinObjAlignmentInBytes); | |||
4715 | } | |||
4716 | } else { | |||
4717 | Label done; | |||
4718 | shlq(r, LogMinObjAlignmentInBytes); | |||
4719 | jccb(Assembler::equal, done)jccb_0(Assembler::equal, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4719); | |||
4720 | addq(r, r12_heapbase); | |||
4721 | bind(done); | |||
4722 | } | |||
4723 | verify_oop_msg(r, "broken oop in decode_heap_oop")_verify_oop_checked(r, "broken oop " "r" ", " "\"broken oop in decode_heap_oop\"" , "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4723); | |||
4724 | } | |||
4725 | ||||
4726 | void MacroAssembler::decode_heap_oop_not_null(Register r) { | |||
4727 | // Note: it will change flags | |||
4728 | assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4728, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4729 | assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4729, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized" ); ::breakpoint(); } } while (0); | |||
4730 | // Cannot assert, unverified entry point counts instructions (see .ad file) | |||
4731 | // vtableStubs also counts instructions in pd_code_size_limit. | |||
4732 | // Also do not verify_oop as this is called by verify_oop. | |||
4733 | if (CompressedOops::shift() != 0) { | |||
4734 | assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4734, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4735 | shlq(r, LogMinObjAlignmentInBytes); | |||
4736 | if (CompressedOops::base() != NULL__null) { | |||
4737 | addq(r, r12_heapbase); | |||
4738 | } | |||
4739 | } else { | |||
4740 | assert (CompressedOops::base() == NULL, "sanity")do { if (!(CompressedOops::base() == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4740, "assert(" "CompressedOops::base() == __null" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
4741 | } | |||
4742 | } | |||
4743 | ||||
4744 | void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { | |||
4745 | // Note: it will change flags | |||
4746 | assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4746, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4747 | assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4747, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized" ); ::breakpoint(); } } while (0); | |||
4748 | // Cannot assert, unverified entry point counts instructions (see .ad file) | |||
4749 | // vtableStubs also counts instructions in pd_code_size_limit. | |||
4750 | // Also do not verify_oop as this is called by verify_oop. | |||
4751 | if (CompressedOops::shift() != 0) { | |||
4752 | assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong")do { if (!(LogMinObjAlignmentInBytes == CompressedOops::shift ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4752, "assert(" "LogMinObjAlignmentInBytes == CompressedOops::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4753 | if (LogMinObjAlignmentInBytes == Address::times_8) { | |||
4754 | leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); | |||
4755 | } else { | |||
4756 | if (dst != src) { | |||
4757 | movq(dst, src); | |||
4758 | } | |||
4759 | shlq(dst, LogMinObjAlignmentInBytes); | |||
4760 | if (CompressedOops::base() != NULL__null) { | |||
4761 | addq(dst, r12_heapbase); | |||
4762 | } | |||
4763 | } | |||
4764 | } else { | |||
4765 | assert (CompressedOops::base() == NULL, "sanity")do { if (!(CompressedOops::base() == __null)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4765, "assert(" "CompressedOops::base() == __null" ") failed" , "sanity"); ::breakpoint(); } } while (0); | |||
4766 | if (dst != src) { | |||
4767 | movq(dst, src); | |||
4768 | } | |||
4769 | } | |||
4770 | } | |||
4771 | ||||
4772 | void MacroAssembler::encode_klass_not_null(Register r, Register tmp) { | |||
4773 | assert_different_registers(r, tmp); | |||
4774 | if (CompressedKlassPointers::base() != NULL__null) { | |||
4775 | mov64(tmp, (int64_t)CompressedKlassPointers::base()); | |||
4776 | subq(r, tmp); | |||
4777 | } | |||
4778 | if (CompressedKlassPointers::shift() != 0) { | |||
4779 | assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers ::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4779, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4780 | shrq(r, LogKlassAlignmentInBytes); | |||
4781 | } | |||
4782 | } | |||
4783 | ||||
4784 | void MacroAssembler::encode_and_move_klass_not_null(Register dst, Register src) { | |||
4785 | assert_different_registers(src, dst); | |||
4786 | if (CompressedKlassPointers::base() != NULL__null) { | |||
4787 | mov64(dst, -(int64_t)CompressedKlassPointers::base()); | |||
4788 | addq(dst, src); | |||
4789 | } else { | |||
4790 | movptr(dst, src); | |||
4791 | } | |||
4792 | if (CompressedKlassPointers::shift() != 0) { | |||
4793 | assert (LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers ::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4793, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4794 | shrq(dst, LogKlassAlignmentInBytes); | |||
4795 | } | |||
4796 | } | |||
4797 | ||||
4798 | void MacroAssembler::decode_klass_not_null(Register r, Register tmp) { | |||
4799 | assert_different_registers(r, tmp); | |||
4800 | // Note: it will change flags | |||
4801 | assert(UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4801, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4802 | // Cannot assert, unverified entry point counts instructions (see .ad file) | |||
4803 | // vtableStubs also counts instructions in pd_code_size_limit. | |||
4804 | // Also do not verify_oop as this is called by verify_oop. | |||
4805 | if (CompressedKlassPointers::shift() != 0) { | |||
4806 | assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers ::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4806, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4807 | shlq(r, LogKlassAlignmentInBytes); | |||
4808 | } | |||
4809 | if (CompressedKlassPointers::base() != NULL__null) { | |||
4810 | mov64(tmp, (int64_t)CompressedKlassPointers::base()); | |||
4811 | addq(r, tmp); | |||
4812 | } | |||
4813 | } | |||
4814 | ||||
4815 | void MacroAssembler::decode_and_move_klass_not_null(Register dst, Register src) { | |||
4816 | assert_different_registers(src, dst); | |||
4817 | // Note: it will change flags | |||
4818 | assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4818, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4819 | // Cannot assert, unverified entry point counts instructions (see .ad file) | |||
4820 | // vtableStubs also counts instructions in pd_code_size_limit. | |||
4821 | // Also do not verify_oop as this is called by verify_oop. | |||
4822 | ||||
4823 | if (CompressedKlassPointers::base() == NULL__null && | |||
4824 | CompressedKlassPointers::shift() == 0) { | |||
4825 | // The best case scenario is that there is no base or shift. Then it is already | |||
4826 | // a pointer that needs nothing but a register rename. | |||
4827 | movl(dst, src); | |||
4828 | } else { | |||
4829 | if (CompressedKlassPointers::base() != NULL__null) { | |||
4830 | mov64(dst, (int64_t)CompressedKlassPointers::base()); | |||
4831 | } else { | |||
4832 | xorq(dst, dst); | |||
4833 | } | |||
4834 | if (CompressedKlassPointers::shift() != 0) { | |||
4835 | assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong")do { if (!(LogKlassAlignmentInBytes == CompressedKlassPointers ::shift())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4835, "assert(" "LogKlassAlignmentInBytes == CompressedKlassPointers::shift()" ") failed", "decode alg wrong"); ::breakpoint(); } } while ( 0); | |||
4836 | assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?")do { if (!(LogKlassAlignmentInBytes == Address::times_8)) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4836, "assert(" "LogKlassAlignmentInBytes == Address::times_8" ") failed", "klass not aligned on 64bits?"); ::breakpoint(); } } while (0); | |||
4837 | leaq(dst, Address(dst, src, Address::times_8, 0)); | |||
4838 | } else { | |||
4839 | addq(dst, src); | |||
4840 | } | |||
4841 | } | |||
4842 | } | |||
4843 | ||||
4844 | void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { | |||
4845 | assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4845, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4846 | assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4846, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized" ); ::breakpoint(); } } while (0); | |||
4847 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4847, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4848 | int oop_index = oop_recorder()->find_index(obj); | |||
4849 | RelocationHolder rspec = oop_Relocation::spec(oop_index); | |||
4850 | mov_narrow_oop(dst, oop_index, rspec); | |||
4851 | } | |||
4852 | ||||
4853 | void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { | |||
4854 | assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4854, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4855 | assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4855, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized" ); ::breakpoint(); } } while (0); | |||
4856 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4856, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4857 | int oop_index = oop_recorder()->find_index(obj); | |||
4858 | RelocationHolder rspec = oop_Relocation::spec(oop_index); | |||
4859 | mov_narrow_oop(dst, oop_index, rspec); | |||
4860 | } | |||
4861 | ||||
4862 | void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { | |||
4863 | assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4863, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4864 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4864, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4865 | int klass_index = oop_recorder()->find_index(k); | |||
4866 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |||
4867 | mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); | |||
4868 | } | |||
4869 | ||||
4870 | void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { | |||
4871 | assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4871, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4872 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4872, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4873 | int klass_index = oop_recorder()->find_index(k); | |||
4874 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |||
4875 | mov_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); | |||
4876 | } | |||
4877 | ||||
4878 | void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { | |||
4879 | assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4879, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4880 | assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4880, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized" ); ::breakpoint(); } } while (0); | |||
4881 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4881, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4882 | int oop_index = oop_recorder()->find_index(obj); | |||
4883 | RelocationHolder rspec = oop_Relocation::spec(oop_index); | |||
4884 | Assembler::cmp_narrow_oop(dst, oop_index, rspec); | |||
4885 | } | |||
4886 | ||||
4887 | void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { | |||
4888 | assert (UseCompressedOops, "should only be used for compressed headers")do { if (!(UseCompressedOops)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4888, "assert(" "UseCompressedOops" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4889 | assert (Universe::heap() != NULL, "java heap should be initialized")do { if (!(Universe::heap() != __null)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4889, "assert(" "Universe::heap() != __null" ") failed", "java heap should be initialized" ); ::breakpoint(); } } while (0); | |||
4890 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4890, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4891 | int oop_index = oop_recorder()->find_index(obj); | |||
4892 | RelocationHolder rspec = oop_Relocation::spec(oop_index); | |||
4893 | Assembler::cmp_narrow_oop(dst, oop_index, rspec); | |||
4894 | } | |||
4895 | ||||
4896 | void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { | |||
4897 | assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4897, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4898 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4898, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4899 | int klass_index = oop_recorder()->find_index(k); | |||
4900 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |||
4901 | Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); | |||
4902 | } | |||
4903 | ||||
4904 | void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { | |||
4905 | assert (UseCompressedClassPointers, "should only be used for compressed headers")do { if (!(UseCompressedClassPointers)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4905, "assert(" "UseCompressedClassPointers" ") failed", "should only be used for compressed headers" ); ::breakpoint(); } } while (0); | |||
4906 | assert (oop_recorder() != NULL, "this assembler needs an OopRecorder")do { if (!(oop_recorder() != __null)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4906, "assert(" "oop_recorder() != __null" ") failed", "this assembler needs an OopRecorder" ); ::breakpoint(); } } while (0); | |||
4907 | int klass_index = oop_recorder()->find_index(k); | |||
4908 | RelocationHolder rspec = metadata_Relocation::spec(klass_index); | |||
4909 | Assembler::cmp_narrow_oop(dst, CompressedKlassPointers::encode(k), rspec); | |||
4910 | } | |||
4911 | ||||
4912 | void MacroAssembler::reinit_heapbase() { | |||
4913 | if (UseCompressedOops) { | |||
4914 | if (Universe::heap() != NULL__null) { | |||
4915 | if (CompressedOops::base() == NULL__null) { | |||
4916 | MacroAssembler::xorptr(r12_heapbase, r12_heapbase); | |||
4917 | } else { | |||
4918 | mov64(r12_heapbase, (int64_t)CompressedOops::ptrs_base()); | |||
4919 | } | |||
4920 | } else { | |||
4921 | movptr(r12_heapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr())); | |||
4922 | } | |||
4923 | } | |||
4924 | } | |||
4925 | ||||
4926 | #endif // _LP64 | |||
4927 | ||||
4928 | // C2 compiled method's prolog code. | |||
4929 | void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) { | |||
4930 | ||||
4931 | // WARNING: Initial instruction MUST be 5 bytes or longer so that | |||
4932 | // NativeJump::patch_verified_entry will be able to patch out the entry | |||
4933 | // code safely. The push to verify stack depth is ok at 5 bytes, | |||
4934 | // the frame allocation can be either 3 or 6 bytes. So if we don't do | |||
4935 | // stack bang then we must use the 6 byte frame allocation even if | |||
4936 | // we have no frame. :-( | |||
4937 | assert(stack_bang_size >= framesize || stack_bang_size <= 0, "stack bang size incorrect")do { if (!(stack_bang_size >= framesize || stack_bang_size <= 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4937, "assert(" "stack_bang_size >= framesize || stack_bang_size <= 0" ") failed", "stack bang size incorrect"); ::breakpoint(); } } while (0); | |||
4938 | ||||
4939 | assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned")do { if (!((framesize & (StackAlignmentInBytes-1)) == 0)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 4939, "assert(" "(framesize & (StackAlignmentInBytes-1)) == 0" ") failed", "frame size not aligned"); ::breakpoint(); } } while (0); | |||
4940 | // Remove word for return addr | |||
4941 | framesize -= wordSize; | |||
4942 | stack_bang_size -= wordSize; | |||
4943 | ||||
4944 | // Calls to C2R adapters often do not accept exceptional returns. | |||
4945 | // We require that their callers must bang for them. But be careful, because | |||
4946 | // some VM calls (such as call site linkage) can use several kilobytes of | |||
4947 | // stack. But the stack safety zone should account for that. | |||
4948 | // See bugs 4446381, 4468289, 4497237. | |||
4949 | if (stack_bang_size > 0) { | |||
4950 | generate_stack_overflow_check(stack_bang_size); | |||
4951 | ||||
4952 | // We always push rbp, so that on return to interpreter rbp, will be | |||
4953 | // restored correctly and we can correct the stack. | |||
4954 | push(rbp); | |||
4955 | // Save caller's stack pointer into RBP if the frame pointer is preserved. | |||
4956 | if (PreserveFramePointer) { | |||
4957 | mov(rbp, rsp); | |||
4958 | } | |||
4959 | // Remove word for ebp | |||
4960 | framesize -= wordSize; | |||
4961 | ||||
4962 | // Create frame | |||
4963 | if (framesize) { | |||
4964 | subptr(rsp, framesize); | |||
4965 | } | |||
4966 | } else { | |||
4967 | // Create frame (force generation of a 4 byte immediate value) | |||
4968 | subptr_imm32(rsp, framesize); | |||
4969 | ||||
4970 | // Save RBP register now. | |||
4971 | framesize -= wordSize; | |||
4972 | movptr(Address(rsp, framesize), rbp); | |||
4973 | // Save caller's stack pointer into RBP if the frame pointer is preserved. | |||
4974 | if (PreserveFramePointer) { | |||
4975 | movptr(rbp, rsp); | |||
4976 | if (framesize > 0) { | |||
4977 | addptr(rbp, framesize); | |||
4978 | } | |||
4979 | } | |||
4980 | } | |||
4981 | ||||
4982 | if (VerifyStackAtCalls) { // Majik cookie to verify stack depth | |||
4983 | framesize -= wordSize; | |||
4984 | movptr(Address(rsp, framesize), (int32_t)0xbadb100d); | |||
4985 | } | |||
4986 | ||||
4987 | #ifndef _LP641 | |||
4988 | // If method sets FPU control word do it now | |||
4989 | if (fp_mode_24b) { | |||
4990 | fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); | |||
4991 | } | |||
4992 | if (UseSSE >= 2 && VerifyFPU) { | |||
4993 | verify_FPU(0, "FPU stack must be clean on entry"); | |||
4994 | } | |||
4995 | #endif | |||
4996 | ||||
4997 | #ifdef ASSERT1 | |||
4998 | if (VerifyStackAtCalls) { | |||
4999 | Label L; | |||
5000 | push(rax); | |||
5001 | mov(rax, rsp); | |||
5002 | andptr(rax, StackAlignmentInBytes-1); | |||
5003 | cmpptr(rax, StackAlignmentInBytes-wordSize); | |||
5004 | pop(rax); | |||
5005 | jcc(Assembler::equal, L); | |||
5006 | STOP("Stack is not properly aligned!")block_comment("Stack is not properly aligned!"); stop("Stack is not properly aligned!" ); | |||
5007 | bind(L); | |||
5008 | } | |||
5009 | #endif | |||
5010 | ||||
5011 | if (!is_stub) { | |||
5012 | BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); | |||
5013 | bs->nmethod_entry_barrier(this); | |||
5014 | } | |||
5015 | } | |||
5016 | ||||
5017 | #if COMPILER2_OR_JVMCI1 | |||
5018 | ||||
5019 | // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers | |||
5020 | void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) { | |||
5021 | // cnt - number of qwords (8-byte words). | |||
5022 | // base - start address, qword aligned. | |||
5023 | Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end; | |||
5024 | bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0); | |||
5025 | if (use64byteVector) { | |||
5026 | vpxor(xtmp, xtmp, xtmp, AVX_512bit); | |||
5027 | } else if (MaxVectorSize >= 32) { | |||
5028 | vpxor(xtmp, xtmp, xtmp, AVX_256bit); | |||
5029 | } else { | |||
5030 | pxor(xtmp, xtmp); | |||
5031 | } | |||
5032 | jmp(L_zero_64_bytes); | |||
5033 | ||||
5034 | BIND(L_loop); | |||
5035 | if (MaxVectorSize >= 32) { | |||
5036 | fill64(base, 0, xtmp, use64byteVector); | |||
5037 | } else { | |||
5038 | movdqu(Address(base, 0), xtmp); | |||
5039 | movdqu(Address(base, 16), xtmp); | |||
5040 | movdqu(Address(base, 32), xtmp); | |||
5041 | movdqu(Address(base, 48), xtmp); | |||
5042 | } | |||
5043 | addptr(base, 64); | |||
5044 | ||||
5045 | BIND(L_zero_64_bytes); | |||
5046 | subptr(cnt, 8); | |||
5047 | jccb(Assembler::greaterEqual, L_loop)jccb_0(Assembler::greaterEqual, L_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5047); | |||
5048 | ||||
5049 | // Copy trailing 64 bytes | |||
5050 | if (use64byteVector) { | |||
5051 | addptr(cnt, 8); | |||
5052 | jccb(Assembler::equal, L_end)jccb_0(Assembler::equal, L_end, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5052); | |||
5053 | fill64_masked(3, base, 0, xtmp, mask, cnt, rtmp, true); | |||
5054 | jmp(L_end); | |||
5055 | } else { | |||
5056 | addptr(cnt, 4); | |||
5057 | jccb(Assembler::less, L_tail)jccb_0(Assembler::less, L_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5057); | |||
5058 | if (MaxVectorSize >= 32) { | |||
5059 | vmovdqu(Address(base, 0), xtmp); | |||
5060 | } else { | |||
5061 | movdqu(Address(base, 0), xtmp); | |||
5062 | movdqu(Address(base, 16), xtmp); | |||
5063 | } | |||
5064 | } | |||
5065 | addptr(base, 32); | |||
5066 | subptr(cnt, 4); | |||
5067 | ||||
5068 | BIND(L_tail); | |||
5069 | addptr(cnt, 4); | |||
5070 | jccb(Assembler::lessEqual, L_end)jccb_0(Assembler::lessEqual, L_end, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5070); | |||
5071 | if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) { | |||
5072 | fill32_masked(3, base, 0, xtmp, mask, cnt, rtmp); | |||
5073 | } else { | |||
5074 | decrement(cnt); | |||
5075 | ||||
5076 | BIND(L_sloop); | |||
5077 | movq(Address(base, 0), xtmp); | |||
5078 | addptr(base, 8); | |||
5079 | decrement(cnt); | |||
5080 | jccb(Assembler::greaterEqual, L_sloop)jccb_0(Assembler::greaterEqual, L_sloop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5080); | |||
5081 | } | |||
5082 | BIND(L_end); | |||
5083 | } | |||
5084 | ||||
5085 | // Clearing constant sized memory using YMM/ZMM registers. | |||
5086 | void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) { | |||
5087 | assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "")do { if (!(UseAVX > 2 && VM_Version::supports_avx512vlbw ())) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5087, "assert(" "UseAVX > 2 && VM_Version::supports_avx512vlbw()" ") failed", ""); ::breakpoint(); } } while (0); | |||
5088 | bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0); | |||
5089 | ||||
5090 | int vector64_count = (cnt & (~0x7)) >> 3; | |||
5091 | cnt = cnt & 0x7; | |||
5092 | ||||
5093 | // 64 byte initialization loop. | |||
5094 | vpxor(xtmp, xtmp, xtmp, use64byteVector ? AVX_512bit : AVX_256bit); | |||
5095 | for (int i = 0; i < vector64_count; i++) { | |||
5096 | fill64(base, i * 64, xtmp, use64byteVector); | |||
5097 | } | |||
5098 | ||||
5099 | // Clear remaining 64 byte tail. | |||
5100 | int disp = vector64_count * 64; | |||
5101 | if (cnt) { | |||
5102 | switch (cnt) { | |||
5103 | case 1: | |||
5104 | movq(Address(base, disp), xtmp); | |||
5105 | break; | |||
5106 | case 2: | |||
5107 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_128bit); | |||
5108 | break; | |||
5109 | case 3: | |||
5110 | movl(rtmp, 0x7); | |||
5111 | kmovwl(mask, rtmp); | |||
5112 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_256bit); | |||
5113 | break; | |||
5114 | case 4: | |||
5115 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); | |||
5116 | break; | |||
5117 | case 5: | |||
5118 | if (use64byteVector) { | |||
5119 | movl(rtmp, 0x1F); | |||
5120 | kmovwl(mask, rtmp); | |||
5121 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); | |||
5122 | } else { | |||
5123 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); | |||
5124 | movq(Address(base, disp + 32), xtmp); | |||
5125 | } | |||
5126 | break; | |||
5127 | case 6: | |||
5128 | if (use64byteVector) { | |||
5129 | movl(rtmp, 0x3F); | |||
5130 | kmovwl(mask, rtmp); | |||
5131 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); | |||
5132 | } else { | |||
5133 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); | |||
5134 | evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, Assembler::AVX_128bit); | |||
5135 | } | |||
5136 | break; | |||
5137 | case 7: | |||
5138 | if (use64byteVector) { | |||
5139 | movl(rtmp, 0x7F); | |||
5140 | kmovwl(mask, rtmp); | |||
5141 | evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit); | |||
5142 | } else { | |||
5143 | evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit); | |||
5144 | movl(rtmp, 0x7); | |||
5145 | kmovwl(mask, rtmp); | |||
5146 | evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit); | |||
5147 | } | |||
5148 | break; | |||
5149 | default: | |||
5150 | fatal("Unexpected length : %d\n",cnt)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5150, "Unexpected length : %d\n",cnt); ::breakpoint(); } while (0); | |||
5151 | break; | |||
5152 | } | |||
5153 | } | |||
5154 | } | |||
5155 | ||||
5156 | void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, | |||
5157 | bool is_large, KRegister mask) { | |||
5158 | // cnt - number of qwords (8-byte words). | |||
5159 | // base - start address, qword aligned. | |||
5160 | // is_large - if optimizers know cnt is larger than InitArrayShortSize | |||
5161 | assert(base==rdi, "base register must be edi for rep stos")do { if (!(base==rdi)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5161, "assert(" "base==rdi" ") failed", "base register must be edi for rep stos" ); ::breakpoint(); } } while (0); | |||
5162 | assert(tmp==rax, "tmp register must be eax for rep stos")do { if (!(tmp==rax)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5162, "assert(" "tmp==rax" ") failed", "tmp register must be eax for rep stos" ); ::breakpoint(); } } while (0); | |||
5163 | assert(cnt==rcx, "cnt register must be ecx for rep stos")do { if (!(cnt==rcx)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5163, "assert(" "cnt==rcx" ") failed", "cnt register must be ecx for rep stos" ); ::breakpoint(); } } while (0); | |||
5164 | assert(InitArrayShortSize % BytesPerLong == 0,do { if (!(InitArrayShortSize % BytesPerLong == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5165, "assert(" "InitArrayShortSize % BytesPerLong == 0" ") failed" , "InitArrayShortSize should be the multiple of BytesPerLong" ); ::breakpoint(); } } while (0) | |||
5165 | "InitArrayShortSize should be the multiple of BytesPerLong")do { if (!(InitArrayShortSize % BytesPerLong == 0)) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5165, "assert(" "InitArrayShortSize % BytesPerLong == 0" ") failed" , "InitArrayShortSize should be the multiple of BytesPerLong" ); ::breakpoint(); } } while (0); | |||
5166 | ||||
5167 | Label DONE; | |||
5168 | if (!is_large || !UseXMMForObjInit) { | |||
5169 | xorptr(tmp, tmp); | |||
5170 | } | |||
5171 | ||||
5172 | if (!is_large) { | |||
5173 | Label LOOP, LONG; | |||
5174 | cmpptr(cnt, InitArrayShortSize/BytesPerLong); | |||
5175 | jccb(Assembler::greater, LONG)jccb_0(Assembler::greater, LONG, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5175); | |||
5176 | ||||
5177 | NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM | |||
5178 | ||||
5179 | decrement(cnt); | |||
5180 | jccb(Assembler::negative, DONE)jccb_0(Assembler::negative, DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5180); // Zero length | |||
5181 | ||||
5182 | // Use individual pointer-sized stores for small counts: | |||
5183 | BIND(LOOP); | |||
5184 | movptr(Address(base, cnt, Address::times_ptr), tmp); | |||
5185 | decrement(cnt); | |||
5186 | jccb(Assembler::greaterEqual, LOOP)jccb_0(Assembler::greaterEqual, LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5186); | |||
5187 | jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5187); | |||
5188 | ||||
5189 | BIND(LONG); | |||
5190 | } | |||
5191 | ||||
5192 | // Use longer rep-prefixed ops for non-small counts: | |||
5193 | if (UseFastStosb) { | |||
5194 | shlptr(cnt, 3); // convert to number of bytes | |||
5195 | rep_stosb(); | |||
5196 | } else if (UseXMMForObjInit) { | |||
5197 | xmm_clear_mem(base, cnt, tmp, xtmp, mask); | |||
5198 | } else { | |||
5199 | NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM | |||
5200 | rep_stos(); | |||
5201 | } | |||
5202 | ||||
5203 | BIND(DONE); | |||
5204 | } | |||
5205 | ||||
5206 | #endif //COMPILER2_OR_JVMCI | |||
5207 | ||||
5208 | ||||
// Emits code that fills memory starting at address `to` with `value`, for `count`
// elements of type `t` (T_BYTE, T_SHORT or T_INT; any other type reaches
// ShouldNotReachHere()).
//   aligned - only consulted to decide whether the 1-/2-byte pre-alignment stores are emitted
//   to      - destination address; advanced as stores are emitted
//   value   - fill value; masked and replicated in place to a full 32-bit pattern
//   count   - element count; consumed (modified) by the emitted code
//   rtmp    - scratch general-purpose register
//   xtmp    - scratch XMM register, used on the UseSSE >= 2 paths
5209 | void MacroAssembler::generate_fill(BasicType t, bool aligned, | |||
5210 | Register to, Register value, Register count, | |||
5211 | Register rtmp, XMMRegister xtmp) { | |||
5212 | ShortBranchVerifier sbv(this); | |||
5213 | assert_different_registers(to, value, count, rtmp); | |||
5214 | Label L_exit; | |||
5215 | Label L_fill_2_bytes, L_fill_4_bytes; | |||
5216 | ||||
// NOTE(review): the trailing '1' in COMPILER21/_LP641 looks like the report's inline
// macro-expansion rendering (presumably COMPILER2 and _LP64) - confirm against the real source.
5217 | #if defined(COMPILER21) && defined(_LP641) | |||
5218 | if(MaxVectorSize >=32 && | |||
5219 | VM_Version::supports_avx512vlbw() && | |||
5220 | VM_Version::supports_bmi2()) { | |||
5221 | generate_fill_avx3(t, to, value, count, rtmp, xtmp); | |||
5222 | return; | |||
5223 | } | |||
5224 | #endif | |||
5225 | ||||
// shift = log2(elements per 4 bytes): (N << shift) is the number of elements covering 4*N bytes.
5226 | int shift = -1; | |||
5227 | switch (t) { | |||
5228 | case T_BYTE: | |||
5229 | shift = 2; | |||
5230 | break; | |||
5231 | case T_SHORT: | |||
5232 | shift = 1; | |||
5233 | break; | |||
5234 | case T_INT: | |||
5235 | shift = 0; | |||
5236 | break; | |||
5237 | default: ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5237); ::breakpoint(); } while (0); | |||
5238 | } | |||
5239 | ||||
// Replicate the fill pattern across all 32 bits of `value`:
// byte -> 16 bits here, then 16 bits -> 32 bits in the shared step below.
5240 | if (t == T_BYTE) { | |||
5241 | andl(value, 0xff); | |||
5242 | movl(rtmp, value); | |||
5243 | shll(rtmp, 8); | |||
5244 | orl(value, rtmp); | |||
5245 | } | |||
5246 | if (t == T_SHORT) { | |||
5247 | andl(value, 0xffff); | |||
5248 | } | |||
5249 | if (t == T_BYTE || t == T_SHORT) { | |||
5250 | movl(rtmp, value); | |||
5251 | shll(rtmp, 16); | |||
5252 | orl(value, rtmp); | |||
5253 | } | |||
5254 | ||||
5255 | cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element | |||
5256 | jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp | |||
5257 | if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { | |||
5258 | Label L_skip_align2; | |||
5259 | // align destination address (to) at 4 bytes address boundary | |||
5260 | if (t == T_BYTE) { | |||
5261 | Label L_skip_align1; | |||
5262 | // One byte misalignment happens only for byte arrays | |||
5263 | testptr(to, 1); | |||
5264 | jccb(Assembler::zero, L_skip_align1)jccb_0(Assembler::zero, L_skip_align1, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5264); | |||
5265 | movb(Address(to, 0), value); | |||
5266 | increment(to); | |||
5267 | decrement(count); | |||
5268 | BIND(L_skip_align1); | |||
5269 | } | |||
5270 | // Two bytes misalignment happens only for byte and short (char) arrays | |||
5271 | testptr(to, 2); | |||
5272 | jccb(Assembler::zero, L_skip_align2)jccb_0(Assembler::zero, L_skip_align2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5272); | |||
5273 | movw(Address(to, 0), value); | |||
5274 | addptr(to, 2); | |||
// 1 << (shift-1) is the number of elements in the 2 bytes just stored.
5275 | subl(count, 1<<(shift-1)); | |||
5276 | BIND(L_skip_align2); | |||
5277 | } | |||
// UseSSE < 2: only 32-bit general-purpose stores are emitted on this path.
5278 | if (UseSSE < 2) { | |||
5279 | Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; | |||
5280 | // Fill 32-byte chunks | |||
// count is kept biased by -(8 << shift) inside the loop; the bias is removed at L_check_fill_8_bytes.
5281 | subl(count, 8 << shift); | |||
5282 | jcc(Assembler::less, L_check_fill_8_bytes); | |||
5283 | align(16); | |||
5284 | ||||
5285 | BIND(L_fill_32_bytes_loop); | |||
5286 | ||||
// Unrolled at code-generation time: eight 4-byte stores per loop iteration.
5287 | for (int i = 0; i < 32; i += 4) { | |||
5288 | movl(Address(to, i), value); | |||
5289 | } | |||
5290 | ||||
5291 | addptr(to, 32); | |||
5292 | subl(count, 8 << shift); | |||
5293 | jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | |||
5294 | BIND(L_check_fill_8_bytes); | |||
5295 | addl(count, 8 << shift); | |||
5296 | jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5296); | |||
5297 | jmpb(L_fill_8_bytes)jmpb_0(L_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5297); | |||
5298 | ||||
5299 | // | |||
5300 | // length is too short, just fill qwords | |||
5301 | // | |||
5302 | BIND(L_fill_8_bytes_loop); | |||
5303 | movl(Address(to, 0), value); | |||
5304 | movl(Address(to, 4), value); | |||
5305 | addptr(to, 8); | |||
5306 | BIND(L_fill_8_bytes); | |||
// 1 << (shift + 1) is the number of elements in 8 bytes.
5307 | subl(count, 1 << (shift + 1)); | |||
5308 | jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); | |||
5309 | // fall through to fill 4 bytes | |||
5310 | } else { | |||
5311 | Label L_fill_32_bytes; | |||
5312 | if (!UseUnalignedLoadStores) { | |||
5313 | // align to 8 bytes, we know we are 4 byte aligned to start | |||
5314 | testptr(to, 4); | |||
5315 | jccb(Assembler::zero, L_fill_32_bytes)jccb_0(Assembler::zero, L_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5315); | |||
5316 | movl(Address(to, 0), value); | |||
5317 | addptr(to, 4); | |||
5318 | subl(count, 1<<shift); | |||
5319 | } | |||
5320 | BIND(L_fill_32_bytes); | |||
5321 | { | |||
5322 | assert( UseSSE >= 2, "supported cpu only" )do { if (!(UseSSE >= 2)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5322, "assert(" "UseSSE >= 2" ") failed", "supported cpu only" ); ::breakpoint(); } } while (0); | |||
5323 | Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; | |||
// Move the 32-bit pattern into xtmp; each branch below broadcasts it to the vector width it uses.
5324 | movdl(xtmp, value); | |||
5325 | if (UseAVX >= 2 && UseUnalignedLoadStores) { | |||
5326 | Label L_check_fill_32_bytes; | |||
5327 | if (UseAVX > 2) { | |||
5328 | // Fill 64-byte chunks | |||
5329 | Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2; | |||
5330 | ||||
5331 | // If number of bytes to fill < VM_Version::avx3_threshold(), perform fill using AVX2 | |||
5332 | cmpl(count, VM_Version::avx3_threshold()); | |||
5333 | jccb(Assembler::below, L_check_fill_64_bytes_avx2)jccb_0(Assembler::below, L_check_fill_64_bytes_avx2, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5333); | |||
5334 | ||||
5335 | vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit); | |||
5336 | ||||
// 16 << shift is the number of elements in 64 bytes; count stays biased by that amount in the loop.
5337 | subl(count, 16 << shift); | |||
5338 | jccb(Assembler::less, L_check_fill_32_bytes)jccb_0(Assembler::less, L_check_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5338); | |||
5339 | align(16); | |||
5340 | ||||
5341 | BIND(L_fill_64_bytes_loop_avx3); | |||
5342 | evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit); | |||
5343 | addptr(to, 64); | |||
5344 | subl(count, 16 << shift); | |||
5345 | jcc(Assembler::greaterEqual, L_fill_64_bytes_loop_avx3); | |||
5346 | jmpb(L_check_fill_32_bytes)jmpb_0(L_check_fill_32_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5346); | |||
5347 | ||||
5348 | BIND(L_check_fill_64_bytes_avx2); | |||
5349 | } | |||
5350 | // Fill 64-byte chunks | |||
5351 | Label L_fill_64_bytes_loop; | |||
5352 | vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit); | |||
5353 | ||||
5354 | subl(count, 16 << shift); | |||
5355 | jcc(Assembler::less, L_check_fill_32_bytes); | |||
5356 | align(16); | |||
5357 | ||||
5358 | BIND(L_fill_64_bytes_loop); | |||
5359 | vmovdqu(Address(to, 0), xtmp); | |||
5360 | vmovdqu(Address(to, 32), xtmp); | |||
5361 | addptr(to, 64); | |||
5362 | subl(count, 16 << shift); | |||
5363 | jcc(Assembler::greaterEqual, L_fill_64_bytes_loop); | |||
5364 | ||||
5365 | BIND(L_check_fill_32_bytes); | |||
// count arrives biased by -(16 << shift); adding 8 << shift leaves a bias of -(8 << shift),
// so a non-negative result means at least 32 bytes are still to be filled.
5366 | addl(count, 8 << shift); | |||
5367 | jccb(Assembler::less, L_check_fill_8_bytes)jccb_0(Assembler::less, L_check_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5367); | |||
5368 | vmovdqu(Address(to, 0), xtmp); | |||
5369 | addptr(to, 32); | |||
5370 | subl(count, 8 << shift); | |||
5371 | ||||
5372 | BIND(L_check_fill_8_bytes); | |||
5373 | // clean upper bits of YMM registers | |||
5374 | movdl(xtmp, value); | |||
5375 | pshufd(xtmp, xtmp, 0); | |||
5376 | } else { | |||
5377 | // Fill 32-byte chunks | |||
5378 | pshufd(xtmp, xtmp, 0); | |||
5379 | ||||
5380 | subl(count, 8 << shift); | |||
5381 | jcc(Assembler::less, L_check_fill_8_bytes); | |||
5382 | align(16); | |||
5383 | ||||
5384 | BIND(L_fill_32_bytes_loop); | |||
5385 | ||||
// Two 16-byte stores when unaligned stores are allowed, otherwise four 8-byte stores.
5386 | if (UseUnalignedLoadStores) { | |||
5387 | movdqu(Address(to, 0), xtmp); | |||
5388 | movdqu(Address(to, 16), xtmp); | |||
5389 | } else { | |||
5390 | movq(Address(to, 0), xtmp); | |||
5391 | movq(Address(to, 8), xtmp); | |||
5392 | movq(Address(to, 16), xtmp); | |||
5393 | movq(Address(to, 24), xtmp); | |||
5394 | } | |||
5395 | ||||
5396 | addptr(to, 32); | |||
5397 | subl(count, 8 << shift); | |||
5398 | jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); | |||
5399 | ||||
5400 | BIND(L_check_fill_8_bytes); | |||
5401 | } | |||
// Remove the -(8 << shift) bias: count is again the number of elements left to fill.
5402 | addl(count, 8 << shift); | |||
5403 | jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5403); | |||
5404 | jmpb(L_fill_8_bytes)jmpb_0(L_fill_8_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5404); | |||
5405 | ||||
5406 | // | |||
5407 | // length is too short, just fill qwords | |||
5408 | // | |||
5409 | BIND(L_fill_8_bytes_loop); | |||
5410 | movq(Address(to, 0), xtmp); | |||
5411 | addptr(to, 8); | |||
5412 | BIND(L_fill_8_bytes); | |||
5413 | subl(count, 1 << (shift + 1)); | |||
5414 | jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); | |||
5415 | } | |||
5416 | } | |||
// Tail: the low bits of count select the remaining 4-, 2- and 1-byte stores.
// The 2-byte and 1-byte steps are only emitted for element types narrow enough to need them.
5417 | // fill trailing 4 bytes | |||
5418 | BIND(L_fill_4_bytes); | |||
5419 | testl(count, 1<<shift); | |||
5420 | jccb(Assembler::zero, L_fill_2_bytes)jccb_0(Assembler::zero, L_fill_2_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5420); | |||
5421 | movl(Address(to, 0), value); | |||
5422 | if (t == T_BYTE || t == T_SHORT) { | |||
5423 | Label L_fill_byte; | |||
5424 | addptr(to, 4); | |||
5425 | BIND(L_fill_2_bytes); | |||
5426 | // fill trailing 2 bytes | |||
5427 | testl(count, 1<<(shift-1)); | |||
5428 | jccb(Assembler::zero, L_fill_byte)jccb_0(Assembler::zero, L_fill_byte, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5428); | |||
5429 | movw(Address(to, 0), value); | |||
5430 | if (t == T_BYTE) { | |||
5431 | addptr(to, 2); | |||
5432 | BIND(L_fill_byte); | |||
5433 | // fill trailing byte | |||
5434 | testl(count, 1); | |||
5435 | jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5435); | |||
5436 | movb(Address(to, 0), value); | |||
5437 | } else { | |||
// Labels that were branched to must still be bound even when no code is emitted for them.
5438 | BIND(L_fill_byte); | |||
5439 | } | |||
5440 | } else { | |||
5441 | BIND(L_fill_2_bytes); | |||
5442 | } | |||
5443 | BIND(L_exit); | |||
5444 | } | |||
5445 | ||||
// Emits the broadcast of general-purpose register `src` into vector register `dst`,
// choosing the instruction by element type:
//   T_BYTE / T_BOOLEAN -> evpbroadcastb
//   T_SHORT / T_CHAR   -> evpbroadcastw
//   T_INT / T_FLOAT    -> evpbroadcastd
//   T_LONG / T_DOUBLE  -> evpbroadcastq
// `vector_len` is passed through unchanged. Any other type is reported via fatal().
5446 | void MacroAssembler::evpbroadcast(BasicType type, XMMRegister dst, Register src, int vector_len) { | |||
5447 | switch(type) { | |||
5448 | case T_BYTE: | |||
5449 | case T_BOOLEAN: | |||
5450 | evpbroadcastb(dst, src, vector_len); | |||
5451 | break; | |||
5452 | case T_SHORT: | |||
5453 | case T_CHAR: | |||
5454 | evpbroadcastw(dst, src, vector_len); | |||
5455 | break; | |||
5456 | case T_INT: | |||
5457 | case T_FLOAT: | |||
5458 | evpbroadcastd(dst, src, vector_len); | |||
5459 | break; | |||
5460 | case T_LONG: | |||
5461 | case T_DOUBLE: | |||
5462 | evpbroadcastq(dst, src, vector_len); | |||
5463 | break; | |||
5464 | default: | |||
5465 | fatal("Unhandled type : %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5465, "Unhandled type : %s", type2name(type)); ::breakpoint (); } while (0); | |||
5466 | break; | |||
5467 | } | |||
5468 | } | |||
5469 | ||||
5470 | // encode char[] to byte[] in ISO_8859_1 or ASCII | |||
5471 | //@IntrinsicCandidate | |||
5472 | //private static int implEncodeISOArray(byte[] sa, int sp, | |||
5473 | //byte[] da, int dp, int len) { | |||
5474 | // int i = 0; | |||
5475 | // for (; i < len; i++) { | |||
5476 | // char c = StringUTF16.getChar(sa, sp++); | |||
5477 | // if (c > '\u00FF') | |||
5478 | // break; | |||
5479 | // da[dp++] = (byte)c; | |||
5480 | // } | |||
5481 | // return i; | |||
5482 | //} | |||
5483 | // | |||
5484 | //@IntrinsicCandidate | |||
5485 | //private static int implEncodeAsciiArray(char[] sa, int sp, | |||
5486 | // byte[] da, int dp, int len) { | |||
5487 | // int i = 0; | |||
5488 | // for (; i < len; i++) { | |||
5489 | // char c = sa[sp++]; | |||
5490 | // if (c >= '\u0080') | |||
5491 | // break; | |||
5492 | // da[dp++] = (byte)c; | |||
5493 | // } | |||
5494 | // return i; | |||
5495 | //} | |||
// Emits the char[] -> byte[] narrowing loop sketched in the Java pseudo-code above.
//   src, dst        - source (16-bit chars) and destination (bytes) addresses; both are modified
//   len             - number of chars; negated and used as a running negative index
//   tmp1Reg-tmp4Reg - XMM scratch registers (tmp1Reg holds the broadcast test mask)
//   tmp5            - general-purpose scratch register
//   result          - receives the number of chars encoded before the first rejected char
//   ascii           - true: reject chars >= 0x80; false: reject chars > 0xFF
5496 | void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, | |||
5497 | XMMRegister tmp1Reg, XMMRegister tmp2Reg, | |||
5498 | XMMRegister tmp3Reg, XMMRegister tmp4Reg, | |||
5499 | Register tmp5, Register result, bool ascii) { | |||
5500 | ||||
5501 | // rsi: src | |||
5502 | // rdi: dst | |||
5503 | // rdx: len | |||
5504 | // rcx: tmp5 | |||
5505 | // rax: result | |||
5506 | ShortBranchVerifier sbv(this); | |||
5507 | assert_different_registers(src, dst, len, tmp5, result); | |||
5508 | Label L_done, L_copy_1_char, L_copy_1_char_exit; | |||
5509 | ||||
// Bits that must all be zero for a char to be encodable: `mask` covers two chars per
// 32-bit lane for the vector tests, `short_mask` covers a single char for the scalar loop.
5510 | int mask = ascii ? 0xff80ff80 : 0xff00ff00; | |||
5511 | int short_mask = ascii ? 0xff80 : 0xff00; | |||
5512 | ||||
5513 | // set result | |||
5514 | xorl(result, result); | |||
5515 | // check for zero length | |||
5516 | testl(len, len); | |||
5517 | jcc(Assembler::zero, L_done); | |||
5518 | ||||
// Start from result = len; the (negative) count of unprocessed chars is added back at the end.
5519 | movl(result, len); | |||
5520 | ||||
5521 | // Setup pointers | |||
// src and dst are moved to the end of their ranges and len is negated, so that
// (base + len*scale) walks forward as len counts up towards zero.
5522 | lea(src, Address(src, len, Address::times_2)); // char[] | |||
5523 | lea(dst, Address(dst, len, Address::times_1)); // byte[] | |||
5524 | negptr(len); | |||
5525 | ||||
5526 | if (UseSSE42Intrinsics || UseAVX >= 2) { | |||
5527 | Label L_copy_8_chars, L_copy_8_chars_exit; | |||
5528 | Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit; | |||
5529 | ||||
5530 | if (UseAVX >= 2) { | |||
5531 | Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit; | |||
5532 | movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector | |||
5533 | movdl(tmp1Reg, tmp5); | |||
5534 | vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit); | |||
5535 | jmp(L_chars_32_check); | |||
5536 | ||||
// 32 chars per iteration: len has already been advanced by 32, so the two 32-byte loads
// use displacements -64 and -32 (bytes) and the 32-byte store uses displacement -32.
5537 | bind(L_copy_32_chars); | |||
5538 | vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64)); | |||
5539 | vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32)); | |||
5540 | vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); | |||
5541 | vptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector | |||
5542 | jccb(Assembler::notZero, L_copy_32_chars_exit)jccb_0(Assembler::notZero, L_copy_32_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5542); | |||
5543 | vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); | |||
5544 | vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1); | |||
5545 | vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg); | |||
5546 | ||||
5547 | bind(L_chars_32_check); | |||
5548 | addptr(len, 32); | |||
5549 | jcc(Assembler::lessEqual, L_copy_32_chars); | |||
5550 | ||||
5551 | bind(L_copy_32_chars_exit); | |||
// Undo half of the 32-char advance; a positive len here means fewer than 16 chars remain.
5552 | subptr(len, 16); | |||
5553 | jccb(Assembler::greater, L_copy_16_chars_exit)jccb_0(Assembler::greater, L_copy_16_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5553); | |||
5554 | ||||
5555 | } else if (UseSSE42Intrinsics) { | |||
5556 | movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector | |||
5557 | movdl(tmp1Reg, tmp5); | |||
5558 | pshufd(tmp1Reg, tmp1Reg, 0); | |||
5559 | jmpb(L_chars_16_check)jmpb_0(L_chars_16_check, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5559); | |||
5560 | } | |||
5561 | ||||
// 16 chars per iteration; the result to store always ends up in tmp3Reg.
5562 | bind(L_copy_16_chars); | |||
5563 | if (UseAVX >= 2) { | |||
5564 | vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32)); | |||
5565 | vptest(tmp2Reg, tmp1Reg); | |||
5566 | jcc(Assembler::notZero, L_copy_16_chars_exit); | |||
5567 | vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector_len */ 1); | |||
5568 | vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector_len */ 1); | |||
5569 | } else { | |||
5570 | if (UseAVX > 0) { | |||
5571 | movdqu(tmp3Reg, Address(src, len, Address::times_2, -32)); | |||
5572 | movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); | |||
5573 | vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 0); | |||
5574 | } else { | |||
// NOTE(review): on this path tmp2Reg is OR-ed into without first being loaded or cleared
// in the visible code. Confirm its incoming contents cannot have mask bits set; a spurious
// hit would appear only to divert to the 8-char and 1-char loops below, which re-test freshly loaded data.
5575 | movdqu(tmp3Reg, Address(src, len, Address::times_2, -32)); | |||
5576 | por(tmp2Reg, tmp3Reg); | |||
5577 | movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); | |||
5578 | por(tmp2Reg, tmp4Reg); | |||
5579 | } | |||
5580 | ptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector | |||
5581 | jccb(Assembler::notZero, L_copy_16_chars_exit)jccb_0(Assembler::notZero, L_copy_16_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5581); | |||
5582 | packuswb(tmp3Reg, tmp4Reg); | |||
5583 | } | |||
5584 | movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg); | |||
5585 | ||||
5586 | bind(L_chars_16_check); | |||
5587 | addptr(len, 16); | |||
5588 | jcc(Assembler::lessEqual, L_copy_16_chars); | |||
5589 | ||||
5590 | bind(L_copy_16_chars_exit); | |||
5591 | if (UseAVX >= 2) { | |||
5592 | // clean upper bits of YMM registers | |||
5593 | vpxor(tmp2Reg, tmp2Reg); | |||
5594 | vpxor(tmp3Reg, tmp3Reg); | |||
5595 | vpxor(tmp4Reg, tmp4Reg); | |||
// Rebuild the mask as a 128-bit value; tmp5 still holds `mask` from the AVX2 setup above.
5596 | movdl(tmp1Reg, tmp5); | |||
5597 | pshufd(tmp1Reg, tmp1Reg, 0); | |||
5598 | } | |||
// Undo half of the 16-char advance; a positive len here means fewer than 8 chars remain.
5599 | subptr(len, 8); | |||
5600 | jccb(Assembler::greater, L_copy_8_chars_exit)jccb_0(Assembler::greater, L_copy_8_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5600); | |||
5601 | ||||
// 8 chars per iteration: one 16-byte load, one 8-byte store.
5602 | bind(L_copy_8_chars); | |||
5603 | movdqu(tmp3Reg, Address(src, len, Address::times_2, -16)); | |||
5604 | ptest(tmp3Reg, tmp1Reg); | |||
5605 | jccb(Assembler::notZero, L_copy_8_chars_exit)jccb_0(Assembler::notZero, L_copy_8_chars_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5605); | |||
5606 | packuswb(tmp3Reg, tmp1Reg); | |||
5607 | movq(Address(dst, len, Address::times_1, -8), tmp3Reg); | |||
5608 | addptr(len, 8); | |||
5609 | jccb(Assembler::lessEqual, L_copy_8_chars)jccb_0(Assembler::lessEqual, L_copy_8_chars, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5609); | |||
5610 | ||||
5611 | bind(L_copy_8_chars_exit); | |||
// Remove the remaining 8-char advance; len == 0 means every char has been processed.
5612 | subptr(len, 8); | |||
5613 | jccb(Assembler::zero, L_done)jccb_0(Assembler::zero, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5613); | |||
5614 | } | |||
5615 | ||||
// Scalar loop: one char per iteration until len reaches zero or a char fails the mask test.
5616 | bind(L_copy_1_char); | |||
5617 | load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0)); | |||
5618 | testl(tmp5, short_mask); // check if Unicode or non-ASCII char | |||
5619 | jccb(Assembler::notZero, L_copy_1_char_exit)jccb_0(Assembler::notZero, L_copy_1_char_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5619); | |||
5620 | movb(Address(dst, len, Address::times_1, 0), tmp5); | |||
5621 | addptr(len, 1); | |||
5622 | jccb(Assembler::less, L_copy_1_char)jccb_0(Assembler::less, L_copy_1_char, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5622); | |||
5623 | ||||
5624 | bind(L_copy_1_char_exit); | |||
5625 | addptr(result, len); // len is negative count of not processed elements | |||
5626 | ||||
5627 | bind(L_done); | |||
5628 | } | |||
5629 | ||||
5630 | #ifdef _LP641 | |||
5631 | /** | |||
5632 | * Helper for multiply_to_len(). | |||
     | * | |||
     | * Emits dest_hi:dest_lo += src1, then dest_hi:dest_lo += src2: each 64-bit add into | |||
     | * dest_lo is followed by an add-with-carry of 0 into dest_hi, so the carry out of | |||
     | * the low half is propagated into the high half after every addition. | |||
5633 | */ | |||
5634 | void MacroAssembler::add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { | |||
5635 | addq(dest_lo, src1); | |||
5636 | adcq(dest_hi, 0); | |||
5637 | addq(dest_lo, src2); | |||
5638 | adcq(dest_hi, 0); | |||
5639 | } | |||
5640 | ||||
5641 | /** | |||
5642 | * Multiply 64 bit by 64 bit first loop. | |||
     | * | |||
     | * x, y and z are addressed as arrays of 32-bit words (Address::times_4); two adjacent | |||
     | * words are loaded as one 64-bit value and their halves swapped with rorq(.., 32). | |||
     | * mulq leaves its result in rdx:rax, so presumably `product` is rax - confirm at the caller. | |||
5643 | */ | |||
5644 | void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, | |||
5645 | Register y, Register y_idx, Register z, | |||
5646 | Register carry, Register product, | |||
5647 | Register idx, Register kdx) { | |||
5648 | // | |||
5649 | // jlong carry, x[], y[], z[]; | |||
5650 | // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { | |||
5651 | // huge_128 product = y[idx] * x[xstart] + carry; | |||
5652 | // z[kdx] = (jlong)product; | |||
5653 | // carry = (jlong)(product >>> 64); | |||
5654 | // } | |||
5655 | // z[xstart] = carry; | |||
5656 | // | |||
5657 | ||||
5658 | Label L_first_loop, L_first_loop_exit; | |||
5659 | Label L_one_x, L_one_y, L_multiply; | |||
5660 | ||||
// If decrementing xstart makes it negative, only a single 32-bit word of x is loaded (L_one_x).
5661 | decrementl(xstart); | |||
5662 | jcc(Assembler::negative, L_one_x); | |||
5663 | ||||
5664 | movq(x_xstart, Address(x, xstart, Address::times_4, 0)); | |||
5665 | rorq(x_xstart, 32); // convert big-endian to little-endian | |||
5666 | ||||
5667 | bind(L_first_loop); | |||
// idx is decremented twice per iteration (two 32-bit words of y are consumed):
// negative after the first decrement -> done; negative after the second -> one word left (L_one_y).
5668 | decrementl(idx); | |||
5669 | jcc(Assembler::negative, L_first_loop_exit); | |||
5670 | decrementl(idx); | |||
5671 | jcc(Assembler::negative, L_one_y); | |||
5672 | movq(y_idx, Address(y, idx, Address::times_4, 0)); | |||
5673 | rorq(y_idx, 32); // convert big-endian to little-endian | |||
5674 | bind(L_multiply); | |||
5675 | movq(product, x_xstart); | |||
5676 | mulq(y_idx); // product(rax) * y_idx -> rdx:rax | |||
5677 | addq(product, carry); | |||
5678 | adcq(rdx, 0); | |||
5679 | subl(kdx, 2); | |||
// Store the low 64 bits as two 32-bit words: low half at z[kdx+1], high half at z[kdx].
5680 | movl(Address(z, kdx, Address::times_4, 4), product); | |||
5681 | shrq(product, 32); | |||
5682 | movl(Address(z, kdx, Address::times_4, 0), product); | |||
5683 | movq(carry, rdx); | |||
5684 | jmp(L_first_loop); | |||
5685 | ||||
// Single remaining 32-bit word of y: load y[0] and reuse the multiply path.
5686 | bind(L_one_y); | |||
5687 | movl(y_idx, Address(y, 0)); | |||
5688 | jmp(L_multiply); | |||
5689 | ||||
// Single 32-bit word of x: load x[0] and enter the loop.
5690 | bind(L_one_x); | |||
5691 | movl(x_xstart, Address(x, 0)); | |||
5692 | jmp(L_first_loop); | |||
5693 | ||||
5694 | bind(L_first_loop_exit); | |||
5695 | } | |||
5696 | ||||
5697 | /** | |||
5698 | * Multiply 64 bit by 64 bit and add 128 bit. | |||
     | * | |||
     | * Operates on the two 32-bit words at byte offset `offset` from index `idx` in y and z. | |||
     | * The low 64 bits of the sum are written back to z; the high 64 bits are left in rdx | |||
     | * for the caller to pick up as the next carry. yz_idx is scratch. | |||
5699 | */ | |||
5700 | void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y, Register z, | |||
5701 | Register yz_idx, Register idx, | |||
5702 | Register carry, Register product, int offset) { | |||
5703 | // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry; | |||
5704 | // z[kdx] = (jlong)product; | |||
5705 | ||||
5706 | movq(yz_idx, Address(y, idx, Address::times_4, offset)); | |||
5707 | rorq(yz_idx, 32); // convert big-endian to little-endian | |||
5708 | movq(product, x_xstart); | |||
5709 | mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) | |||
// yz_idx is reused: it now holds the current z value that is added to the product.
5710 | movq(yz_idx, Address(z, idx, Address::times_4, offset)); | |||
5711 | rorq(yz_idx, 32); // convert big-endian to little-endian | |||
5712 | ||||
5713 | add2_with_carry(rdx, product, carry, yz_idx); | |||
5714 | ||||
// Write the low 64 bits back as two 32-bit words: low half at offset+4, high half at offset.
5715 | movl(Address(z, idx, Address::times_4, offset+4), product); | |||
5716 | shrq(product, 32); | |||
5717 | movl(Address(z, idx, Address::times_4, offset), product); | |||
5718 | ||||
5719 | } | |||
5720 | ||||
5721 | /** | |||
5722 | * Multiply 128 bit by 128 bit. Unrolled inner loop. | |||
     | * | |||
     | * The main loop consumes four 32-bit words of y/z per iteration (two 64-bit | |||
     | * multiply-adds); the code after it handles a remainder of two words and/or one word. | |||
     | * jdx, yz_idx and carry2 are scratch; carry is both input and output. | |||
5723 | */ | |||
5724 | void MacroAssembler::multiply_128_x_128_loop(Register x_xstart, Register y, Register z, | |||
5725 | Register yz_idx, Register idx, Register jdx, | |||
5726 | Register carry, Register product, | |||
5727 | Register carry2) { | |||
5728 | // jlong carry, x[], y[], z[]; | |||
5729 | // int kdx = ystart+1; | |||
5730 | // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop | |||
5731 | // huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry; | |||
5732 | // z[kdx+idx+1] = (jlong)product; | |||
5733 | // jlong carry2 = (jlong)(product >>> 64); | |||
5734 | // product = (y[idx] * x_xstart) + z[kdx+idx] + carry2; | |||
5735 | // z[kdx+idx] = (jlong)product; | |||
5736 | // carry = (jlong)(product >>> 64); | |||
5737 | // } | |||
5738 | // idx += 2; | |||
5739 | // if (idx > 0) { | |||
5740 | // product = (y[idx] * x_xstart) + z[kdx+idx] + carry; | |||
5741 | // z[kdx+idx] = (jlong)product; | |||
5742 | // carry = (jlong)(product >>> 64); | |||
5743 | // } | |||
5744 | // | |||
5745 | ||||
5746 | Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; | |||
5747 | ||||
// jdx = idx / 4: number of unrolled iterations.
5748 | movl(jdx, idx); | |||
5749 | andl(jdx, 0xFFFFFFFC); | |||
5750 | shrl(jdx, 2); | |||
5751 | ||||
5752 | bind(L_third_loop); | |||
5753 | subl(jdx, 1); | |||
5754 | jcc(Assembler::negative, L_third_loop_exit); | |||
5755 | subl(idx, 4); | |||
5756 | ||||
// Upper pair of words first (byte offset 8), its high half (rdx) feeds the lower pair as carry2.
5757 | multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8); | |||
5758 | movq(carry2, rdx); | |||
5759 | ||||
5760 | multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0); | |||
5761 | movq(carry, rdx); | |||
5762 | jmp(L_third_loop); | |||
5763 | ||||
5764 | bind (L_third_loop_exit); | |||
5765 | ||||
// Remainder: idx & 3 words are left (0..3).
5766 | andl (idx, 0x3); | |||
5767 | jcc(Assembler::zero, L_post_third_loop_done); | |||
5768 | ||||
5769 | Label L_check_1; | |||
// At least two words left: do one more 64-bit multiply-add.
5770 | subl(idx, 2); | |||
5771 | jcc(Assembler::negative, L_check_1); | |||
5772 | ||||
5773 | multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0); | |||
5774 | movq(carry, rdx); | |||
5775 | ||||
5776 | bind (L_check_1); | |||
// Restore idx, keep its low bit; after the subtract, negative means no odd word remains.
5777 | addl (idx, 0x2); | |||
5778 | andl (idx, 0x1); | |||
5779 | subl(idx, 1); | |||
5780 | jcc(Assembler::negative, L_post_third_loop_done); | |||
5781 | ||||
// Final single 32-bit word: 32-bit loads of y[idx] and z[idx], 64-bit multiply by x_xstart.
5782 | movl(yz_idx, Address(y, idx, Address::times_4, 0)); | |||
5783 | movq(product, x_xstart); | |||
5784 | mulq(yz_idx); // product(rax) * yz_idx -> rdx:product(rax) | |||
5785 | movl(yz_idx, Address(z, idx, Address::times_4, 0)); | |||
5786 | ||||
5787 | add2_with_carry(rdx, product, yz_idx, carry); | |||
5788 | ||||
5789 | movl(Address(z, idx, Address::times_4, 0), product); | |||
5790 | shrq(product, 32); | |||
5791 | ||||
// Only 32 bits were stored, so the new carry is the 128-bit sum shifted right by 32:
// (rdx << 32) | (product >> 32).
5792 | shlq(rdx, 32); | |||
5793 | orq(product, rdx); | |||
5794 | movq(carry, product); | |||
5795 | ||||
5796 | bind(L_post_third_loop_done); | |||
5797 | } | |||
5798 | ||||
5799 | /** | |||
5800 | * Multiply 128 bit by 128 bit using BMI2. Unrolled inner loop. | |||
5801 | * | |||
     | * Same loop structure as multiply_128_x_128_loop(), but the multiplicand is taken | |||
     | * implicitly from rdx by mulxq (see the "* rdx" comments below), so rdx must hold it | |||
     | * on entry. jdx, yz_idx1, yz_idx2, tmp, tmp3, tmp4 and carry2 are scratch; carry is | |||
     | * both input and output. | |||
5802 | */ | |||
5803 | void MacroAssembler::multiply_128_x_128_bmi2_loop(Register y, Register z, | |||
5804 | Register carry, Register carry2, | |||
5805 | Register idx, Register jdx, | |||
5806 | Register yz_idx1, Register yz_idx2, | |||
5807 | Register tmp, Register tmp3, Register tmp4) { | |||
5808 | assert(UseBMI2Instructions, "should be used only when BMI2 is available")do { if (!(UseBMI2Instructions)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 5808, "assert(" "UseBMI2Instructions" ") failed", "should be used only when BMI2 is available" ); ::breakpoint(); } } while (0); | |||
5809 | ||||
5810 | // jlong carry, x[], y[], z[]; | |||
5811 | // int kdx = ystart+1; | |||
5812 | // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop | |||
5813 | // huge_128 tmp3 = (y[idx+1] * rdx) + z[kdx+idx+1] + carry; | |||
5814 | // jlong carry2 = (jlong)(tmp3 >>> 64); | |||
5815 | // huge_128 tmp4 = (y[idx] * rdx) + z[kdx+idx] + carry2; | |||
5816 | // carry = (jlong)(tmp4 >>> 64); | |||
5817 | // z[kdx+idx+1] = (jlong)tmp3; | |||
5818 | // z[kdx+idx] = (jlong)tmp4; | |||
5819 | // } | |||
5820 | // idx += 2; | |||
5821 | // if (idx > 0) { | |||
5822 | // yz_idx1 = (y[idx] * rdx) + z[kdx+idx] + carry; | |||
5823 | // z[kdx+idx] = (jlong)yz_idx1; | |||
5824 | // carry = (jlong)(yz_idx1 >>> 64); | |||
5825 | // } | |||
5826 | // | |||
5827 | ||||
5828 | Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; | |||
5829 | ||||
// jdx = idx / 4: number of unrolled iterations, each consuming four 32-bit words.
5830 | movl(jdx, idx); | |||
5831 | andl(jdx, 0xFFFFFFFC); | |||
5832 | shrl(jdx, 2); | |||
5833 | ||||
5834 | bind(L_third_loop); | |||
5835 | subl(jdx, 1); | |||
5836 | jcc(Assembler::negative, L_third_loop_exit); | |||
5837 | subl(idx, 4); | |||
5838 | ||||
// Load both 64-bit y values (byte offsets 8 and 0), swapping their 32-bit halves.
5839 | movq(yz_idx1, Address(y, idx, Address::times_4, 8)); | |||
5840 | rorxq(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian | |||
5841 | movq(yz_idx2, Address(y, idx, Address::times_4, 0)); | |||
5842 | rorxq(yz_idx2, yz_idx2, 32); | |||
5843 | ||||
5844 | mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 | |||
5845 | mulxq(carry2, tmp, yz_idx2); // yz_idx2 * rdx -> carry2:tmp | |||
5846 | ||||
// yz_idx1/yz_idx2 are reused for the two current z values to be added in.
5847 | movq(yz_idx1, Address(z, idx, Address::times_4, 8)); | |||
5848 | rorxq(yz_idx1, yz_idx1, 32); | |||
5849 | movq(yz_idx2, Address(z, idx, Address::times_4, 0)); | |||
5850 | rorxq(yz_idx2, yz_idx2, 32); | |||
5851 | ||||
5852 | if (VM_Version::supports_adx()) { | |||
// adcx and adox form two separate carry chains (per the Intel ADCX/ADOX definitions):
// one adds carry and tmp, the other adds the old z values; carry2 then absorbs the
// carry-out of each chain by adding a zeroed register on both.
5853 | adcxq(tmp3, carry); | |||
5854 | adoxq(tmp3, yz_idx1); | |||
5855 | ||||
5856 | adcxq(tmp4, tmp); | |||
5857 | adoxq(tmp4, yz_idx2); | |||
5858 | ||||
5859 | movl(carry, 0); // does not affect flags | |||
5860 | adcxq(carry2, carry); | |||
5861 | adoxq(carry2, carry); | |||
5862 | } else { | |||
5863 | add2_with_carry(tmp4, tmp3, carry, yz_idx1); | |||
5864 | add2_with_carry(carry2, tmp4, tmp, yz_idx2); | |||
5865 | } | |||
5866 | movq(carry, carry2); | |||
5867 | ||||
// Write tmp3 to words idx+3 / idx+2 and tmp4 to words idx+1 / idx (low half at the higher index).
5868 | movl(Address(z, idx, Address::times_4, 12), tmp3); | |||
5869 | shrq(tmp3, 32); | |||
5870 | movl(Address(z, idx, Address::times_4, 8), tmp3); | |||
5871 | ||||
5872 | movl(Address(z, idx, Address::times_4, 4), tmp4); | |||
5873 | shrq(tmp4, 32); | |||
5874 | movl(Address(z, idx, Address::times_4, 0), tmp4); | |||
5875 | ||||
5876 | jmp(L_third_loop); | |||
5877 | ||||
5878 | bind (L_third_loop_exit); | |||
5879 | ||||
// Remainder: idx & 3 words are left (0..3).
5880 | andl (idx, 0x3); | |||
5881 | jcc(Assembler::zero, L_post_third_loop_done); | |||
5882 | ||||
5883 | Label L_check_1; | |||
// At least two words left: one more 64-bit multiply-add.
5884 | subl(idx, 2); | |||
5885 | jcc(Assembler::negative, L_check_1); | |||
5886 | ||||
5887 | movq(yz_idx1, Address(y, idx, Address::times_4, 0)); | |||
5888 | rorxq(yz_idx1, yz_idx1, 32); | |||
5889 | mulxq(tmp4, tmp3, yz_idx1); // yz_idx1 * rdx -> tmp4:tmp3 | |||
5890 | movq(yz_idx2, Address(z, idx, Address::times_4, 0)); | |||
5891 | rorxq(yz_idx2, yz_idx2, 32); | |||
5892 | ||||
5893 | add2_with_carry(tmp4, tmp3, carry, yz_idx2); | |||
5894 | ||||
5895 | movl(Address(z, idx, Address::times_4, 4), tmp3); | |||
5896 | shrq(tmp3, 32); | |||
5897 | movl(Address(z, idx, Address::times_4, 0), tmp3); | |||
5898 | movq(carry, tmp4); | |||
5899 | ||||
5900 | bind (L_check_1); | |||
// Restore idx, keep its low bit; after the subtract, negative means no odd word remains.
5901 | addl (idx, 0x2); | |||
5902 | andl (idx, 0x1); | |||
5903 | subl(idx, 1); | |||
5904 | jcc(Assembler::negative, L_post_third_loop_done); | |||
// Final single 32-bit word: 32-bit loads of y[idx] and z[idx].
5905 | movl(tmp4, Address(y, idx, Address::times_4, 0)); | |||
5906 | mulxq(carry2, tmp3, tmp4); // tmp4 * rdx -> carry2:tmp3 | |||
5907 | movl(tmp4, Address(z, idx, Address::times_4, 0)); | |||
5908 | ||||
5909 | add2_with_carry(carry2, tmp3, tmp4, carry); | |||
5910 | ||||
5911 | movl(Address(z, idx, Address::times_4, 0), tmp3); | |||
5912 | shrq(tmp3, 32); | |||
5913 | ||||
// Only 32 bits were stored, so the new carry is (carry2 << 32) | (tmp3 >> 32).
5914 | shlq(carry2, 32); | |||
5915 | orq(tmp3, carry2); | |||
5916 | movq(carry, tmp3); | |||
5917 | ||||
5918 | bind(L_post_third_loop_done); | |||
5919 | } | |||
5920 | ||||
5921 | /** | |||
5922 | * Code for BigInteger::multiplyToLen() intrinsic. | |||
5923 | * | |||
5924 | * rdi: x | |||
5925 | * rax: xlen | |||
5926 | * rsi: y | |||
5927 | * rcx: ylen | |||
5928 | * r8: z | |||
5929 | * r11: zlen | |||
5930 | * r12: tmp1 | |||
5931 | * r13: tmp2 | |||
5932 | * r14: tmp3 | |||
5933 | * r15: tmp4 | |||
5934 | * rbx: tmp5 | |||
5935 | * | |||
5936 | */ | |||
5937 | void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen, | |||
5938 | Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) { | |||
5939 | ShortBranchVerifier sbv(this); | |||
5940 | assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx); | |||
5941 | ||||
5942 | push(tmp1); | |||
5943 | push(tmp2); | |||
5944 | push(tmp3); | |||
5945 | push(tmp4); | |||
5946 | push(tmp5); | |||
5947 | ||||
5948 | push(xlen); | |||
5949 | push(zlen); | |||
5950 | ||||
5951 | const Register idx = tmp1; | |||
5952 | const Register kdx = tmp2; | |||
5953 | const Register xstart = tmp3; | |||
5954 | ||||
5955 | const Register y_idx = tmp4; | |||
5956 | const Register carry = tmp5; | |||
5957 | const Register product = xlen; | |||
5958 | const Register x_xstart = zlen; // reuse register | |||
5959 | ||||
5960 | // First Loop. | |||
5961 | // | |||
5962 | // final static long LONG_MASK = 0xffffffffL; | |||
5963 | // int xstart = xlen - 1; | |||
5964 | // int ystart = ylen - 1; | |||
5965 | // long carry = 0; | |||
5966 | // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx-, kdx--) { | |||
5967 | // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; | |||
5968 | // z[kdx] = (int)product; | |||
5969 | // carry = product >>> 32; | |||
5970 | // } | |||
5971 | // z[xstart] = (int)carry; | |||
5972 | // | |||
5973 | ||||
5974 | movl(idx, ylen); // idx = ylen; | |||
5975 | movl(kdx, zlen); // kdx = xlen+ylen; | |||
5976 | xorq(carry, carry); // carry = 0; | |||
5977 | ||||
5978 | Label L_done; | |||
5979 | ||||
5980 | movl(xstart, xlen); | |||
5981 | decrementl(xstart); | |||
5982 | jcc(Assembler::negative, L_done); | |||
5983 | ||||
5984 | multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); | |||
5985 | ||||
5986 | Label L_second_loop; | |||
5987 | testl(kdx, kdx); | |||
5988 | jcc(Assembler::zero, L_second_loop); | |||
5989 | ||||
5990 | Label L_carry; | |||
5991 | subl(kdx, 1); | |||
5992 | jcc(Assembler::zero, L_carry); | |||
5993 | ||||
5994 | movl(Address(z, kdx, Address::times_4, 0), carry); | |||
5995 | shrq(carry, 32); | |||
5996 | subl(kdx, 1); | |||
5997 | ||||
5998 | bind(L_carry); | |||
5999 | movl(Address(z, kdx, Address::times_4, 0), carry); | |||
6000 | ||||
6001 | // Second and third (nested) loops. | |||
6002 | // | |||
6003 | // for (int i = xstart-1; i >= 0; i--) { // Second loop | |||
6004 | // carry = 0; | |||
6005 | // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop | |||
6006 | // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + | |||
6007 | // (z[k] & LONG_MASK) + carry; | |||
6008 | // z[k] = (int)product; | |||
6009 | // carry = product >>> 32; | |||
6010 | // } | |||
6011 | // z[i] = (int)carry; | |||
6012 | // } | |||
6013 | // | |||
6014 | // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx | |||
6015 | ||||
6016 | const Register jdx = tmp1; | |||
6017 | ||||
6018 | bind(L_second_loop); | |||
6019 | xorl(carry, carry); // carry = 0; | |||
6020 | movl(jdx, ylen); // j = ystart+1 | |||
6021 | ||||
6022 | subl(xstart, 1); // i = xstart-1; | |||
6023 | jcc(Assembler::negative, L_done); | |||
6024 | ||||
6025 | push (z); | |||
6026 | ||||
6027 | Label L_last_x; | |||
6028 | lea(z, Address(z, xstart, Address::times_4, 4)); // z = z + k - j | |||
6029 | subl(xstart, 1); // i = xstart-1; | |||
6030 | jcc(Assembler::negative, L_last_x); | |||
6031 | ||||
6032 | if (UseBMI2Instructions) { | |||
6033 | movq(rdx, Address(x, xstart, Address::times_4, 0)); | |||
6034 | rorxq(rdx, rdx, 32); // convert big-endian to little-endian | |||
6035 | } else { | |||
6036 | movq(x_xstart, Address(x, xstart, Address::times_4, 0)); | |||
6037 | rorq(x_xstart, 32); // convert big-endian to little-endian | |||
6038 | } | |||
6039 | ||||
6040 | Label L_third_loop_prologue; | |||
6041 | bind(L_third_loop_prologue); | |||
6042 | ||||
6043 | push (x); | |||
6044 | push (xstart); | |||
6045 | push (ylen); | |||
6046 | ||||
6047 | ||||
6048 | if (UseBMI2Instructions) { | |||
6049 | multiply_128_x_128_bmi2_loop(y, z, carry, x, jdx, ylen, product, tmp2, x_xstart, tmp3, tmp4); | |||
6050 | } else { // !UseBMI2Instructions | |||
6051 | multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x); | |||
6052 | } | |||
6053 | ||||
6054 | pop(ylen); | |||
6055 | pop(xlen); | |||
6056 | pop(x); | |||
6057 | pop(z); | |||
6058 | ||||
6059 | movl(tmp3, xlen); | |||
6060 | addl(tmp3, 1); | |||
6061 | movl(Address(z, tmp3, Address::times_4, 0), carry); | |||
6062 | subl(tmp3, 1); | |||
6063 | jccb(Assembler::negative, L_done)jccb_0(Assembler::negative, L_done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6063); | |||
6064 | ||||
6065 | shrq(carry, 32); | |||
6066 | movl(Address(z, tmp3, Address::times_4, 0), carry); | |||
6067 | jmp(L_second_loop); | |||
6068 | ||||
6069 | // Next infrequent code is moved outside loops. | |||
6070 | bind(L_last_x); | |||
6071 | if (UseBMI2Instructions) { | |||
6072 | movl(rdx, Address(x, 0)); | |||
6073 | } else { | |||
6074 | movl(x_xstart, Address(x, 0)); | |||
6075 | } | |||
6076 | jmp(L_third_loop_prologue); | |||
6077 | ||||
6078 | bind(L_done); | |||
6079 | ||||
6080 | pop(zlen); | |||
6081 | pop(xlen); | |||
6082 | ||||
6083 | pop(tmp5); | |||
6084 | pop(tmp4); | |||
6085 | pop(tmp3); | |||
6086 | pop(tmp2); | |||
6087 | pop(tmp1); | |||
6088 | } | |||
6089 | ||||
6090 | void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, | |||
6091 | Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){ | |||
6092 | assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.")do { if (!(UseSSE42Intrinsics)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6092, "assert(" "UseSSE42Intrinsics" ") failed", "SSE4.2 must be enabled." ); ::breakpoint(); } } while (0); | |||
6093 | Label VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP; | |||
6094 | Label VECTOR8_TAIL, VECTOR4_TAIL; | |||
6095 | Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL; | |||
6096 | Label SAME_TILL_END, DONE; | |||
6097 | Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL; | |||
6098 | ||||
6099 | //scale is in rcx in both Win64 and Unix | |||
6100 | ShortBranchVerifier sbv(this); | |||
6101 | ||||
6102 | shlq(length); | |||
6103 | xorq(result, result); | |||
6104 | ||||
6105 | if ((AVX3Threshold == 0) && (UseAVX > 2) && | |||
6106 | VM_Version::supports_avx512vlbw()) { | |||
6107 | Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL; | |||
6108 | ||||
6109 | cmpq(length, 64); | |||
6110 | jcc(Assembler::less, VECTOR32_TAIL); | |||
6111 | ||||
6112 | movq(tmp1, length); | |||
6113 | andq(tmp1, 0x3F); // tail count | |||
6114 | andq(length, ~(0x3F)); //vector count | |||
6115 | ||||
6116 | bind(VECTOR64_LOOP); | |||
6117 | // AVX512 code to compare 64 byte vectors. | |||
6118 | evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit); | |||
6119 | evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit); | |||
6120 | kortestql(k7, k7); | |||
6121 | jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch | |||
6122 | addq(result, 64); | |||
6123 | subq(length, 64); | |||
6124 | jccb(Assembler::notZero, VECTOR64_LOOP)jccb_0(Assembler::notZero, VECTOR64_LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6124); | |||
6125 | ||||
6126 | //bind(VECTOR64_TAIL); | |||
6127 | testq(tmp1, tmp1); | |||
6128 | jcc(Assembler::zero, SAME_TILL_END); | |||
6129 | ||||
6130 | //bind(VECTOR64_TAIL); | |||
6131 | // AVX512 code to compare upto 63 byte vectors. | |||
6132 | mov64(tmp2, 0xFFFFFFFFFFFFFFFF); | |||
6133 | shlxq(tmp2, tmp2, tmp1); | |||
6134 | notq(tmp2); | |||
6135 | kmovql(k3, tmp2); | |||
6136 | ||||
6137 | evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit); | |||
6138 | evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit); | |||
6139 | ||||
6140 | ktestql(k7, k3); | |||
6141 | jcc(Assembler::below, SAME_TILL_END); // not mismatch | |||
6142 | ||||
6143 | bind(VECTOR64_NOT_EQUAL); | |||
6144 | kmovql(tmp1, k7); | |||
6145 | notq(tmp1); | |||
6146 | tzcntq(tmp1, tmp1); | |||
6147 | addq(result, tmp1); | |||
6148 | shrq(result); | |||
6149 | jmp(DONE); | |||
6150 | bind(VECTOR32_TAIL); | |||
6151 | } | |||
6152 | ||||
6153 | cmpq(length, 8); | |||
6154 | jcc(Assembler::equal, VECTOR8_LOOP); | |||
6155 | jcc(Assembler::less, VECTOR4_TAIL); | |||
6156 | ||||
6157 | if (UseAVX >= 2) { | |||
6158 | Label VECTOR16_TAIL, VECTOR32_LOOP; | |||
6159 | ||||
6160 | cmpq(length, 16); | |||
6161 | jcc(Assembler::equal, VECTOR16_LOOP); | |||
6162 | jcc(Assembler::less, VECTOR8_LOOP); | |||
6163 | ||||
6164 | cmpq(length, 32); | |||
6165 | jccb(Assembler::less, VECTOR16_TAIL)jccb_0(Assembler::less, VECTOR16_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6165); | |||
6166 | ||||
6167 | subq(length, 32); | |||
6168 | bind(VECTOR32_LOOP); | |||
6169 | vmovdqu(rymm0, Address(obja, result)); | |||
6170 | vmovdqu(rymm1, Address(objb, result)); | |||
6171 | vpxor(rymm2, rymm0, rymm1, Assembler::AVX_256bit); | |||
6172 | vptest(rymm2, rymm2); | |||
6173 | jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found | |||
6174 | addq(result, 32); | |||
6175 | subq(length, 32); | |||
6176 | jcc(Assembler::greaterEqual, VECTOR32_LOOP); | |||
6177 | addq(length, 32); | |||
6178 | jcc(Assembler::equal, SAME_TILL_END); | |||
6179 | //falling through if less than 32 bytes left //close the branch here. | |||
6180 | ||||
6181 | bind(VECTOR16_TAIL); | |||
6182 | cmpq(length, 16); | |||
6183 | jccb(Assembler::less, VECTOR8_TAIL)jccb_0(Assembler::less, VECTOR8_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6183); | |||
6184 | bind(VECTOR16_LOOP); | |||
6185 | movdqu(rymm0, Address(obja, result)); | |||
6186 | movdqu(rymm1, Address(objb, result)); | |||
6187 | vpxor(rymm2, rymm0, rymm1, Assembler::AVX_128bit); | |||
6188 | ptest(rymm2, rymm2); | |||
6189 | jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found | |||
6190 | addq(result, 16); | |||
6191 | subq(length, 16); | |||
6192 | jcc(Assembler::equal, SAME_TILL_END); | |||
6193 | //falling through if less than 16 bytes left | |||
6194 | } else {//regular intrinsics | |||
6195 | ||||
6196 | cmpq(length, 16); | |||
6197 | jccb(Assembler::less, VECTOR8_TAIL)jccb_0(Assembler::less, VECTOR8_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6197); | |||
6198 | ||||
6199 | subq(length, 16); | |||
6200 | bind(VECTOR16_LOOP); | |||
6201 | movdqu(rymm0, Address(obja, result)); | |||
6202 | movdqu(rymm1, Address(objb, result)); | |||
6203 | pxor(rymm0, rymm1); | |||
6204 | ptest(rymm0, rymm0); | |||
6205 | jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found | |||
6206 | addq(result, 16); | |||
6207 | subq(length, 16); | |||
6208 | jccb(Assembler::greaterEqual, VECTOR16_LOOP)jccb_0(Assembler::greaterEqual, VECTOR16_LOOP, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6208); | |||
6209 | addq(length, 16); | |||
6210 | jcc(Assembler::equal, SAME_TILL_END); | |||
6211 | //falling through if less than 16 bytes left | |||
6212 | } | |||
6213 | ||||
6214 | bind(VECTOR8_TAIL); | |||
6215 | cmpq(length, 8); | |||
6216 | jccb(Assembler::less, VECTOR4_TAIL)jccb_0(Assembler::less, VECTOR4_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6216); | |||
6217 | bind(VECTOR8_LOOP); | |||
6218 | movq(tmp1, Address(obja, result)); | |||
6219 | movq(tmp2, Address(objb, result)); | |||
6220 | xorq(tmp1, tmp2); | |||
6221 | testq(tmp1, tmp1); | |||
6222 | jcc(Assembler::notZero, VECTOR8_NOT_EQUAL);//mismatch found | |||
6223 | addq(result, 8); | |||
6224 | subq(length, 8); | |||
6225 | jcc(Assembler::equal, SAME_TILL_END); | |||
6226 | //falling through if less than 8 bytes left | |||
6227 | ||||
6228 | bind(VECTOR4_TAIL); | |||
6229 | cmpq(length, 4); | |||
6230 | jccb(Assembler::less, BYTES_TAIL)jccb_0(Assembler::less, BYTES_TAIL, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6230); | |||
6231 | bind(VECTOR4_LOOP); | |||
6232 | movl(tmp1, Address(obja, result)); | |||
6233 | xorl(tmp1, Address(objb, result)); | |||
6234 | testl(tmp1, tmp1); | |||
6235 | jcc(Assembler::notZero, VECTOR4_NOT_EQUAL);//mismatch found | |||
6236 | addq(result, 4); | |||
6237 | subq(length, 4); | |||
6238 | jcc(Assembler::equal, SAME_TILL_END); | |||
6239 | //falling through if less than 4 bytes left | |||
6240 | ||||
6241 | bind(BYTES_TAIL); | |||
6242 | bind(BYTES_LOOP); | |||
6243 | load_unsigned_byte(tmp1, Address(obja, result)); | |||
6244 | load_unsigned_byte(tmp2, Address(objb, result)); | |||
6245 | xorl(tmp1, tmp2); | |||
6246 | testl(tmp1, tmp1); | |||
6247 | jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found | |||
6248 | decq(length); | |||
6249 | jcc(Assembler::zero, SAME_TILL_END); | |||
6250 | incq(result); | |||
6251 | load_unsigned_byte(tmp1, Address(obja, result)); | |||
6252 | load_unsigned_byte(tmp2, Address(objb, result)); | |||
6253 | xorl(tmp1, tmp2); | |||
6254 | testl(tmp1, tmp1); | |||
6255 | jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found | |||
6256 | decq(length); | |||
6257 | jcc(Assembler::zero, SAME_TILL_END); | |||
6258 | incq(result); | |||
6259 | load_unsigned_byte(tmp1, Address(obja, result)); | |||
6260 | load_unsigned_byte(tmp2, Address(objb, result)); | |||
6261 | xorl(tmp1, tmp2); | |||
6262 | testl(tmp1, tmp1); | |||
6263 | jcc(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found | |||
6264 | jmp(SAME_TILL_END); | |||
6265 | ||||
6266 | if (UseAVX >= 2) { | |||
6267 | bind(VECTOR32_NOT_EQUAL); | |||
6268 | vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit); | |||
6269 | vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit); | |||
6270 | vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit); | |||
6271 | vpmovmskb(tmp1, rymm0); | |||
6272 | bsfq(tmp1, tmp1); | |||
6273 | addq(result, tmp1); | |||
6274 | shrq(result); | |||
6275 | jmp(DONE); | |||
6276 | } | |||
6277 | ||||
6278 | bind(VECTOR16_NOT_EQUAL); | |||
6279 | if (UseAVX >= 2) { | |||
6280 | vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit); | |||
6281 | vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit); | |||
6282 | pxor(rymm0, rymm2); | |||
6283 | } else { | |||
6284 | pcmpeqb(rymm2, rymm2); | |||
6285 | pxor(rymm0, rymm1); | |||
6286 | pcmpeqb(rymm0, rymm1); | |||
6287 | pxor(rymm0, rymm2); | |||
6288 | } | |||
6289 | pmovmskb(tmp1, rymm0); | |||
6290 | bsfq(tmp1, tmp1); | |||
6291 | addq(result, tmp1); | |||
6292 | shrq(result); | |||
6293 | jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6293); | |||
6294 | ||||
6295 | bind(VECTOR8_NOT_EQUAL); | |||
6296 | bind(VECTOR4_NOT_EQUAL); | |||
6297 | bsfq(tmp1, tmp1); | |||
6298 | shrq(tmp1, 3); | |||
6299 | addq(result, tmp1); | |||
6300 | bind(BYTES_NOT_EQUAL); | |||
6301 | shrq(result); | |||
6302 | jmpb(DONE)jmpb_0(DONE, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6302); | |||
6303 | ||||
6304 | bind(SAME_TILL_END); | |||
6305 | mov64(result, -1); | |||
6306 | ||||
6307 | bind(DONE); | |||
6308 | } | |||
6309 | ||||
6310 | //Helper functions for square_to_len() | |||
6311 | ||||
6312 | /** | |||
6313 | * Store the squares of x[], right shifted one bit (divided by 2) into z[] | |||
6314 | * Preserves x and z and modifies rest of the registers. | |||
6315 | */ | |||
6316 | void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { | |||
6317 | // Perform square and right shift by 1 | |||
6318 | // Handle odd xlen case first, then for even xlen do the following | |||
6319 | // jlong carry = 0; | |||
6320 | // for (int j=0, i=0; j < xlen; j+=2, i+=4) { | |||
6321 | // huge_128 product = x[j:j+1] * x[j:j+1]; | |||
6322 | // z[i:i+1] = (carry << 63) | (jlong)(product >>> 65); | |||
6323 | // z[i+2:i+3] = (jlong)(product >>> 1); | |||
6324 | // carry = (jlong)product; | |||
6325 | // } | |||
6326 | ||||
6327 | xorq(tmp5, tmp5); // carry | |||
6328 | xorq(rdxReg, rdxReg); | |||
6329 | xorl(tmp1, tmp1); // index for x | |||
6330 | xorl(tmp4, tmp4); // index for z | |||
6331 | ||||
6332 | Label L_first_loop, L_first_loop_exit; | |||
6333 | ||||
6334 | testl(xlen, 1); | |||
6335 | jccb(Assembler::zero, L_first_loop)jccb_0(Assembler::zero, L_first_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6335); //jump if xlen is even | |||
6336 | ||||
6337 | // Square and right shift by 1 the odd element using 32 bit multiply | |||
6338 | movl(raxReg, Address(x, tmp1, Address::times_4, 0)); | |||
6339 | imulq(raxReg, raxReg); | |||
6340 | shrq(raxReg, 1); | |||
6341 | adcq(tmp5, 0); | |||
6342 | movq(Address(z, tmp4, Address::times_4, 0), raxReg); | |||
6343 | incrementl(tmp1); | |||
6344 | addl(tmp4, 2); | |||
6345 | ||||
6346 | // Square and right shift by 1 the rest using 64 bit multiply | |||
6347 | bind(L_first_loop); | |||
6348 | cmpptr(tmp1, xlen); | |||
6349 | jccb(Assembler::equal, L_first_loop_exit)jccb_0(Assembler::equal, L_first_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6349); | |||
6350 | ||||
6351 | // Square | |||
6352 | movq(raxReg, Address(x, tmp1, Address::times_4, 0)); | |||
6353 | rorq(raxReg, 32); // convert big-endian to little-endian | |||
6354 | mulq(raxReg); // 64-bit multiply rax * rax -> rdx:rax | |||
6355 | ||||
6356 | // Right shift by 1 and save carry | |||
6357 | shrq(tmp5, 1); // rdx:rax:tmp5 = (tmp5:rdx:rax) >>> 1 | |||
6358 | rcrq(rdxReg, 1); | |||
6359 | rcrq(raxReg, 1); | |||
6360 | adcq(tmp5, 0); | |||
6361 | ||||
6362 | // Store result in z | |||
6363 | movq(Address(z, tmp4, Address::times_4, 0), rdxReg); | |||
6364 | movq(Address(z, tmp4, Address::times_4, 8), raxReg); | |||
6365 | ||||
6366 | // Update indices for x and z | |||
6367 | addl(tmp1, 2); | |||
6368 | addl(tmp4, 4); | |||
6369 | jmp(L_first_loop); | |||
6370 | ||||
6371 | bind(L_first_loop_exit); | |||
6372 | } | |||
6373 | ||||
6374 | ||||
6375 | /** | |||
6376 | * Perform the following multiply add operation using BMI2 instructions | |||
6377 | * carry:sum = sum + op1*op2 + carry | |||
6378 | * op2 should be in rdx | |||
6379 | * op2 is preserved, all other registers are modified | |||
6380 | */ | |||
6381 | void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) { | |||
6382 | // assert op2 is rdx | |||
6383 | mulxq(tmp2, op1, op1); // op1 * op2 -> tmp2:op1 | |||
6384 | addq(sum, carry); | |||
6385 | adcq(tmp2, 0); | |||
6386 | addq(sum, op1); | |||
6387 | adcq(tmp2, 0); | |||
6388 | movq(carry, tmp2); | |||
6389 | } | |||
6390 | ||||
6391 | /** | |||
6392 | * Perform the following multiply add operation: | |||
6393 | * carry:sum = sum + op1*op2 + carry | |||
6394 | * Preserves op1, op2 and modifies rest of registers | |||
6395 | */ | |||
6396 | void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) { | |||
6397 | // rdx:rax = op1 * op2 | |||
6398 | movq(raxReg, op2); | |||
6399 | mulq(op1); | |||
6400 | ||||
6401 | // rdx:rax = sum + carry + rdx:rax | |||
6402 | addq(sum, carry); | |||
6403 | adcq(rdxReg, 0); | |||
6404 | addq(sum, raxReg); | |||
6405 | adcq(rdxReg, 0); | |||
6406 | ||||
6407 | // carry:sum = rdx:sum | |||
6408 | movq(carry, rdxReg); | |||
6409 | } | |||
6410 | ||||
6411 | /** | |||
6412 | * Add 64 bit long carry into z[] with carry propogation. | |||
6413 | * Preserves z and carry register values and modifies rest of registers. | |||
6414 | * | |||
6415 | */ | |||
6416 | void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) { | |||
6417 | Label L_fourth_loop, L_fourth_loop_exit; | |||
6418 | ||||
6419 | movl(tmp1, 1); | |||
6420 | subl(zlen, 2); | |||
6421 | addq(Address(z, zlen, Address::times_4, 0), carry); | |||
6422 | ||||
6423 | bind(L_fourth_loop); | |||
6424 | jccb(Assembler::carryClear, L_fourth_loop_exit)jccb_0(Assembler::carryClear, L_fourth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6424); | |||
6425 | subl(zlen, 2); | |||
6426 | jccb(Assembler::negative, L_fourth_loop_exit)jccb_0(Assembler::negative, L_fourth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6426); | |||
6427 | addq(Address(z, zlen, Address::times_4, 0), tmp1); | |||
6428 | jmp(L_fourth_loop); | |||
6429 | bind(L_fourth_loop_exit); | |||
6430 | } | |||
6431 | ||||
6432 | /** | |||
6433 | * Shift z[] left by 1 bit. | |||
6434 | * Preserves x, len, z and zlen registers and modifies rest of the registers. | |||
6435 | * | |||
6436 | */ | |||
6437 | void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) { | |||
6438 | ||||
6439 | Label L_fifth_loop, L_fifth_loop_exit; | |||
6440 | ||||
6441 | // Fifth loop | |||
6442 | // Perform primitiveLeftShift(z, zlen, 1) | |||
6443 | ||||
6444 | const Register prev_carry = tmp1; | |||
6445 | const Register new_carry = tmp4; | |||
6446 | const Register value = tmp2; | |||
6447 | const Register zidx = tmp3; | |||
6448 | ||||
6449 | // int zidx, carry; | |||
6450 | // long value; | |||
6451 | // carry = 0; | |||
6452 | // for (zidx = zlen-2; zidx >=0; zidx -= 2) { | |||
6453 | // (carry:value) = (z[i] << 1) | carry ; | |||
6454 | // z[i] = value; | |||
6455 | // } | |||
6456 | ||||
6457 | movl(zidx, zlen); | |||
6458 | xorl(prev_carry, prev_carry); // clear carry flag and prev_carry register | |||
6459 | ||||
6460 | bind(L_fifth_loop); | |||
6461 | decl(zidx); // Use decl to preserve carry flag | |||
6462 | decl(zidx); | |||
6463 | jccb(Assembler::negative, L_fifth_loop_exit)jccb_0(Assembler::negative, L_fifth_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6463); | |||
6464 | ||||
6465 | if (UseBMI2Instructions) { | |||
6466 | movq(value, Address(z, zidx, Address::times_4, 0)); | |||
6467 | rclq(value, 1); | |||
6468 | rorxq(value, value, 32); | |||
6469 | movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form | |||
6470 | } | |||
6471 | else { | |||
6472 | // clear new_carry | |||
6473 | xorl(new_carry, new_carry); | |||
6474 | ||||
6475 | // Shift z[i] by 1, or in previous carry and save new carry | |||
6476 | movq(value, Address(z, zidx, Address::times_4, 0)); | |||
6477 | shlq(value, 1); | |||
6478 | adcl(new_carry, 0); | |||
6479 | ||||
6480 | orq(value, prev_carry); | |||
6481 | rorq(value, 0x20); | |||
6482 | movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form | |||
6483 | ||||
6484 | // Set previous carry = new carry | |||
6485 | movl(prev_carry, new_carry); | |||
6486 | } | |||
6487 | jmp(L_fifth_loop); | |||
6488 | ||||
6489 | bind(L_fifth_loop_exit); | |||
6490 | } | |||
6491 | ||||
6492 | ||||
6493 | /** | |||
6494 | * Code for BigInteger::squareToLen() intrinsic | |||
6495 | * | |||
6496 | * rdi: x | |||
6497 | * rsi: len | |||
6498 | * r8: z | |||
6499 | * rcx: zlen | |||
6500 | * r12: tmp1 | |||
6501 | * r13: tmp2 | |||
6502 | * r14: tmp3 | |||
6503 | * r15: tmp4 | |||
6504 | * rbx: tmp5 | |||
6505 | * | |||
6506 | */ | |||
6507 | void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { | |||
6508 | ||||
6509 | Label L_second_loop, L_second_loop_exit, L_third_loop, L_third_loop_exit, L_last_x, L_multiply; | |||
6510 | push(tmp1); | |||
6511 | push(tmp2); | |||
6512 | push(tmp3); | |||
6513 | push(tmp4); | |||
6514 | push(tmp5); | |||
6515 | ||||
6516 | // First loop | |||
6517 | // Store the squares, right shifted one bit (i.e., divided by 2). | |||
6518 | square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg); | |||
6519 | ||||
6520 | // Add in off-diagonal sums. | |||
6521 | // | |||
6522 | // Second, third (nested) and fourth loops. | |||
6523 | // zlen +=2; | |||
6524 | // for (int xidx=len-2,zidx=zlen-4; xidx > 0; xidx-=2,zidx-=4) { | |||
6525 | // carry = 0; | |||
6526 | // long op2 = x[xidx:xidx+1]; | |||
6527 | // for (int j=xidx-2,k=zidx; j >= 0; j-=2) { | |||
6528 | // k -= 2; | |||
6529 | // long op1 = x[j:j+1]; | |||
6530 | // long sum = z[k:k+1]; | |||
6531 | // carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs); | |||
6532 | // z[k:k+1] = sum; | |||
6533 | // } | |||
6534 | // add_one_64(z, k, carry, tmp_regs); | |||
6535 | // } | |||
6536 | ||||
6537 | const Register carry = tmp5; | |||
6538 | const Register sum = tmp3; | |||
6539 | const Register op1 = tmp4; | |||
6540 | Register op2 = tmp2; | |||
6541 | ||||
6542 | push(zlen); | |||
6543 | push(len); | |||
6544 | addl(zlen,2); | |||
6545 | bind(L_second_loop); | |||
6546 | xorq(carry, carry); | |||
6547 | subl(zlen, 4); | |||
6548 | subl(len, 2); | |||
6549 | push(zlen); | |||
6550 | push(len); | |||
6551 | cmpl(len, 0); | |||
6552 | jccb(Assembler::lessEqual, L_second_loop_exit)jccb_0(Assembler::lessEqual, L_second_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6552); | |||
6553 | ||||
6554 | // Multiply an array by one 64 bit long. | |||
6555 | if (UseBMI2Instructions) { | |||
6556 | op2 = rdxReg; | |||
6557 | movq(op2, Address(x, len, Address::times_4, 0)); | |||
6558 | rorxq(op2, op2, 32); | |||
6559 | } | |||
6560 | else { | |||
6561 | movq(op2, Address(x, len, Address::times_4, 0)); | |||
6562 | rorq(op2, 32); | |||
6563 | } | |||
6564 | ||||
6565 | bind(L_third_loop); | |||
6566 | decrementl(len); | |||
6567 | jccb(Assembler::negative, L_third_loop_exit)jccb_0(Assembler::negative, L_third_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6567); | |||
6568 | decrementl(len); | |||
6569 | jccb(Assembler::negative, L_last_x)jccb_0(Assembler::negative, L_last_x, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6569); | |||
6570 | ||||
6571 | movq(op1, Address(x, len, Address::times_4, 0)); | |||
6572 | rorq(op1, 32); | |||
6573 | ||||
6574 | bind(L_multiply); | |||
6575 | subl(zlen, 2); | |||
6576 | movq(sum, Address(z, zlen, Address::times_4, 0)); | |||
6577 | ||||
6578 | // Multiply 64 bit by 64 bit and add 64 bits lower half and upper 64 bits as carry. | |||
6579 | if (UseBMI2Instructions) { | |||
6580 | multiply_add_64_bmi2(sum, op1, op2, carry, tmp2); | |||
6581 | } | |||
6582 | else { | |||
6583 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); | |||
6584 | } | |||
6585 | ||||
6586 | movq(Address(z, zlen, Address::times_4, 0), sum); | |||
6587 | ||||
6588 | jmp(L_third_loop); | |||
6589 | bind(L_third_loop_exit); | |||
6590 | ||||
6591 | // Fourth loop | |||
6592 | // Add 64 bit long carry into z with carry propogation. | |||
6593 | // Uses offsetted zlen. | |||
6594 | add_one_64(z, zlen, carry, tmp1); | |||
6595 | ||||
6596 | pop(len); | |||
6597 | pop(zlen); | |||
6598 | jmp(L_second_loop); | |||
6599 | ||||
6600 | // Next infrequent code is moved outside loops. | |||
6601 | bind(L_last_x); | |||
6602 | movl(op1, Address(x, 0)); | |||
6603 | jmp(L_multiply); | |||
6604 | ||||
6605 | bind(L_second_loop_exit); | |||
6606 | pop(len); | |||
6607 | pop(zlen); | |||
6608 | pop(len); | |||
6609 | pop(zlen); | |||
6610 | ||||
6611 | // Fifth loop | |||
6612 | // Shift z left 1 bit. | |||
6613 | lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4); | |||
6614 | ||||
6615 | // z[zlen-1] |= x[len-1] & 1; | |||
6616 | movl(tmp3, Address(x, len, Address::times_4, -4)); | |||
6617 | andl(tmp3, 1); | |||
6618 | orl(Address(z, zlen, Address::times_4, -4), tmp3); | |||
6619 | ||||
6620 | pop(tmp5); | |||
6621 | pop(tmp4); | |||
6622 | pop(tmp3); | |||
6623 | pop(tmp2); | |||
6624 | pop(tmp1); | |||
6625 | } | |||
6626 | ||||
6627 | /** | |||
6628 | * Helper function for mul_add() | |||
6629 | * Multiply the in[] by int k and add to out[] starting at offset offs using | |||
6630 | * 128 bit by 32 bit multiply and return the carry in tmp5. | |||
6631 | * Only quad int aligned length of in[] is operated on in this function. | |||
6632 | * k is in rdxReg for BMI2Instructions, for others it is in tmp2. | |||
6633 | * This function preserves out, in and k registers. | |||
6634 | * len and offset point to the appropriate index in "in" & "out" correspondingly | |||
6635 | * tmp5 has the carry. | |||
6636 | * other registers are temporary and are modified. | |||
6637 | * | |||
6638 | */ | |||
6639 | void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in, | |||
6640 | Register offset, Register len, Register tmp1, Register tmp2, Register tmp3, | |||
6641 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { | |||
6642 | ||||
6643 | Label L_first_loop, L_first_loop_exit; | |||
6644 | ||||
6645 | movl(tmp1, len); | |||
6646 | shrl(tmp1, 2); | |||
6647 | ||||
6648 | bind(L_first_loop); | |||
6649 | subl(tmp1, 1); | |||
6650 | jccb(Assembler::negative, L_first_loop_exit)jccb_0(Assembler::negative, L_first_loop_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6650); | |||
6651 | ||||
6652 | subl(len, 4); | |||
6653 | subl(offset, 4); | |||
6654 | ||||
6655 | Register op2 = tmp2; | |||
6656 | const Register sum = tmp3; | |||
6657 | const Register op1 = tmp4; | |||
6658 | const Register carry = tmp5; | |||
6659 | ||||
6660 | if (UseBMI2Instructions) { | |||
6661 | op2 = rdxReg; | |||
6662 | } | |||
6663 | ||||
6664 | movq(op1, Address(in, len, Address::times_4, 8)); | |||
6665 | rorq(op1, 32); | |||
6666 | movq(sum, Address(out, offset, Address::times_4, 8)); | |||
6667 | rorq(sum, 32); | |||
6668 | if (UseBMI2Instructions) { | |||
6669 | multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); | |||
6670 | } | |||
6671 | else { | |||
6672 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); | |||
6673 | } | |||
6674 | // Store back in big endian from little endian | |||
6675 | rorq(sum, 0x20); | |||
6676 | movq(Address(out, offset, Address::times_4, 8), sum); | |||
6677 | ||||
6678 | movq(op1, Address(in, len, Address::times_4, 0)); | |||
6679 | rorq(op1, 32); | |||
6680 | movq(sum, Address(out, offset, Address::times_4, 0)); | |||
6681 | rorq(sum, 32); | |||
6682 | if (UseBMI2Instructions) { | |||
6683 | multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); | |||
6684 | } | |||
6685 | else { | |||
6686 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); | |||
6687 | } | |||
6688 | // Store back in big endian from little endian | |||
6689 | rorq(sum, 0x20); | |||
6690 | movq(Address(out, offset, Address::times_4, 0), sum); | |||
6691 | ||||
6692 | jmp(L_first_loop); | |||
6693 | bind(L_first_loop_exit); | |||
6694 | } | |||
6695 | ||||
6696 | /** | |||
6697 | * Code for BigInteger::mulAdd() intrinsic | |||
6698 | * | |||
6699 | * rdi: out | |||
6700 | * rsi: in | |||
6701 | * r11: offs (out.length - offset) | |||
6702 | * rcx: len | |||
6703 | * r8: k | |||
6704 | * r12: tmp1 | |||
6705 | * r13: tmp2 | |||
6706 | * r14: tmp3 | |||
6707 | * r15: tmp4 | |||
6708 | * rbx: tmp5 | |||
6709 | * Multiply the in[] by word k and add to out[], return the carry in rax | |||
6710 | */ | |||
6711 | void MacroAssembler::mul_add(Register out, Register in, Register offs, | |||
6712 | Register len, Register k, Register tmp1, Register tmp2, Register tmp3, | |||
6713 | Register tmp4, Register tmp5, Register rdxReg, Register raxReg) { | |||
6714 | ||||
6715 | Label L_carry, L_last_in, L_done; | |||
6716 | ||||
6717 | // carry = 0; | |||
6718 | // for (int j=len-1; j >= 0; j--) { | |||
6719 | // long product = (in[j] & LONG_MASK) * kLong + | |||
6720 | // (out[offs] & LONG_MASK) + carry; | |||
6721 | // out[offs--] = (int)product; | |||
6722 | // carry = product >>> 32; | |||
6723 | // } | |||
6724 | // | |||
6725 | push(tmp1); | |||
6726 | push(tmp2); | |||
6727 | push(tmp3); | |||
6728 | push(tmp4); | |||
6729 | push(tmp5); | |||
6730 | ||||
6731 | Register op2 = tmp2; | |||
6732 | const Register sum = tmp3; | |||
6733 | const Register op1 = tmp4; | |||
6734 | const Register carry = tmp5; | |||
6735 | ||||
6736 | if (UseBMI2Instructions) { | |||
6737 | op2 = rdxReg; | |||
6738 | movl(op2, k); | |||
6739 | } | |||
6740 | else { | |||
6741 | movl(op2, k); | |||
6742 | } | |||
6743 | ||||
6744 | xorq(carry, carry); | |||
6745 | ||||
6746 | //First loop | |||
6747 | ||||
6748 | //Multiply in[] by k in a 4 way unrolled loop using 128 bit by 32 bit multiply | |||
6749 | //The carry is in tmp5 | |||
6750 | mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg); | |||
6751 | ||||
6752 | //Multiply the trailing in[] entry using 64 bit by 32 bit, if any | |||
6753 | decrementl(len); | |||
6754 | jccb(Assembler::negative, L_carry)jccb_0(Assembler::negative, L_carry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6754); | |||
6755 | decrementl(len); | |||
6756 | jccb(Assembler::negative, L_last_in)jccb_0(Assembler::negative, L_last_in, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6756); | |||
6757 | ||||
6758 | movq(op1, Address(in, len, Address::times_4, 0)); | |||
6759 | rorq(op1, 32); | |||
6760 | ||||
6761 | subl(offs, 2); | |||
6762 | movq(sum, Address(out, offs, Address::times_4, 0)); | |||
6763 | rorq(sum, 32); | |||
6764 | ||||
6765 | if (UseBMI2Instructions) { | |||
6766 | multiply_add_64_bmi2(sum, op1, op2, carry, raxReg); | |||
6767 | } | |||
6768 | else { | |||
6769 | multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg); | |||
6770 | } | |||
6771 | ||||
6772 | // Store back in big endian from little endian | |||
6773 | rorq(sum, 0x20); | |||
6774 | movq(Address(out, offs, Address::times_4, 0), sum); | |||
6775 | ||||
6776 | testl(len, len); | |||
6777 | jccb(Assembler::zero, L_carry)jccb_0(Assembler::zero, L_carry, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6777); | |||
6778 | ||||
6779 | //Multiply the last in[] entry, if any | |||
6780 | bind(L_last_in); | |||
6781 | movl(op1, Address(in, 0)); | |||
6782 | movl(sum, Address(out, offs, Address::times_4, -4)); | |||
6783 | ||||
6784 | movl(raxReg, k); | |||
6785 | mull(op1); //tmp4 * eax -> edx:eax | |||
6786 | addl(sum, carry); | |||
6787 | adcl(rdxReg, 0); | |||
6788 | addl(sum, raxReg); | |||
6789 | adcl(rdxReg, 0); | |||
6790 | movl(carry, rdxReg); | |||
6791 | ||||
6792 | movl(Address(out, offs, Address::times_4, -4), sum); | |||
6793 | ||||
6794 | bind(L_carry); | |||
6795 | //return tmp5/carry as carry in rax | |||
6796 | movl(rax, carry); | |||
6797 | ||||
6798 | bind(L_done); | |||
6799 | pop(tmp5); | |||
6800 | pop(tmp4); | |||
6801 | pop(tmp3); | |||
6802 | pop(tmp2); | |||
6803 | pop(tmp1); | |||
6804 | } | |||
6805 | #endif | |||
6806 | ||||
6807 | /** | |||
6808 | * Emits code to update CRC-32 with a byte value according to constants in table | |||
6809 | * | |||
6810 | * @param [in,out]crc Register containing the crc. | |||
6811 | * @param [in]val Register containing the byte to fold into the CRC. | |||
6812 | * @param [in]table Register containing the table of crc constants. | |||
6813 | * | |||
6814 | * uint32_t crc; | |||
6815 | * val = crc_table[(val ^ crc) & 0xFF]; | |||
6816 | * crc = val ^ (crc >> 8); | |||
6817 | * | |||
6818 | */ | |||
6819 | void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { | |||
6820 | xorl(val, crc); | |||
6821 | andl(val, 0xFF); | |||
6822 | shrl(crc, 8); // unsigned shift | |||
6823 | xorl(crc, Address(table, val, Address::times_4, 0)); | |||
6824 | } | |||
6825 | ||||
6826 | /** | |||
6827 | * Fold 128-bit data chunk | |||
6828 | */ | |||
6829 | void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) { | |||
6830 | if (UseAVX > 0) { | |||
6831 | vpclmulhdq(xtmp, xK, xcrc); // [123:64] | |||
6832 | vpclmulldq(xcrc, xK, xcrc); // [63:0] | |||
6833 | vpxor(xcrc, xcrc, Address(buf, offset), 0 /* vector_len */); | |||
6834 | pxor(xcrc, xtmp); | |||
6835 | } else { | |||
6836 | movdqa(xtmp, xcrc); | |||
6837 | pclmulhdq(xtmp, xK); // [123:64] | |||
6838 | pclmulldq(xcrc, xK); // [63:0] | |||
6839 | pxor(xcrc, xtmp); | |||
6840 | movdqu(xtmp, Address(buf, offset)); | |||
6841 | pxor(xcrc, xtmp); | |||
6842 | } | |||
6843 | } | |||
6844 | ||||
6845 | void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) { | |||
6846 | if (UseAVX > 0) { | |||
6847 | vpclmulhdq(xtmp, xK, xcrc); | |||
6848 | vpclmulldq(xcrc, xK, xcrc); | |||
6849 | pxor(xcrc, xbuf); | |||
6850 | pxor(xcrc, xtmp); | |||
6851 | } else { | |||
6852 | movdqa(xtmp, xcrc); | |||
6853 | pclmulhdq(xtmp, xK); | |||
6854 | pclmulldq(xcrc, xK); | |||
6855 | pxor(xcrc, xbuf); | |||
6856 | pxor(xcrc, xtmp); | |||
6857 | } | |||
6858 | } | |||
6859 | ||||
6860 | /** | |||
6861 | * 8-bit folds to compute 32-bit CRC | |||
6862 | * | |||
6863 | * uint64_t xcrc; | |||
6864 | * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8); | |||
6865 | */ | |||
6866 | void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) { | |||
6867 | movdl(tmp, xcrc); | |||
6868 | andl(tmp, 0xFF); | |||
6869 | movdl(xtmp, Address(table, tmp, Address::times_4, 0)); | |||
6870 | psrldq(xcrc, 1); // unsigned shift one byte | |||
6871 | pxor(xcrc, xtmp); | |||
6872 | } | |||
6873 | ||||
6874 | /** | |||
6875 | * uint32_t crc; | |||
6876 | * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); | |||
6877 | */ | |||
6878 | void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { | |||
6879 | movl(tmp, crc); | |||
6880 | andl(tmp, 0xFF); | |||
6881 | shrl(crc, 8); | |||
6882 | xorl(crc, Address(table, tmp, Address::times_4, 0)); | |||
6883 | } | |||
6884 | ||||
6885 | /** | |||
6886 | * @param crc register containing existing CRC (32-bit) | |||
6887 | * @param buf register pointing to input byte buffer (byte*) | |||
6888 | * @param len register containing number of bytes | |||
6889 | * @param table register that will contain address of CRC table | |||
6890 | * @param tmp scratch register | |||
6891 | */ | |||
6892 | void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) { | |||
6893 | assert_different_registers(crc, buf, len, table, tmp, rax); | |||
6894 | ||||
6895 | Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned; | |||
6896 | Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop; | |||
6897 | ||||
6898 | // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge | |||
6899 | // context for the registers used, where all instructions below are using 128-bit mode | |||
6900 | // On EVEX without VL and BW, these instructions will all be AVX. | |||
6901 | lea(table, ExternalAddress(StubRoutines::crc_table_addr())); | |||
6902 | notl(crc); // ~crc | |||
6903 | cmpl(len, 16); | |||
6904 | jcc(Assembler::less, L_tail); | |||
6905 | ||||
6906 | // Align buffer to 16 bytes | |||
6907 | movl(tmp, buf); | |||
6908 | andl(tmp, 0xF); | |||
6909 | jccb(Assembler::zero, L_aligned)jccb_0(Assembler::zero, L_aligned, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6909); | |||
6910 | subl(tmp, 16); | |||
6911 | addl(len, tmp); | |||
6912 | ||||
6913 | align(4); | |||
6914 | BIND(L_align_loop); | |||
6915 | movsbl(rax, Address(buf, 0)); // load byte with sign extension | |||
6916 | update_byte_crc32(crc, rax, table); | |||
6917 | increment(buf); | |||
6918 | incrementl(tmp); | |||
6919 | jccb(Assembler::less, L_align_loop)jccb_0(Assembler::less, L_align_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6919); | |||
6920 | ||||
6921 | BIND(L_aligned); | |||
6922 | movl(tmp, len); // save | |||
6923 | shrl(len, 4); | |||
6924 | jcc(Assembler::zero, L_tail_restore); | |||
6925 | ||||
6926 | // Fold crc into first bytes of vector | |||
6927 | movdqa(xmm1, Address(buf, 0)); | |||
6928 | movdl(rax, xmm1); | |||
6929 | xorl(crc, rax); | |||
6930 | if (VM_Version::supports_sse4_1()) { | |||
6931 | pinsrd(xmm1, crc, 0); | |||
6932 | } else { | |||
6933 | pinsrw(xmm1, crc, 0); | |||
6934 | shrl(crc, 16); | |||
6935 | pinsrw(xmm1, crc, 1); | |||
6936 | } | |||
6937 | addptr(buf, 16); | |||
6938 | subl(len, 4); // len > 0 | |||
6939 | jcc(Assembler::less, L_fold_tail); | |||
6940 | ||||
6941 | movdqa(xmm2, Address(buf, 0)); | |||
6942 | movdqa(xmm3, Address(buf, 16)); | |||
6943 | movdqa(xmm4, Address(buf, 32)); | |||
6944 | addptr(buf, 48); | |||
6945 | subl(len, 3); | |||
6946 | jcc(Assembler::lessEqual, L_fold_512b); | |||
6947 | ||||
6948 | // Fold total 512 bits of polynomial on each iteration, | |||
6949 | // 128 bits per each of 4 parallel streams. | |||
6950 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32)); | |||
6951 | ||||
6952 | align32(); | |||
6953 | BIND(L_fold_512b_loop); | |||
6954 | fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0); | |||
6955 | fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16); | |||
6956 | fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32); | |||
6957 | fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48); | |||
6958 | addptr(buf, 64); | |||
6959 | subl(len, 4); | |||
6960 | jcc(Assembler::greater, L_fold_512b_loop); | |||
6961 | ||||
6962 | // Fold 512 bits to 128 bits. | |||
6963 | BIND(L_fold_512b); | |||
6964 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); | |||
6965 | fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2); | |||
6966 | fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3); | |||
6967 | fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4); | |||
6968 | ||||
6969 | // Fold the rest of 128 bits data chunks | |||
6970 | BIND(L_fold_tail); | |||
6971 | addl(len, 3); | |||
6972 | jccb(Assembler::lessEqual, L_fold_128b)jccb_0(Assembler::lessEqual, L_fold_128b, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6972); | |||
6973 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); | |||
6974 | ||||
6975 | BIND(L_fold_tail_loop); | |||
6976 | fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0); | |||
6977 | addptr(buf, 16); | |||
6978 | decrementl(len); | |||
6979 | jccb(Assembler::greater, L_fold_tail_loop)jccb_0(Assembler::greater, L_fold_tail_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 6979); | |||
6980 | ||||
6981 | // Fold 128 bits in xmm1 down into 32 bits in crc register. | |||
6982 | BIND(L_fold_128b); | |||
6983 | movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); | |||
6984 | if (UseAVX > 0) { | |||
6985 | vpclmulqdq(xmm2, xmm0, xmm1, 0x1); | |||
6986 | vpand(xmm3, xmm0, xmm2, 0 /* vector_len */); | |||
6987 | vpclmulqdq(xmm0, xmm0, xmm3, 0x1); | |||
6988 | } else { | |||
6989 | movdqa(xmm2, xmm0); | |||
6990 | pclmulqdq(xmm2, xmm1, 0x1); | |||
6991 | movdqa(xmm3, xmm0); | |||
6992 | pand(xmm3, xmm2); | |||
6993 | pclmulqdq(xmm0, xmm3, 0x1); | |||
6994 | } | |||
6995 | psrldq(xmm1, 8); | |||
6996 | psrldq(xmm2, 4); | |||
6997 | pxor(xmm0, xmm1); | |||
6998 | pxor(xmm0, xmm2); | |||
6999 | ||||
7000 | // 8 8-bit folds to compute 32-bit CRC. | |||
7001 | for (int j = 0; j < 4; j++) { | |||
7002 | fold_8bit_crc32(xmm0, table, xmm1, rax); | |||
7003 | } | |||
7004 | movdl(crc, xmm0); // mov 32 bits to general register | |||
7005 | for (int j = 0; j < 4; j++) { | |||
7006 | fold_8bit_crc32(crc, table, rax); | |||
7007 | } | |||
7008 | ||||
7009 | BIND(L_tail_restore); | |||
7010 | movl(len, tmp); // restore | |||
7011 | BIND(L_tail); | |||
7012 | andl(len, 0xf); | |||
7013 | jccb(Assembler::zero, L_exit)jccb_0(Assembler::zero, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 7013); | |||
7014 | ||||
7015 | // Fold the rest of bytes | |||
7016 | align(4); | |||
7017 | BIND(L_tail_loop); | |||
7018 | movsbl(rax, Address(buf, 0)); // load byte with sign extension | |||
7019 | update_byte_crc32(crc, rax, table); | |||
7020 | increment(buf); | |||
7021 | decrementl(len); | |||
7022 | jccb(Assembler::greater, L_tail_loop)jccb_0(Assembler::greater, L_tail_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 7022); | |||
7023 | ||||
7024 | BIND(L_exit); | |||
7025 | notl(crc); // ~c | |||
7026 | } | |||
7027 | ||||
7028 | #ifdef _LP641 | |||
7029 | // Helper function for AVX 512 CRC32 | |||
7030 | // Fold 512-bit data chunks | |||
7031 | void MacroAssembler::fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, | |||
7032 | Register pos, int offset) { | |||
7033 | evmovdquq(xmm3, Address(buf, pos, Address::times_1, offset), Assembler::AVX_512bit); | |||
7034 | evpclmulqdq(xtmp, xcrc, xK, 0x10, Assembler::AVX_512bit); // [123:64] | |||
7035 | evpclmulqdq(xmm2, xcrc, xK, 0x01, Assembler::AVX_512bit); // [63:0] | |||
7036 | evpxorq(xcrc, xtmp, xmm2, Assembler::AVX_512bit /* vector_len */); | |||
7037 | evpxorq(xcrc, xcrc, xmm3, Assembler::AVX_512bit /* vector_len */); | |||
7038 | } | |||
7039 | ||||
7040 | // Helper function for AVX 512 CRC32 | |||
7041 | // Compute CRC32 for < 256B buffers | |||
7042 | void MacroAssembler::kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register table, Register pos, | |||
7043 | Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop, | |||
7044 | Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup) { | |||
7045 | ||||
7046 | Label L_less_than_32, L_exact_16_left, L_less_than_16_left; | |||
7047 | Label L_less_than_8_left, L_less_than_4_left, L_less_than_2_left, L_zero_left; | |||
7048 | Label L_only_less_than_4, L_only_less_than_3, L_only_less_than_2; | |||
7049 | ||||
7050 | // check if there is enough buffer to be able to fold 16B at a time | |||
7051 | cmpl(len, 32); | |||
7052 | jcc(Assembler::less, L_less_than_32); | |||
7053 | ||||
7054 | // if there is, load the constants | |||
7055 | movdqu(xmm10, Address(table, 1 * 16)); //rk1 and rk2 in xmm10 | |||
7056 | movdl(xmm0, crc); // get the initial crc value | |||
7057 | movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext | |||
7058 | pxor(xmm7, xmm0); | |||
7059 | ||||
7060 | // update the buffer pointer | |||
7061 | addl(pos, 16); | |||
7062 | //update the counter.subtract 32 instead of 16 to save one instruction from the loop | |||
7063 | subl(len, 32); | |||
7064 | jmp(L_16B_reduction_loop); | |||
7065 | ||||
7066 | bind(L_less_than_32); | |||
7067 | //mov initial crc to the return value. this is necessary for zero - length buffers. | |||
7068 | movl(rax, crc); | |||
7069 | testl(len, len); | |||
7070 | jcc(Assembler::equal, L_cleanup); | |||
7071 | ||||
7072 | movdl(xmm0, crc); //get the initial crc value | |||
7073 | ||||
7074 | cmpl(len, 16); | |||
7075 | jcc(Assembler::equal, L_exact_16_left); | |||
7076 | jcc(Assembler::less, L_less_than_16_left); | |||
7077 | ||||
7078 | movdqu(xmm7, Address(buf, pos, Address::times_1, 0 * 16)); //load the plaintext | |||
7079 | pxor(xmm7, xmm0); //xor the initial crc value | |||
7080 | addl(pos, 16); | |||
7081 | subl(len, 16); | |||
7082 | movdqu(xmm10, Address(table, 1 * 16)); // rk1 and rk2 in xmm10 | |||
7083 | jmp(L_get_last_two_xmms); | |||
7084 | ||||
7085 | bind(L_less_than_16_left); | |||
7086 | //use stack space to load data less than 16 bytes, zero - out the 16B in memory first. | |||
7087 | pxor(xmm1, xmm1); | |||
7088 | movptr(tmp1, rsp); | |||
7089 | movdqu(Address(tmp1, 0 * 16), xmm1); | |||
7090 | ||||
7091 | cmpl(len, 4); | |||
7092 | jcc(Assembler::less, L_only_less_than_4); | |||
7093 | ||||
7094 | //backup the counter value | |||
7095 | movl(tmp2, len); | |||
7096 | cmpl(len, 8); | |||
7097 | jcc(Assembler::less, L_less_than_8_left); | |||
7098 | ||||
7099 | //load 8 Bytes | |||
7100 | movq(rax, Address(buf, pos, Address::times_1, 0 * 16)); | |||
7101 | movq(Address(tmp1, 0 * 16), rax); | |||
7102 | addptr(tmp1, 8); | |||
7103 | subl(len, 8); | |||
7104 | addl(pos, 8); | |||
7105 | ||||
7106 | bind(L_less_than_8_left); | |||
7107 | cmpl(len, 4); | |||
7108 | jcc(Assembler::less, L_less_than_4_left); | |||
7109 | ||||
7110 | //load 4 Bytes | |||
7111 | movl(rax, Address(buf, pos, Address::times_1, 0)); | |||
7112 | movl(Address(tmp1, 0 * 16), rax); | |||
7113 | addptr(tmp1, 4); | |||
7114 | subl(len, 4); | |||
7115 | addl(pos, 4); | |||
7116 | ||||
7117 | bind(L_less_than_4_left); | |||
7118 | cmpl(len, 2); | |||
7119 | jcc(Assembler::less, L_less_than_2_left); | |||
7120 | ||||
7121 | // load 2 Bytes | |||
7122 | movw(rax, Address(buf, pos, Address::times_1, 0)); | |||
7123 | movl(Address(tmp1, 0 * 16), rax); | |||
7124 | addptr(tmp1, 2); | |||
7125 | subl(len, 2); | |||
7126 | addl(pos, 2); | |||
7127 | ||||
7128 | bind(L_less_than_2_left); | |||
7129 | cmpl(len, 1); | |||
7130 | jcc(Assembler::less, L_zero_left); | |||
7131 | ||||
7132 | // load 1 Byte | |||
7133 | movb(rax, Address(buf, pos, Address::times_1, 0)); | |||
7134 | movb(Address(tmp1, 0 * 16), rax); | |||
7135 | ||||
7136 | bind(L_zero_left); | |||
7137 | movdqu(xmm7, Address(rsp, 0)); | |||
7138 | pxor(xmm7, xmm0); //xor the initial crc value | |||
7139 | ||||
7140 | lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr())); | |||
7141 | movdqu(xmm0, Address(rax, tmp2)); | |||
| ||||
7142 | pshufb(xmm7, xmm0); | |||
7143 | jmp(L_128_done); | |||
7144 | ||||
7145 | bind(L_exact_16_left); | |||
7146 | movdqu(xmm7, Address(buf, pos, Address::times_1, 0)); | |||
7147 | pxor(xmm7, xmm0); //xor the initial crc value | |||
7148 | jmp(L_128_done); | |||
7149 | ||||
7150 | bind(L_only_less_than_4); | |||
7151 | cmpl(len, 3); | |||
7152 | jcc(Assembler::less, L_only_less_than_3); | |||
7153 | ||||
7154 | // load 3 Bytes | |||
7155 | movb(rax, Address(buf, pos, Address::times_1, 0)); | |||
7156 | movb(Address(tmp1, 0), rax); | |||
7157 | ||||
7158 | movb(rax, Address(buf, pos, Address::times_1, 1)); | |||
7159 | movb(Address(tmp1, 1), rax); | |||
7160 | ||||
7161 | movb(rax, Address(buf, pos, Address::times_1, 2)); | |||
7162 | movb(Address(tmp1, 2), rax); | |||
7163 | ||||
7164 | movdqu(xmm7, Address(rsp, 0)); | |||
7165 | pxor(xmm7, xmm0); //xor the initial crc value | |||
7166 | ||||
7167 | pslldq(xmm7, 0x5); | |||
7168 | jmp(L_barrett); | |||
7169 | bind(L_only_less_than_3); | |||
7170 | cmpl(len, 2); | |||
7171 | jcc(Assembler::less, L_only_less_than_2); | |||
7172 | ||||
7173 | // load 2 Bytes | |||
7174 | movb(rax, Address(buf, pos, Address::times_1, 0)); | |||
7175 | movb(Address(tmp1, 0), rax); | |||
7176 | ||||
7177 | movb(rax, Address(buf, pos, Address::times_1, 1)); | |||
7178 | movb(Address(tmp1, 1), rax); | |||
7179 | ||||
7180 | movdqu(xmm7, Address(rsp, 0)); | |||
7181 | pxor(xmm7, xmm0); //xor the initial crc value | |||
7182 | ||||
7183 | pslldq(xmm7, 0x6); | |||
7184 | jmp(L_barrett); | |||
7185 | ||||
7186 | bind(L_only_less_than_2); | |||
7187 | //load 1 Byte | |||
7188 | movb(rax, Address(buf, pos, Address::times_1, 0)); | |||
7189 | movb(Address(tmp1, 0), rax); | |||
7190 | ||||
7191 | movdqu(xmm7, Address(rsp, 0)); | |||
7192 | pxor(xmm7, xmm0); //xor the initial crc value | |||
7193 | ||||
7194 | pslldq(xmm7, 0x7); | |||
7195 | } | |||
7196 | ||||
7197 | /** | |||
7198 | * Compute CRC32 using AVX512 instructions | |||
7199 | * param crc register containing existing CRC (32-bit) | |||
7200 | * param buf register pointing to input byte buffer (byte*) | |||
7201 | * param len register containing number of bytes | |||
7202 | * param table address of crc or crc32c table | |||
7203 | * param tmp1 scratch register | |||
7204 | * param tmp2 scratch register | |||
7205 | * return rax result register | |||
7206 | * | |||
7207 | * This routine is identical for crc32c with the exception of the precomputed constant | |||
7208 | * table which will be passed as the table argument. The calculation steps are | |||
7209 | * the same for both variants. | |||
7210 | */ | |||
7211 | void MacroAssembler::kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2) { | |||
7212 | assert_different_registers(crc, buf, len, table, tmp1, tmp2, rax, r12); | |||
7213 | ||||
7214 | Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned; | |||
7215 | Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop; | |||
7216 | Label L_less_than_256, L_fold_128_B_loop, L_fold_256_B_loop; | |||
7217 | Label L_fold_128_B_register, L_final_reduction_for_128, L_16B_reduction_loop; | |||
7218 | Label L_128_done, L_get_last_two_xmms, L_barrett, L_cleanup; | |||
7219 | ||||
7220 | const Register pos = r12; | |||
7221 | push(r12); | |||
7222 | subptr(rsp, 16 * 2 + 8); | |||
7223 | ||||
7224 | // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge | |||
7225 | // context for the registers used, where all instructions below are using 128-bit mode | |||
7226 | // On EVEX without VL and BW, these instructions will all be AVX. | |||
7227 | movl(pos, 0); | |||
7228 | ||||
7229 | // check if smaller than 256B | |||
7230 | cmpl(len, 256); | |||
7231 | jcc(Assembler::less, L_less_than_256); | |||
7232 | ||||
7233 | // load the initial crc value | |||
7234 | movdl(xmm10, crc); | |||
7235 | ||||
7236 | // receive the initial 64B data, xor the initial crc value | |||
7237 | evmovdquq(xmm0, Address(buf, pos, Address::times_1, 0 * 64), Assembler::AVX_512bit); | |||
7238 | evmovdquq(xmm4, Address(buf, pos, Address::times_1, 1 * 64), Assembler::AVX_512bit); | |||
7239 | evpxorq(xmm0, xmm0, xmm10, Assembler::AVX_512bit); | |||
7240 | evbroadcasti32x4(xmm10, Address(table, 2 * 16), Assembler::AVX_512bit); //zmm10 has rk3 and rk4 | |||
7241 | ||||
7242 | subl(len, 256); | |||
7243 | cmpl(len, 256); | |||
7244 | jcc(Assembler::less, L_fold_128_B_loop); | |||
7245 | ||||
7246 | evmovdquq(xmm7, Address(buf, pos, Address::times_1, 2 * 64), Assembler::AVX_512bit); | |||
7247 | evmovdquq(xmm8, Address(buf, pos, Address::times_1, 3 * 64), Assembler::AVX_512bit); | |||
7248 | evbroadcasti32x4(xmm16, Address(table, 0 * 16), Assembler::AVX_512bit); //zmm16 has rk-1 and rk-2 | |||
7249 | subl(len, 256); | |||
7250 | ||||
7251 | bind(L_fold_256_B_loop); | |||
7252 | addl(pos, 256); | |||
7253 | fold512bit_crc32_avx512(xmm0, xmm16, xmm1, buf, pos, 0 * 64); | |||
7254 | fold512bit_crc32_avx512(xmm4, xmm16, xmm1, buf, pos, 1 * 64); | |||
7255 | fold512bit_crc32_avx512(xmm7, xmm16, xmm1, buf, pos, 2 * 64); | |||
7256 | fold512bit_crc32_avx512(xmm8, xmm16, xmm1, buf, pos, 3 * 64); | |||
7257 | ||||
7258 | subl(len, 256); | |||
7259 | jcc(Assembler::greaterEqual, L_fold_256_B_loop); | |||
7260 | ||||
7261 | // Fold 256 into 128 | |||
7262 | addl(pos, 256); | |||
7263 | evpclmulqdq(xmm1, xmm0, xmm10, 0x01, Assembler::AVX_512bit); | |||
7264 | evpclmulqdq(xmm2, xmm0, xmm10, 0x10, Assembler::AVX_512bit); | |||
7265 | vpternlogq(xmm7, 0x96, xmm1, xmm2, Assembler::AVX_512bit); // xor ABC | |||
7266 | ||||
7267 | evpclmulqdq(xmm5, xmm4, xmm10, 0x01, Assembler::AVX_512bit); | |||
7268 | evpclmulqdq(xmm6, xmm4, xmm10, 0x10, Assembler::AVX_512bit); | |||
7269 | vpternlogq(xmm8, 0x96, xmm5, xmm6, Assembler::AVX_512bit); // xor ABC | |||
7270 | ||||
7271 | evmovdquq(xmm0, xmm7, Assembler::AVX_512bit); | |||
7272 | evmovdquq(xmm4, xmm8, Assembler::AVX_512bit); | |||
7273 | ||||
7274 | addl(len, 128); | |||
7275 | jmp(L_fold_128_B_register); | |||
7276 | ||||
7277 | // at this section of the code, there is 128 * x + y(0 <= y<128) bytes of buffer.The fold_128_B_loop | |||
7278 | // loop will fold 128B at a time until we have 128 + y Bytes of buffer | |||
7279 | ||||
7280 | // fold 128B at a time.This section of the code folds 8 xmm registers in parallel | |||
7281 | bind(L_fold_128_B_loop); | |||
7282 | addl(pos, 128); | |||
7283 | fold512bit_crc32_avx512(xmm0, xmm10, xmm1, buf, pos, 0 * 64); | |||
7284 | fold512bit_crc32_avx512(xmm4, xmm10, xmm1, buf, pos, 1 * 64); | |||
7285 | ||||
7286 | subl(len, 128); | |||
7287 | jcc(Assembler::greaterEqual, L_fold_128_B_loop); | |||
7288 | ||||
7289 | addl(pos, 128); | |||
7290 | ||||
7291 | // at this point, the buffer pointer is pointing at the last y Bytes of the buffer, where 0 <= y < 128 | |||
7292 | // the 128B of folded data is in 8 of the xmm registers : xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 | |||
7293 | bind(L_fold_128_B_register); | |||
7294 | evmovdquq(xmm16, Address(table, 5 * 16), Assembler::AVX_512bit); // multiply by rk9-rk16 | |||
7295 | evmovdquq(xmm11, Address(table, 9 * 16), Assembler::AVX_512bit); // multiply by rk17-rk20, rk1,rk2, 0,0 | |||
7296 | evpclmulqdq(xmm1, xmm0, xmm16, 0x01, Assembler::AVX_512bit); | |||
7297 | evpclmulqdq(xmm2, xmm0, xmm16, 0x10, Assembler::AVX_512bit); | |||
7298 | // save last that has no multiplicand | |||
7299 | vextracti64x2(xmm7, xmm4, 3); | |||
7300 | ||||
7301 | evpclmulqdq(xmm5, xmm4, xmm11, 0x01, Assembler::AVX_512bit); | |||
7302 | evpclmulqdq(xmm6, xmm4, xmm11, 0x10, Assembler::AVX_512bit); | |||
7303 | // Needed later in reduction loop | |||
7304 | movdqu(xmm10, Address(table, 1 * 16)); | |||
7305 | vpternlogq(xmm1, 0x96, xmm2, xmm5, Assembler::AVX_512bit); // xor ABC | |||
7306 | vpternlogq(xmm1, 0x96, xmm6, xmm7, Assembler::AVX_512bit); // xor ABC | |||
7307 | ||||
7308 | // Swap 1,0,3,2 - 01 00 11 10 | |||
7309 | evshufi64x2(xmm8, xmm1, xmm1, 0x4e, Assembler::AVX_512bit); | |||
7310 | evpxorq(xmm8, xmm8, xmm1, Assembler::AVX_256bit); | |||
7311 | vextracti128(xmm5, xmm8, 1); | |||
7312 | evpxorq(xmm7, xmm5, xmm8, Assembler::AVX_128bit); | |||
7313 | ||||
7314 | // instead of 128, we add 128 - 16 to the loop counter to save 1 instruction from the loop | |||
7315 | // instead of a cmp instruction, we use the negative flag with the jl instruction | |||
7316 | addl(len, 128 - 16); | |||
7317 | jcc(Assembler::less, L_final_reduction_for_128); | |||
7318 | ||||
7319 | bind(L_16B_reduction_loop); | |||
7320 | vpclmulqdq(xmm8, xmm7, xmm10, 0x01); | |||
7321 | vpclmulqdq(xmm7, xmm7, xmm10, 0x10); | |||
7322 | vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit); | |||
7323 | movdqu(xmm0, Address(buf, pos, Address::times_1, 0 * 16)); | |||
7324 | vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit); | |||
7325 | addl(pos, 16); | |||
7326 | subl(len, 16); | |||
7327 | jcc(Assembler::greaterEqual, L_16B_reduction_loop); | |||
7328 | ||||
7329 | bind(L_final_reduction_for_128); | |||
7330 | addl(len, 16); | |||
7331 | jcc(Assembler::equal, L_128_done); | |||
7332 | ||||
7333 | bind(L_get_last_two_xmms); | |||
7334 | movdqu(xmm2, xmm7); | |||
7335 | addl(pos, len); | |||
7336 | movdqu(xmm1, Address(buf, pos, Address::times_1, -16)); | |||
7337 | subl(pos, len); | |||
7338 | ||||
7339 | // get rid of the extra data that was loaded before | |||
7340 | // load the shift constant | |||
7341 | lea(rax, ExternalAddress(StubRoutines::x86::shuf_table_crc32_avx512_addr())); | |||
7342 | movdqu(xmm0, Address(rax, len)); | |||
7343 | addl(rax, len); | |||
7344 | ||||
7345 | vpshufb(xmm7, xmm7, xmm0, Assembler::AVX_128bit); | |||
7346 | //Change mask to 512 | |||
7347 | vpxor(xmm0, xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 2 * 16), Assembler::AVX_128bit, tmp2); | |||
7348 | vpshufb(xmm2, xmm2, xmm0, Assembler::AVX_128bit); | |||
7349 | ||||
7350 | blendvpb(xmm2, xmm2, xmm1, xmm0, Assembler::AVX_128bit); | |||
7351 | vpclmulqdq(xmm8, xmm7, xmm10, 0x01); | |||
7352 | vpclmulqdq(xmm7, xmm7, xmm10, 0x10); | |||
7353 | vpxor(xmm7, xmm7, xmm8, Assembler::AVX_128bit); | |||
7354 | vpxor(xmm7, xmm7, xmm2, Assembler::AVX_128bit); | |||
7355 | ||||
7356 | bind(L_128_done); | |||
7357 | // compute crc of a 128-bit value | |||
7358 | movdqu(xmm10, Address(table, 3 * 16)); | |||
7359 | movdqu(xmm0, xmm7); | |||
7360 | ||||
7361 | // 64b fold | |||
7362 | vpclmulqdq(xmm7, xmm7, xmm10, 0x0); | |||
7363 | vpsrldq(xmm0, xmm0, 0x8, Assembler::AVX_128bit); | |||
7364 | vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit); | |||
7365 | ||||
7366 | // 32b fold | |||
7367 | movdqu(xmm0, xmm7); | |||
7368 | vpslldq(xmm7, xmm7, 0x4, Assembler::AVX_128bit); | |||
7369 | vpclmulqdq(xmm7, xmm7, xmm10, 0x10); | |||
7370 | vpxor(xmm7, xmm7, xmm0, Assembler::AVX_128bit); | |||
7371 | jmp(L_barrett); | |||
7372 | ||||
7373 | bind(L_less_than_256); | |||
7374 | kernel_crc32_avx512_256B(crc, buf, len, table, pos, tmp1, tmp2, L_barrett, L_16B_reduction_loop, L_get_last_two_xmms, L_128_done, L_cleanup); | |||
7375 | ||||
7376 | //barrett reduction | |||
7377 | bind(L_barrett); | |||
7378 | vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr() + 1 * 16), Assembler::AVX_128bit, tmp2); | |||
7379 | movdqu(xmm1, xmm7); | |||
7380 | movdqu(xmm2, xmm7); | |||
7381 | movdqu(xmm10, Address(table, 4 * 16)); | |||
7382 | ||||
7383 | pclmulqdq(xmm7, xmm10, 0x0); | |||
7384 | pxor(xmm7, xmm2); | |||
7385 | vpand(xmm7, xmm7, ExternalAddress(StubRoutines::x86::crc_by128_masks_avx512_addr()), Assembler::AVX_128bit, tmp2); | |||
7386 | movdqu(xmm2, xmm7); | |||
7387 | pclmulqdq(xmm7, xmm10, 0x10); | |||
7388 | pxor(xmm7, xmm2); | |||
7389 | pxor(xmm7, xmm1); | |||
7390 | pextrd(crc, xmm7, 2); | |||
7391 | ||||
7392 | bind(L_cleanup); | |||
7393 | addptr(rsp, 16 * 2 + 8); | |||
7394 | pop(r12); | |||
7395 | } | |||
7396 | ||||
7397 | // S. Gueron / Information Processing Letters 112 (2012) 184 | |||
7398 | // Algorithm 4: Computing carry-less multiplication using a precomputed lookup table. | |||
7399 | // Input: A 32 bit value B = [byte3, byte2, byte1, byte0]. | |||
7400 | // Output: the 64-bit carry-less product of B * CONST | |||
7401 | void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n, | |||
7402 | Register tmp1, Register tmp2, Register tmp3) { | |||
7403 | lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr())); | |||
7404 | if (n > 0) { | |||
7405 | addq(tmp3, n * 256 * 8); | |||
7406 | } | |||
7407 | // Q1 = TABLEExt[n][B & 0xFF]; | |||
7408 | movl(tmp1, in); | |||
7409 | andl(tmp1, 0x000000FF); | |||
7410 | shll(tmp1, 3); | |||
7411 | addq(tmp1, tmp3); | |||
7412 | movq(tmp1, Address(tmp1, 0)); | |||
7413 | ||||
7414 | // Q2 = TABLEExt[n][B >> 8 & 0xFF]; | |||
7415 | movl(tmp2, in); | |||
7416 | shrl(tmp2, 8); | |||
7417 | andl(tmp2, 0x000000FF); | |||
7418 | shll(tmp2, 3); | |||
7419 | addq(tmp2, tmp3); | |||
7420 | movq(tmp2, Address(tmp2, 0)); | |||
7421 | ||||
7422 | shlq(tmp2, 8); | |||
7423 | xorq(tmp1, tmp2); | |||
7424 | ||||
7425 | // Q3 = TABLEExt[n][B >> 16 & 0xFF]; | |||
7426 | movl(tmp2, in); | |||
7427 | shrl(tmp2, 16); | |||
7428 | andl(tmp2, 0x000000FF); | |||
7429 | shll(tmp2, 3); | |||
7430 | addq(tmp2, tmp3); | |||
7431 | movq(tmp2, Address(tmp2, 0)); | |||
7432 | ||||
7433 | shlq(tmp2, 16); | |||
7434 | xorq(tmp1, tmp2); | |||
7435 | ||||
7436 | // Q4 = TABLEExt[n][B >> 24 & 0xFF]; | |||
7437 | shrl(in, 24); | |||
7438 | andl(in, 0x000000FF); | |||
7439 | shll(in, 3); | |||
7440 | addq(in, tmp3); | |||
7441 | movq(in, Address(in, 0)); | |||
7442 | ||||
7443 | shlq(in, 24); | |||
7444 | xorq(in, tmp1); | |||
7445 | // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24; | |||
7446 | } | |||
7447 | ||||
7448 | void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1, | |||
7449 | Register in_out, | |||
7450 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, | |||
7451 | XMMRegister w_xtmp2, | |||
7452 | Register tmp1, | |||
7453 | Register n_tmp2, Register n_tmp3) { | |||
7454 | if (is_pclmulqdq_supported) { | |||
7455 | movdl(w_xtmp1, in_out); // modified blindly | |||
7456 | ||||
7457 | movl(tmp1, const_or_pre_comp_const_index); | |||
7458 | movdl(w_xtmp2, tmp1); | |||
7459 | pclmulqdq(w_xtmp1, w_xtmp2, 0); | |||
7460 | ||||
7461 | movdq(in_out, w_xtmp1); | |||
7462 | } else { | |||
7463 | crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3); | |||
7464 | } | |||
7465 | } | |||
7466 | ||||
7467 | // Recombination Alternative 2: No bit-reflections | |||
7468 | // T1 = (CRC_A * U1) << 1 | |||
7469 | // T2 = (CRC_B * U2) << 1 | |||
7470 | // C1 = T1 >> 32 | |||
7471 | // C2 = T2 >> 32 | |||
7472 | // T1 = T1 & 0xFFFFFFFF | |||
7473 | // T2 = T2 & 0xFFFFFFFF | |||
7474 | // T1 = CRC32(0, T1) | |||
7475 | // T2 = CRC32(0, T2) | |||
7476 | // C1 = C1 ^ T1 | |||
7477 | // C2 = C2 ^ T2 | |||
7478 | // CRC = C1 ^ C2 ^ CRC_C | |||
7479 | void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, | |||
7480 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
7481 | Register tmp1, Register tmp2, | |||
7482 | Register n_tmp3) { | |||
7483 | crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); | |||
7484 | crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); | |||
7485 | shlq(in_out, 1); | |||
7486 | movl(tmp1, in_out); | |||
7487 | shrq(in_out, 32); | |||
7488 | xorl(tmp2, tmp2); | |||
7489 | crc32(tmp2, tmp1, 4); | |||
7490 | xorl(in_out, tmp2); // we don't care about upper 32 bit contents here | |||
7491 | shlq(in1, 1); | |||
7492 | movl(tmp1, in1); | |||
7493 | shrq(in1, 32); | |||
7494 | xorl(tmp2, tmp2); | |||
7495 | crc32(tmp2, tmp1, 4); | |||
7496 | xorl(in1, tmp2); | |||
7497 | xorl(in_out, in1); | |||
7498 | xorl(in_out, in2); | |||
7499 | } | |||
7500 | ||||
7501 | // Set N to predefined value | |||
7502 | // Subtract from a lenght of a buffer | |||
7503 | // execute in a loop: | |||
7504 | // CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0 | |||
7505 | // for i = 1 to N do | |||
7506 | // CRC_A = CRC32(CRC_A, A[i]) | |||
7507 | // CRC_B = CRC32(CRC_B, B[i]) | |||
7508 | // CRC_C = CRC32(CRC_C, C[i]) | |||
7509 | // end for | |||
7510 | // Recombine | |||
7511 | void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, | |||
7512 | Register in_out1, Register in_out2, Register in_out3, | |||
7513 | Register tmp1, Register tmp2, Register tmp3, | |||
7514 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
7515 | Register tmp4, Register tmp5, | |||
7516 | Register n_tmp6) { | |||
7517 | Label L_processPartitions; | |||
7518 | Label L_processPartition; | |||
7519 | Label L_exit; | |||
7520 | ||||
7521 | bind(L_processPartitions); | |||
7522 | cmpl(in_out1, 3 * size); | |||
7523 | jcc(Assembler::less, L_exit); | |||
7524 | xorl(tmp1, tmp1); | |||
7525 | xorl(tmp2, tmp2); | |||
7526 | movq(tmp3, in_out2); | |||
7527 | addq(tmp3, size); | |||
7528 | ||||
7529 | bind(L_processPartition); | |||
7530 | crc32(in_out3, Address(in_out2, 0), 8); | |||
7531 | crc32(tmp1, Address(in_out2, size), 8); | |||
7532 | crc32(tmp2, Address(in_out2, size * 2), 8); | |||
7533 | addq(in_out2, 8); | |||
7534 | cmpq(in_out2, tmp3); | |||
7535 | jcc(Assembler::less, L_processPartition); | |||
7536 | crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2, | |||
7537 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7538 | tmp4, tmp5, | |||
7539 | n_tmp6); | |||
7540 | addq(in_out2, 2 * size); | |||
7541 | subl(in_out1, 3 * size); | |||
7542 | jmp(L_processPartitions); | |||
7543 | ||||
7544 | bind(L_exit); | |||
7545 | } | |||
7546 | #else | |||
7547 | void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n, | |||
7548 | Register tmp1, Register tmp2, Register tmp3, | |||
7549 | XMMRegister xtmp1, XMMRegister xtmp2) { | |||
7550 | lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr())); | |||
7551 | if (n > 0) { | |||
7552 | addl(tmp3, n * 256 * 8); | |||
7553 | } | |||
7554 | // Q1 = TABLEExt[n][B & 0xFF]; | |||
7555 | movl(tmp1, in_out); | |||
7556 | andl(tmp1, 0x000000FF); | |||
7557 | shll(tmp1, 3); | |||
7558 | addl(tmp1, tmp3); | |||
7559 | movq(xtmp1, Address(tmp1, 0)); | |||
7560 | ||||
7561 | // Q2 = TABLEExt[n][B >> 8 & 0xFF]; | |||
7562 | movl(tmp2, in_out); | |||
7563 | shrl(tmp2, 8); | |||
7564 | andl(tmp2, 0x000000FF); | |||
7565 | shll(tmp2, 3); | |||
7566 | addl(tmp2, tmp3); | |||
7567 | movq(xtmp2, Address(tmp2, 0)); | |||
7568 | ||||
7569 | psllq(xtmp2, 8); | |||
7570 | pxor(xtmp1, xtmp2); | |||
7571 | ||||
7572 | // Q3 = TABLEExt[n][B >> 16 & 0xFF]; | |||
7573 | movl(tmp2, in_out); | |||
7574 | shrl(tmp2, 16); | |||
7575 | andl(tmp2, 0x000000FF); | |||
7576 | shll(tmp2, 3); | |||
7577 | addl(tmp2, tmp3); | |||
7578 | movq(xtmp2, Address(tmp2, 0)); | |||
7579 | ||||
7580 | psllq(xtmp2, 16); | |||
7581 | pxor(xtmp1, xtmp2); | |||
7582 | ||||
7583 | // Q4 = TABLEExt[n][B >> 24 & 0xFF]; | |||
7584 | shrl(in_out, 24); | |||
7585 | andl(in_out, 0x000000FF); | |||
7586 | shll(in_out, 3); | |||
7587 | addl(in_out, tmp3); | |||
7588 | movq(xtmp2, Address(in_out, 0)); | |||
7589 | ||||
7590 | psllq(xtmp2, 24); | |||
7591 | pxor(xtmp1, xtmp2); // Result in CXMM | |||
7592 | // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24; | |||
7593 | } | |||
7594 | ||||
7595 | void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1, | |||
7596 | Register in_out, | |||
7597 | uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, | |||
7598 | XMMRegister w_xtmp2, | |||
7599 | Register tmp1, | |||
7600 | Register n_tmp2, Register n_tmp3) { | |||
7601 | if (is_pclmulqdq_supported) { | |||
7602 | movdl(w_xtmp1, in_out); | |||
7603 | ||||
7604 | movl(tmp1, const_or_pre_comp_const_index); | |||
7605 | movdl(w_xtmp2, tmp1); | |||
7606 | pclmulqdq(w_xtmp1, w_xtmp2, 0); | |||
7607 | // Keep result in XMM since GPR is 32 bit in length | |||
7608 | } else { | |||
7609 | crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2); | |||
7610 | } | |||
7611 | } | |||
7612 | ||||
7613 | void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, | |||
7614 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
7615 | Register tmp1, Register tmp2, | |||
7616 | Register n_tmp3) { | |||
7617 | crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); | |||
7618 | crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); | |||
7619 | ||||
7620 | psllq(w_xtmp1, 1); | |||
7621 | movdl(tmp1, w_xtmp1); | |||
7622 | psrlq(w_xtmp1, 32); | |||
7623 | movdl(in_out, w_xtmp1); | |||
7624 | ||||
7625 | xorl(tmp2, tmp2); | |||
7626 | crc32(tmp2, tmp1, 4); | |||
7627 | xorl(in_out, tmp2); | |||
7628 | ||||
7629 | psllq(w_xtmp2, 1); | |||
7630 | movdl(tmp1, w_xtmp2); | |||
7631 | psrlq(w_xtmp2, 32); | |||
7632 | movdl(in1, w_xtmp2); | |||
7633 | ||||
7634 | xorl(tmp2, tmp2); | |||
7635 | crc32(tmp2, tmp1, 4); | |||
7636 | xorl(in1, tmp2); | |||
7637 | xorl(in_out, in1); | |||
7638 | xorl(in_out, in2); | |||
7639 | } | |||
7640 | ||||
7641 | void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, | |||
7642 | Register in_out1, Register in_out2, Register in_out3, | |||
7643 | Register tmp1, Register tmp2, Register tmp3, | |||
7644 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
7645 | Register tmp4, Register tmp5, | |||
7646 | Register n_tmp6) { | |||
7647 | Label L_processPartitions; | |||
7648 | Label L_processPartition; | |||
7649 | Label L_exit; | |||
7650 | ||||
7651 | bind(L_processPartitions); | |||
7652 | cmpl(in_out1, 3 * size); | |||
7653 | jcc(Assembler::less, L_exit); | |||
7654 | xorl(tmp1, tmp1); | |||
7655 | xorl(tmp2, tmp2); | |||
7656 | movl(tmp3, in_out2); | |||
7657 | addl(tmp3, size); | |||
7658 | ||||
7659 | bind(L_processPartition); | |||
7660 | crc32(in_out3, Address(in_out2, 0), 4); | |||
7661 | crc32(tmp1, Address(in_out2, size), 4); | |||
7662 | crc32(tmp2, Address(in_out2, size*2), 4); | |||
7663 | crc32(in_out3, Address(in_out2, 0+4), 4); | |||
7664 | crc32(tmp1, Address(in_out2, size+4), 4); | |||
7665 | crc32(tmp2, Address(in_out2, size*2+4), 4); | |||
7666 | addl(in_out2, 8); | |||
7667 | cmpl(in_out2, tmp3); | |||
7668 | jcc(Assembler::less, L_processPartition); | |||
7669 | ||||
7670 | push(tmp3); | |||
7671 | push(in_out1); | |||
7672 | push(in_out2); | |||
7673 | tmp4 = tmp3; | |||
7674 | tmp5 = in_out1; | |||
7675 | n_tmp6 = in_out2; | |||
7676 | ||||
7677 | crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2, | |||
7678 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7679 | tmp4, tmp5, | |||
7680 | n_tmp6); | |||
7681 | ||||
7682 | pop(in_out2); | |||
7683 | pop(in_out1); | |||
7684 | pop(tmp3); | |||
7685 | ||||
7686 | addl(in_out2, 2 * size); | |||
7687 | subl(in_out1, 3 * size); | |||
7688 | jmp(L_processPartitions); | |||
7689 | ||||
7690 | bind(L_exit); | |||
7691 | } | |||
7692 | #endif //LP64 | |||
7693 | ||||
7694 | #ifdef _LP641 | |||
7695 | // Algorithm 2: Pipelined usage of the CRC32 instruction. | |||
7696 | // Input: A buffer I of L bytes. | |||
7697 | // Output: the CRC32C value of the buffer. | |||
7698 | // Notations: | |||
7699 | // Write L = 24N + r, with N = floor (L/24). | |||
7700 | // r = L mod 24 (0 <= r < 24). | |||
7701 | // Consider I as the concatenation of A|B|C|R, where A, B, C, each, | |||
7702 | // N quadwords, and R consists of r bytes. | |||
7703 | // A[j] = I [8j+7:8j], j= 0, 1, ..., N-1 | |||
7704 | // B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1 | |||
7705 | // C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1 | |||
7706 | // if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1 | |||
7707 | void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, | |||
7708 | Register tmp1, Register tmp2, Register tmp3, | |||
7709 | Register tmp4, Register tmp5, Register tmp6, | |||
7710 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
7711 | bool is_pclmulqdq_supported) { | |||
7712 | uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS]; | |||
7713 | Label L_wordByWord; | |||
7714 | Label L_byteByByteProlog; | |||
7715 | Label L_byteByByte; | |||
7716 | Label L_exit; | |||
7717 | ||||
7718 | if (is_pclmulqdq_supported ) { | |||
7719 | const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr; | |||
7720 | const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1); | |||
7721 | ||||
7722 | const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2); | |||
7723 | const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3); | |||
7724 | ||||
7725 | const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4); | |||
7726 | const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5); | |||
7727 | assert((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"")do { if (!((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5)) { ( *g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 7727, "assert(" "(CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5" ") failed", "Checking whether you declared all of the constants based on the number of \"chunks\"" ); ::breakpoint(); } } while (0); | |||
7728 | } else { | |||
7729 | const_or_pre_comp_const_index[0] = 1; | |||
7730 | const_or_pre_comp_const_index[1] = 0; | |||
7731 | ||||
7732 | const_or_pre_comp_const_index[2] = 3; | |||
7733 | const_or_pre_comp_const_index[3] = 2; | |||
7734 | ||||
7735 | const_or_pre_comp_const_index[4] = 5; | |||
7736 | const_or_pre_comp_const_index[5] = 4; | |||
7737 | } | |||
7738 | crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported, | |||
7739 | in2, in1, in_out, | |||
7740 | tmp1, tmp2, tmp3, | |||
7741 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7742 | tmp4, tmp5, | |||
7743 | tmp6); | |||
7744 | crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported, | |||
7745 | in2, in1, in_out, | |||
7746 | tmp1, tmp2, tmp3, | |||
7747 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7748 | tmp4, tmp5, | |||
7749 | tmp6); | |||
7750 | crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported, | |||
7751 | in2, in1, in_out, | |||
7752 | tmp1, tmp2, tmp3, | |||
7753 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7754 | tmp4, tmp5, | |||
7755 | tmp6); | |||
7756 | movl(tmp1, in2); | |||
7757 | andl(tmp1, 0x00000007); | |||
7758 | negl(tmp1); | |||
7759 | addl(tmp1, in2); | |||
7760 | addq(tmp1, in1); | |||
7761 | ||||
7762 | BIND(L_wordByWord); | |||
7763 | cmpq(in1, tmp1); | |||
7764 | jcc(Assembler::greaterEqual, L_byteByByteProlog); | |||
7765 | crc32(in_out, Address(in1, 0), 4); | |||
7766 | addq(in1, 4); | |||
7767 | jmp(L_wordByWord); | |||
7768 | ||||
7769 | BIND(L_byteByByteProlog); | |||
7770 | andl(in2, 0x00000007); | |||
7771 | movl(tmp2, 1); | |||
7772 | ||||
7773 | BIND(L_byteByByte); | |||
7774 | cmpl(tmp2, in2); | |||
7775 | jccb(Assembler::greater, L_exit)jccb_0(Assembler::greater, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 7775); | |||
7776 | crc32(in_out, Address(in1, 0), 1); | |||
7777 | incq(in1); | |||
7778 | incl(tmp2); | |||
7779 | jmp(L_byteByByte); | |||
7780 | ||||
7781 | BIND(L_exit); | |||
7782 | } | |||
7783 | #else | |||
7784 | void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, | |||
7785 | Register tmp1, Register tmp2, Register tmp3, | |||
7786 | Register tmp4, Register tmp5, Register tmp6, | |||
7787 | XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, | |||
7788 | bool is_pclmulqdq_supported) { | |||
7789 | uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS]; | |||
7790 | Label L_wordByWord; | |||
7791 | Label L_byteByByteProlog; | |||
7792 | Label L_byteByByte; | |||
7793 | Label L_exit; | |||
7794 | ||||
7795 | if (is_pclmulqdq_supported) { | |||
7796 | const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr; | |||
7797 | const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1); | |||
7798 | ||||
7799 | const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2); | |||
7800 | const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3); | |||
7801 | ||||
7802 | const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4); | |||
7803 | const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5); | |||
7804 | } else { | |||
7805 | const_or_pre_comp_const_index[0] = 1; | |||
7806 | const_or_pre_comp_const_index[1] = 0; | |||
7807 | ||||
7808 | const_or_pre_comp_const_index[2] = 3; | |||
7809 | const_or_pre_comp_const_index[3] = 2; | |||
7810 | ||||
7811 | const_or_pre_comp_const_index[4] = 5; | |||
7812 | const_or_pre_comp_const_index[5] = 4; | |||
7813 | } | |||
7814 | crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported, | |||
7815 | in2, in1, in_out, | |||
7816 | tmp1, tmp2, tmp3, | |||
7817 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7818 | tmp4, tmp5, | |||
7819 | tmp6); | |||
7820 | crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported, | |||
7821 | in2, in1, in_out, | |||
7822 | tmp1, tmp2, tmp3, | |||
7823 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7824 | tmp4, tmp5, | |||
7825 | tmp6); | |||
7826 | crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported, | |||
7827 | in2, in1, in_out, | |||
7828 | tmp1, tmp2, tmp3, | |||
7829 | w_xtmp1, w_xtmp2, w_xtmp3, | |||
7830 | tmp4, tmp5, | |||
7831 | tmp6); | |||
7832 | movl(tmp1, in2); | |||
7833 | andl(tmp1, 0x00000007); | |||
7834 | negl(tmp1); | |||
7835 | addl(tmp1, in2); | |||
7836 | addl(tmp1, in1); | |||
7837 | ||||
7838 | BIND(L_wordByWord); | |||
7839 | cmpl(in1, tmp1); | |||
7840 | jcc(Assembler::greaterEqual, L_byteByByteProlog); | |||
7841 | crc32(in_out, Address(in1,0), 4); | |||
7842 | addl(in1, 4); | |||
7843 | jmp(L_wordByWord); | |||
7844 | ||||
7845 | BIND(L_byteByByteProlog); | |||
7846 | andl(in2, 0x00000007); | |||
7847 | movl(tmp2, 1); | |||
7848 | ||||
7849 | BIND(L_byteByByte); | |||
7850 | cmpl(tmp2, in2); | |||
7851 | jccb(Assembler::greater, L_exit)jccb_0(Assembler::greater, L_exit, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 7851); | |||
7852 | movb(tmp1, Address(in1, 0)); | |||
7853 | crc32(in_out, tmp1, 1); | |||
7854 | incl(in1); | |||
7855 | incl(tmp2); | |||
7856 | jmp(L_byteByByte); | |||
7857 | ||||
7858 | BIND(L_exit); | |||
7859 | } | |||
7860 | #endif // LP64 | |||
7861 | #undef BIND | |||
7862 | #undef BLOCK_COMMENT | |||
7863 | ||||
7864 | // Compress char[] array to byte[]. | |||
7865 | // ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java | |||
7866 | // @IntrinsicCandidate | |||
7867 | // private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) { | |||
7868 | // for (int i = 0; i < len; i++) { | |||
7869 | // int c = src[srcOff++]; | |||
7870 | // if (c >>> 8 != 0) { | |||
7871 | // return 0; | |||
7872 | // } | |||
7873 | // dst[dstOff++] = (byte)c; | |||
7874 | // } | |||
7875 | // return len; | |||
7876 | // } | |||
7877 | void MacroAssembler::char_array_compress(Register src, Register dst, Register len, | |||
7878 | XMMRegister tmp1Reg, XMMRegister tmp2Reg, | |||
7879 | XMMRegister tmp3Reg, XMMRegister tmp4Reg, | |||
7880 | Register tmp5, Register result, KRegister mask1, KRegister mask2) { | |||
7881 | Label copy_chars_loop, return_length, return_zero, done; | |||
7882 | ||||
7883 | // rsi: src | |||
7884 | // rdi: dst | |||
7885 | // rdx: len | |||
7886 | // rcx: tmp5 | |||
7887 | // rax: result | |||
7888 | ||||
7889 | // rsi holds start addr of source char[] to be compressed | |||
7890 | // rdi holds start addr of destination byte[] | |||
7891 | // rdx holds length | |||
7892 | ||||
7893 | assert(len != result, "")do { if (!(len != result)) { (*g_assert_poison) = 'X';; report_vm_error ("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 7893, "assert(" "len != result" ") failed", ""); ::breakpoint (); } } while (0); | |||
7894 | ||||
7895 | // save length for return | |||
7896 | push(len); | |||
7897 | ||||
7898 | if ((AVX3Threshold == 0) && (UseAVX > 2) && // AVX512 | |||
7899 | VM_Version::supports_avx512vlbw() && | |||
7900 | VM_Version::supports_bmi2()) { | |||
7901 | ||||
7902 | Label copy_32_loop, copy_loop_tail, below_threshold; | |||
7903 | ||||
7904 | // alignment | |||
7905 | Label post_alignment; | |||
7906 | ||||
7907 | // if length of the string is less than 16, handle it in an old fashioned way | |||
7908 | testl(len, -32); | |||
7909 | jcc(Assembler::zero, below_threshold); | |||
7910 | ||||
7911 | // First check whether a character is compressable ( <= 0xFF). | |||
7912 | // Create mask to test for Unicode chars inside zmm vector | |||
7913 | movl(result, 0x00FF); | |||
7914 | evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit); | |||
7915 | ||||
7916 | testl(len, -64); | |||
7917 | jcc(Assembler::zero, post_alignment); | |||
7918 | ||||
7919 | movl(tmp5, dst); | |||
7920 | andl(tmp5, (32 - 1)); | |||
7921 | negl(tmp5); | |||
7922 | andl(tmp5, (32 - 1)); | |||
7923 | ||||
7924 | // bail out when there is nothing to be done | |||
7925 | testl(tmp5, 0xFFFFFFFF); | |||
7926 | jcc(Assembler::zero, post_alignment); | |||
7927 | ||||
7928 | // ~(~0 << len), where len is the # of remaining elements to process | |||
7929 | movl(result, 0xFFFFFFFF); | |||
7930 | shlxl(result, result, tmp5); | |||
7931 | notl(result); | |||
7932 | kmovdl(mask2, result); | |||
7933 | ||||
7934 | evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); | |||
7935 | evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit); | |||
7936 | ktestd(mask1, mask2); | |||
7937 | jcc(Assembler::carryClear, return_zero); | |||
7938 | ||||
7939 | evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit); | |||
7940 | ||||
7941 | addptr(src, tmp5); | |||
7942 | addptr(src, tmp5); | |||
7943 | addptr(dst, tmp5); | |||
7944 | subl(len, tmp5); | |||
7945 | ||||
7946 | bind(post_alignment); | |||
7947 | // end of alignment | |||
7948 | ||||
7949 | movl(tmp5, len); | |||
7950 | andl(tmp5, (32 - 1)); // tail count (in chars) | |||
7951 | andl(len, ~(32 - 1)); // vector count (in chars) | |||
7952 | jcc(Assembler::zero, copy_loop_tail); | |||
7953 | ||||
7954 | lea(src, Address(src, len, Address::times_2)); | |||
7955 | lea(dst, Address(dst, len, Address::times_1)); | |||
7956 | negptr(len); | |||
7957 | ||||
7958 | bind(copy_32_loop); | |||
7959 | evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit); | |||
7960 | evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); | |||
7961 | kortestdl(mask1, mask1); | |||
7962 | jcc(Assembler::carryClear, return_zero); | |||
7963 | ||||
7964 | // All elements in current processed chunk are valid candidates for | |||
7965 | // compression. Write a truncated byte elements to the memory. | |||
7966 | evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit); | |||
7967 | addptr(len, 32); | |||
7968 | jcc(Assembler::notZero, copy_32_loop); | |||
7969 | ||||
7970 | bind(copy_loop_tail); | |||
7971 | // bail out when there is nothing to be done | |||
7972 | testl(tmp5, 0xFFFFFFFF); | |||
7973 | jcc(Assembler::zero, return_length); | |||
7974 | ||||
7975 | movl(len, tmp5); | |||
7976 | ||||
7977 | // ~(~0 << len), where len is the # of remaining elements to process | |||
7978 | movl(result, 0xFFFFFFFF); | |||
7979 | shlxl(result, result, len); | |||
7980 | notl(result); | |||
7981 | ||||
7982 | kmovdl(mask2, result); | |||
7983 | ||||
7984 | evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); | |||
7985 | evpcmpw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, /*signed*/ false, Assembler::AVX_512bit); | |||
7986 | ktestd(mask1, mask2); | |||
7987 | jcc(Assembler::carryClear, return_zero); | |||
7988 | ||||
7989 | evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit); | |||
7990 | jmp(return_length); | |||
7991 | ||||
7992 | bind(below_threshold); | |||
7993 | } | |||
7994 | ||||
7995 | if (UseSSE42Intrinsics) { | |||
7996 | Label copy_32_loop, copy_16, copy_tail; | |||
7997 | ||||
7998 | movl(result, len); | |||
7999 | ||||
8000 | movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors | |||
8001 | ||||
8002 | // vectored compression | |||
8003 | andl(len, 0xfffffff0); // vector count (in chars) | |||
8004 | andl(result, 0x0000000f); // tail count (in chars) | |||
8005 | testl(len, len); | |||
8006 | jcc(Assembler::zero, copy_16); | |||
8007 | ||||
8008 | // compress 16 chars per iter | |||
8009 | movdl(tmp1Reg, tmp5); | |||
8010 | pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg | |||
8011 | pxor(tmp4Reg, tmp4Reg); | |||
8012 | ||||
8013 | lea(src, Address(src, len, Address::times_2)); | |||
8014 | lea(dst, Address(dst, len, Address::times_1)); | |||
8015 | negptr(len); | |||
8016 | ||||
8017 | bind(copy_32_loop); | |||
8018 | movdqu(tmp2Reg, Address(src, len, Address::times_2)); // load 1st 8 characters | |||
8019 | por(tmp4Reg, tmp2Reg); | |||
8020 | movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters | |||
8021 | por(tmp4Reg, tmp3Reg); | |||
8022 | ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector | |||
8023 | jcc(Assembler::notZero, return_zero); | |||
8024 | packuswb(tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte | |||
8025 | movdqu(Address(dst, len, Address::times_1), tmp2Reg); | |||
8026 | addptr(len, 16); | |||
8027 | jcc(Assembler::notZero, copy_32_loop); | |||
8028 | ||||
8029 | // compress next vector of 8 chars (if any) | |||
8030 | bind(copy_16); | |||
8031 | movl(len, result); | |||
8032 | andl(len, 0xfffffff8); // vector count (in chars) | |||
8033 | andl(result, 0x00000007); // tail count (in chars) | |||
8034 | testl(len, len); | |||
8035 | jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8035); | |||
8036 | ||||
8037 | movdl(tmp1Reg, tmp5); | |||
8038 | pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg | |||
8039 | pxor(tmp3Reg, tmp3Reg); | |||
8040 | ||||
8041 | movdqu(tmp2Reg, Address(src, 0)); | |||
8042 | ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector | |||
8043 | jccb(Assembler::notZero, return_zero)jccb_0(Assembler::notZero, return_zero, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8043); | |||
8044 | packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte | |||
8045 | movq(Address(dst, 0), tmp2Reg); | |||
8046 | addptr(src, 16); | |||
8047 | addptr(dst, 8); | |||
8048 | ||||
8049 | bind(copy_tail); | |||
8050 | movl(len, result); | |||
8051 | } | |||
8052 | // compress 1 char per iter | |||
8053 | testl(len, len); | |||
8054 | jccb(Assembler::zero, return_length)jccb_0(Assembler::zero, return_length, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8054); | |||
8055 | lea(src, Address(src, len, Address::times_2)); | |||
8056 | lea(dst, Address(dst, len, Address::times_1)); | |||
8057 | negptr(len); | |||
8058 | ||||
8059 | bind(copy_chars_loop); | |||
8060 | load_unsigned_short(result, Address(src, len, Address::times_2)); | |||
8061 | testl(result, 0xff00); // check if Unicode char | |||
8062 | jccb(Assembler::notZero, return_zero)jccb_0(Assembler::notZero, return_zero, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8062); | |||
8063 | movb(Address(dst, len, Address::times_1), result); // ASCII char; compress to 1 byte | |||
8064 | increment(len); | |||
8065 | jcc(Assembler::notZero, copy_chars_loop); | |||
8066 | ||||
8067 | // if compression succeeded, return length | |||
8068 | bind(return_length); | |||
8069 | pop(result); | |||
8070 | jmpb(done)jmpb_0(done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8070); | |||
8071 | ||||
8072 | // if compression failed, return 0 | |||
8073 | bind(return_zero); | |||
8074 | xorl(result, result); | |||
8075 | addptr(rsp, wordSize); | |||
8076 | ||||
8077 | bind(done); | |||
8078 | } | |||
8079 | ||||
8080 | // Inflate byte[] array to char[]. | |||
8081 | // ..\jdk\src\java.base\share\classes\java\lang\StringLatin1.java | |||
8082 | // @IntrinsicCandidate | |||
8083 | // private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) { | |||
8084 | // for (int i = 0; i < len; i++) { | |||
8085 | // dst[dstOff++] = (char)(src[srcOff++] & 0xff); | |||
8086 | // } | |||
8087 | // } | |||
8088 | void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, | |||
8089 | XMMRegister tmp1, Register tmp2, KRegister mask) { | |||
8090 | Label copy_chars_loop, done, below_threshold, avx3_threshold; | |||
8091 | // rsi: src | |||
8092 | // rdi: dst | |||
8093 | // rdx: len | |||
8094 | // rcx: tmp2 | |||
8095 | ||||
8096 | // rsi holds start addr of source byte[] to be inflated | |||
8097 | // rdi holds start addr of destination char[] | |||
8098 | // rdx holds length | |||
8099 | assert_different_registers(src, dst, len, tmp2); | |||
8100 | movl(tmp2, len); | |||
8101 | if ((UseAVX > 2) && // AVX512 | |||
8102 | VM_Version::supports_avx512vlbw() && | |||
8103 | VM_Version::supports_bmi2()) { | |||
8104 | ||||
8105 | Label copy_32_loop, copy_tail; | |||
8106 | Register tmp3_aliased = len; | |||
8107 | ||||
8108 | // if length of the string is less than 16, handle it in an old fashioned way | |||
8109 | testl(len, -16); | |||
8110 | jcc(Assembler::zero, below_threshold); | |||
8111 | ||||
8112 | testl(len, -1 * AVX3Threshold); | |||
8113 | jcc(Assembler::zero, avx3_threshold); | |||
8114 | ||||
8115 | // In order to use only one arithmetic operation for the main loop we use | |||
8116 | // this pre-calculation | |||
8117 | andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop | |||
8118 | andl(len, -32); // vector count | |||
8119 | jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8119); | |||
8120 | ||||
8121 | lea(src, Address(src, len, Address::times_1)); | |||
8122 | lea(dst, Address(dst, len, Address::times_2)); | |||
8123 | negptr(len); | |||
8124 | ||||
8125 | ||||
8126 | // inflate 32 chars per iter | |||
8127 | bind(copy_32_loop); | |||
8128 | vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit); | |||
8129 | evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit); | |||
8130 | addptr(len, 32); | |||
8131 | jcc(Assembler::notZero, copy_32_loop); | |||
8132 | ||||
8133 | bind(copy_tail); | |||
8134 | // bail out when there is nothing to be done | |||
8135 | testl(tmp2, -1); // we don't destroy the contents of tmp2 here | |||
8136 | jcc(Assembler::zero, done); | |||
8137 | ||||
8138 | // ~(~0 << length), where length is the # of remaining elements to process | |||
8139 | movl(tmp3_aliased, -1); | |||
8140 | shlxl(tmp3_aliased, tmp3_aliased, tmp2); | |||
8141 | notl(tmp3_aliased); | |||
8142 | kmovdl(mask, tmp3_aliased); | |||
8143 | evpmovzxbw(tmp1, mask, Address(src, 0), Assembler::AVX_512bit); | |||
8144 | evmovdquw(Address(dst, 0), mask, tmp1, /*merge*/ true, Assembler::AVX_512bit); | |||
8145 | ||||
8146 | jmp(done); | |||
8147 | bind(avx3_threshold); | |||
8148 | } | |||
8149 | if (UseSSE42Intrinsics) { | |||
8150 | Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail; | |||
8151 | ||||
8152 | if (UseAVX > 1) { | |||
8153 | andl(tmp2, (16 - 1)); | |||
8154 | andl(len, -16); | |||
8155 | jccb(Assembler::zero, copy_new_tail)jccb_0(Assembler::zero, copy_new_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8155); | |||
8156 | } else { | |||
8157 | andl(tmp2, 0x00000007); // tail count (in chars) | |||
8158 | andl(len, 0xfffffff8); // vector count (in chars) | |||
8159 | jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8159); | |||
8160 | } | |||
8161 | ||||
8162 | // vectored inflation | |||
8163 | lea(src, Address(src, len, Address::times_1)); | |||
8164 | lea(dst, Address(dst, len, Address::times_2)); | |||
8165 | negptr(len); | |||
8166 | ||||
8167 | if (UseAVX > 1) { | |||
8168 | bind(copy_16_loop); | |||
8169 | vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_256bit); | |||
8170 | vmovdqu(Address(dst, len, Address::times_2), tmp1); | |||
8171 | addptr(len, 16); | |||
8172 | jcc(Assembler::notZero, copy_16_loop); | |||
8173 | ||||
8174 | bind(below_threshold); | |||
8175 | bind(copy_new_tail); | |||
8176 | movl(len, tmp2); | |||
8177 | andl(tmp2, 0x00000007); | |||
8178 | andl(len, 0xFFFFFFF8); | |||
8179 | jccb(Assembler::zero, copy_tail)jccb_0(Assembler::zero, copy_tail, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8179); | |||
8180 | ||||
8181 | pmovzxbw(tmp1, Address(src, 0)); | |||
8182 | movdqu(Address(dst, 0), tmp1); | |||
8183 | addptr(src, 8); | |||
8184 | addptr(dst, 2 * 8); | |||
8185 | ||||
8186 | jmp(copy_tail, true); | |||
8187 | } | |||
8188 | ||||
8189 | // inflate 8 chars per iter | |||
8190 | bind(copy_8_loop); | |||
8191 | pmovzxbw(tmp1, Address(src, len, Address::times_1)); // unpack to 8 words | |||
8192 | movdqu(Address(dst, len, Address::times_2), tmp1); | |||
8193 | addptr(len, 8); | |||
8194 | jcc(Assembler::notZero, copy_8_loop); | |||
8195 | ||||
8196 | bind(copy_tail); | |||
8197 | movl(len, tmp2); | |||
8198 | ||||
8199 | cmpl(len, 4); | |||
8200 | jccb(Assembler::less, copy_bytes)jccb_0(Assembler::less, copy_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8200); | |||
8201 | ||||
8202 | movdl(tmp1, Address(src, 0)); // load 4 byte chars | |||
8203 | pmovzxbw(tmp1, tmp1); | |||
8204 | movq(Address(dst, 0), tmp1); | |||
8205 | subptr(len, 4); | |||
8206 | addptr(src, 4); | |||
8207 | addptr(dst, 8); | |||
8208 | ||||
8209 | bind(copy_bytes); | |||
8210 | } else { | |||
8211 | bind(below_threshold); | |||
8212 | } | |||
8213 | ||||
8214 | testl(len, len); | |||
8215 | jccb(Assembler::zero, done)jccb_0(Assembler::zero, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8215); | |||
8216 | lea(src, Address(src, len, Address::times_1)); | |||
8217 | lea(dst, Address(dst, len, Address::times_2)); | |||
8218 | negptr(len); | |||
8219 | ||||
8220 | // inflate 1 char per iter | |||
8221 | bind(copy_chars_loop); | |||
8222 | load_unsigned_byte(tmp2, Address(src, len, Address::times_1)); // load byte char | |||
8223 | movw(Address(dst, len, Address::times_2), tmp2); // inflate byte char to word | |||
8224 | increment(len); | |||
8225 | jcc(Assembler::notZero, copy_chars_loop); | |||
8226 | ||||
8227 | bind(done); | |||
8228 | } | |||
8229 | ||||
8230 | ||||
8231 | void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, int vector_len) { | |||
8232 | switch(type) { | |||
8233 | case T_BYTE: | |||
8234 | case T_BOOLEAN: | |||
8235 | evmovdqub(dst, kmask, src, false, vector_len); | |||
8236 | break; | |||
8237 | case T_CHAR: | |||
8238 | case T_SHORT: | |||
8239 | evmovdquw(dst, kmask, src, false, vector_len); | |||
8240 | break; | |||
8241 | case T_INT: | |||
8242 | case T_FLOAT: | |||
8243 | evmovdqul(dst, kmask, src, false, vector_len); | |||
8244 | break; | |||
8245 | case T_LONG: | |||
8246 | case T_DOUBLE: | |||
8247 | evmovdquq(dst, kmask, src, false, vector_len); | |||
8248 | break; | |||
8249 | default: | |||
8250 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8250, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); | |||
8251 | break; | |||
8252 | } | |||
8253 | } | |||
8254 | ||||
8255 | void MacroAssembler::evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, int vector_len) { | |||
8256 | switch(type) { | |||
8257 | case T_BYTE: | |||
8258 | case T_BOOLEAN: | |||
8259 | evmovdqub(dst, kmask, src, true, vector_len); | |||
8260 | break; | |||
8261 | case T_CHAR: | |||
8262 | case T_SHORT: | |||
8263 | evmovdquw(dst, kmask, src, true, vector_len); | |||
8264 | break; | |||
8265 | case T_INT: | |||
8266 | case T_FLOAT: | |||
8267 | evmovdqul(dst, kmask, src, true, vector_len); | |||
8268 | break; | |||
8269 | case T_LONG: | |||
8270 | case T_DOUBLE: | |||
8271 | evmovdquq(dst, kmask, src, true, vector_len); | |||
8272 | break; | |||
8273 | default: | |||
8274 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8274, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); | |||
8275 | break; | |||
8276 | } | |||
8277 | } | |||
8278 | ||||
8279 | void MacroAssembler::knot(uint masklen, KRegister dst, KRegister src, KRegister ktmp, Register rtmp) { | |||
8280 | switch(masklen) { | |||
8281 | case 2: | |||
8282 | knotbl(dst, src); | |||
8283 | movl(rtmp, 3); | |||
8284 | kmovbl(ktmp, rtmp); | |||
8285 | kandbl(dst, ktmp, dst); | |||
8286 | break; | |||
8287 | case 4: | |||
8288 | knotbl(dst, src); | |||
8289 | movl(rtmp, 15); | |||
8290 | kmovbl(ktmp, rtmp); | |||
8291 | kandbl(dst, ktmp, dst); | |||
8292 | break; | |||
8293 | case 8: | |||
8294 | knotbl(dst, src); | |||
8295 | break; | |||
8296 | case 16: | |||
8297 | knotwl(dst, src); | |||
8298 | break; | |||
8299 | case 32: | |||
8300 | knotdl(dst, src); | |||
8301 | break; | |||
8302 | case 64: | |||
8303 | knotql(dst, src); | |||
8304 | break; | |||
8305 | default: | |||
8306 | fatal("Unexpected vector length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8306, "Unexpected vector length %d", masklen); ::breakpoint (); } while (0); | |||
8307 | break; | |||
8308 | } | |||
8309 | } | |||
8310 | ||||
8311 | void MacroAssembler::kand(BasicType type, KRegister dst, KRegister src1, KRegister src2) { | |||
8312 | switch(type) { | |||
8313 | case T_BOOLEAN: | |||
8314 | case T_BYTE: | |||
8315 | kandbl(dst, src1, src2); | |||
8316 | break; | |||
8317 | case T_CHAR: | |||
8318 | case T_SHORT: | |||
8319 | kandwl(dst, src1, src2); | |||
8320 | break; | |||
8321 | case T_INT: | |||
8322 | case T_FLOAT: | |||
8323 | kanddl(dst, src1, src2); | |||
8324 | break; | |||
8325 | case T_LONG: | |||
8326 | case T_DOUBLE: | |||
8327 | kandql(dst, src1, src2); | |||
8328 | break; | |||
8329 | default: | |||
8330 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8330, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); | |||
8331 | break; | |||
8332 | } | |||
8333 | } | |||
8334 | ||||
8335 | void MacroAssembler::kor(BasicType type, KRegister dst, KRegister src1, KRegister src2) { | |||
8336 | switch(type) { | |||
8337 | case T_BOOLEAN: | |||
8338 | case T_BYTE: | |||
8339 | korbl(dst, src1, src2); | |||
8340 | break; | |||
8341 | case T_CHAR: | |||
8342 | case T_SHORT: | |||
8343 | korwl(dst, src1, src2); | |||
8344 | break; | |||
8345 | case T_INT: | |||
8346 | case T_FLOAT: | |||
8347 | kordl(dst, src1, src2); | |||
8348 | break; | |||
8349 | case T_LONG: | |||
8350 | case T_DOUBLE: | |||
8351 | korql(dst, src1, src2); | |||
8352 | break; | |||
8353 | default: | |||
8354 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8354, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); | |||
8355 | break; | |||
8356 | } | |||
8357 | } | |||
8358 | ||||
8359 | void MacroAssembler::kxor(BasicType type, KRegister dst, KRegister src1, KRegister src2) { | |||
8360 | switch(type) { | |||
8361 | case T_BOOLEAN: | |||
8362 | case T_BYTE: | |||
8363 | kxorbl(dst, src1, src2); | |||
8364 | break; | |||
8365 | case T_CHAR: | |||
8366 | case T_SHORT: | |||
8367 | kxorwl(dst, src1, src2); | |||
8368 | break; | |||
8369 | case T_INT: | |||
8370 | case T_FLOAT: | |||
8371 | kxordl(dst, src1, src2); | |||
8372 | break; | |||
8373 | case T_LONG: | |||
8374 | case T_DOUBLE: | |||
8375 | kxorql(dst, src1, src2); | |||
8376 | break; | |||
8377 | default: | |||
8378 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8378, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); | |||
8379 | break; | |||
8380 | } | |||
8381 | } | |||
8382 | ||||
8383 | void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { | |||
8384 | switch(type) { | |||
8385 | case T_BOOLEAN: | |||
8386 | case T_BYTE: | |||
8387 | evpermb(dst, mask, nds, src, merge, vector_len); break; | |||
8388 | case T_CHAR: | |||
8389 | case T_SHORT: | |||
8390 | evpermw(dst, mask, nds, src, merge, vector_len); break; | |||
8391 | case T_INT: | |||
8392 | case T_FLOAT: | |||
8393 | evpermd(dst, mask, nds, src, merge, vector_len); break; | |||
8394 | case T_LONG: | |||
8395 | case T_DOUBLE: | |||
8396 | evpermq(dst, mask, nds, src, merge, vector_len); break; | |||
8397 | default: | |||
8398 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8398, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8399 | } | |||
8400 | } | |||
8401 | ||||
8402 | void MacroAssembler::evperm(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { | |||
8403 | switch(type) { | |||
8404 | case T_BOOLEAN: | |||
8405 | case T_BYTE: | |||
8406 | evpermb(dst, mask, nds, src, merge, vector_len); break; | |||
8407 | case T_CHAR: | |||
8408 | case T_SHORT: | |||
8409 | evpermw(dst, mask, nds, src, merge, vector_len); break; | |||
8410 | case T_INT: | |||
8411 | case T_FLOAT: | |||
8412 | evpermd(dst, mask, nds, src, merge, vector_len); break; | |||
8413 | case T_LONG: | |||
8414 | case T_DOUBLE: | |||
8415 | evpermq(dst, mask, nds, src, merge, vector_len); break; | |||
8416 | default: | |||
8417 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8417, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8418 | } | |||
8419 | } | |||
8420 | ||||
8421 | void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { | |||
8422 | switch(type) { | |||
8423 | case T_BYTE: | |||
8424 | evpminsb(dst, mask, nds, src, merge, vector_len); break; | |||
8425 | case T_SHORT: | |||
8426 | evpminsw(dst, mask, nds, src, merge, vector_len); break; | |||
8427 | case T_INT: | |||
8428 | evpminsd(dst, mask, nds, src, merge, vector_len); break; | |||
8429 | case T_LONG: | |||
8430 | evpminsq(dst, mask, nds, src, merge, vector_len); break; | |||
8431 | default: | |||
8432 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8432, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8433 | } | |||
8434 | } | |||
8435 | ||||
8436 | void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { | |||
8437 | switch(type) { | |||
8438 | case T_BYTE: | |||
8439 | evpmaxsb(dst, mask, nds, src, merge, vector_len); break; | |||
8440 | case T_SHORT: | |||
8441 | evpmaxsw(dst, mask, nds, src, merge, vector_len); break; | |||
8442 | case T_INT: | |||
8443 | evpmaxsd(dst, mask, nds, src, merge, vector_len); break; | |||
8444 | case T_LONG: | |||
8445 | evpmaxsq(dst, mask, nds, src, merge, vector_len); break; | |||
8446 | default: | |||
8447 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8447, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8448 | } | |||
8449 | } | |||
8450 | ||||
8451 | void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { | |||
8452 | switch(type) { | |||
8453 | case T_BYTE: | |||
8454 | evpminsb(dst, mask, nds, src, merge, vector_len); break; | |||
8455 | case T_SHORT: | |||
8456 | evpminsw(dst, mask, nds, src, merge, vector_len); break; | |||
8457 | case T_INT: | |||
8458 | evpminsd(dst, mask, nds, src, merge, vector_len); break; | |||
8459 | case T_LONG: | |||
8460 | evpminsq(dst, mask, nds, src, merge, vector_len); break; | |||
8461 | default: | |||
8462 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8462, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8463 | } | |||
8464 | } | |||
8465 | ||||
8466 | void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { | |||
8467 | switch(type) { | |||
8468 | case T_BYTE: | |||
8469 | evpmaxsb(dst, mask, nds, src, merge, vector_len); break; | |||
8470 | case T_SHORT: | |||
8471 | evpmaxsw(dst, mask, nds, src, merge, vector_len); break; | |||
8472 | case T_INT: | |||
8473 | evpmaxsd(dst, mask, nds, src, merge, vector_len); break; | |||
8474 | case T_LONG: | |||
8475 | evpmaxsq(dst, mask, nds, src, merge, vector_len); break; | |||
8476 | default: | |||
8477 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8477, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8478 | } | |||
8479 | } | |||
8480 | ||||
8481 | void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { | |||
8482 | switch(type) { | |||
8483 | case T_INT: | |||
8484 | evpxord(dst, mask, nds, src, merge, vector_len); break; | |||
8485 | case T_LONG: | |||
8486 | evpxorq(dst, mask, nds, src, merge, vector_len); break; | |||
8487 | default: | |||
8488 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8488, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8489 | } | |||
8490 | } | |||
8491 | ||||
8492 | void MacroAssembler::evxor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { | |||
8493 | switch(type) { | |||
8494 | case T_INT: | |||
8495 | evpxord(dst, mask, nds, src, merge, vector_len); break; | |||
8496 | case T_LONG: | |||
8497 | evpxorq(dst, mask, nds, src, merge, vector_len); break; | |||
8498 | default: | |||
8499 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8499, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8500 | } | |||
8501 | } | |||
8502 | ||||
8503 | void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { | |||
8504 | switch(type) { | |||
8505 | case T_INT: | |||
8506 | Assembler::evpord(dst, mask, nds, src, merge, vector_len); break; | |||
8507 | case T_LONG: | |||
8508 | evporq(dst, mask, nds, src, merge, vector_len); break; | |||
8509 | default: | |||
8510 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8510, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8511 | } | |||
8512 | } | |||
8513 | ||||
8514 | void MacroAssembler::evor(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { | |||
8515 | switch(type) { | |||
8516 | case T_INT: | |||
8517 | Assembler::evpord(dst, mask, nds, src, merge, vector_len); break; | |||
8518 | case T_LONG: | |||
8519 | evporq(dst, mask, nds, src, merge, vector_len); break; | |||
8520 | default: | |||
8521 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8521, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8522 | } | |||
8523 | } | |||
8524 | ||||
8525 | void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { | |||
8526 | switch(type) { | |||
8527 | case T_INT: | |||
8528 | evpandd(dst, mask, nds, src, merge, vector_len); break; | |||
8529 | case T_LONG: | |||
8530 | evpandq(dst, mask, nds, src, merge, vector_len); break; | |||
8531 | default: | |||
8532 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8532, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8533 | } | |||
8534 | } | |||
8535 | ||||
8536 | void MacroAssembler::evand(BasicType type, XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { | |||
8537 | switch(type) { | |||
8538 | case T_INT: | |||
8539 | evpandd(dst, mask, nds, src, merge, vector_len); break; | |||
8540 | case T_LONG: | |||
8541 | evpandq(dst, mask, nds, src, merge, vector_len); break; | |||
8542 | default: | |||
8543 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8543, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8544 | } | |||
8545 | } | |||
8546 | ||||
8547 | void MacroAssembler::anytrue(Register dst, uint masklen, KRegister src1, KRegister src2) { | |||
8548 | masklen = masklen < 8 ? 8 : masklen; | |||
8549 | ktest(masklen, src1, src2); | |||
8550 | setb(Assembler::notZero, dst); | |||
8551 | movzbl(dst, dst); | |||
8552 | } | |||
8553 | ||||
8554 | void MacroAssembler::alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch) { | |||
8555 | if (masklen < 8) { | |||
8556 | knotbl(kscratch, src2); | |||
8557 | kortestbl(src1, kscratch); | |||
8558 | setb(Assembler::carrySet, dst); | |||
8559 | movzbl(dst, dst); | |||
8560 | } else { | |||
8561 | ktest(masklen, src1, src2); | |||
8562 | setb(Assembler::carrySet, dst); | |||
8563 | movzbl(dst, dst); | |||
8564 | } | |||
8565 | } | |||
8566 | ||||
8567 | void MacroAssembler::kortest(uint masklen, KRegister src1, KRegister src2) { | |||
8568 | switch(masklen) { | |||
8569 | case 8: | |||
8570 | kortestbl(src1, src2); | |||
8571 | break; | |||
8572 | case 16: | |||
8573 | kortestwl(src1, src2); | |||
8574 | break; | |||
8575 | case 32: | |||
8576 | kortestdl(src1, src2); | |||
8577 | break; | |||
8578 | case 64: | |||
8579 | kortestql(src1, src2); | |||
8580 | break; | |||
8581 | default: | |||
8582 | fatal("Unexpected mask length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8582, "Unexpected mask length %d", masklen); ::breakpoint() ; } while (0); | |||
8583 | break; | |||
8584 | } | |||
8585 | } | |||
8586 | ||||
8587 | ||||
8588 | void MacroAssembler::ktest(uint masklen, KRegister src1, KRegister src2) { | |||
8589 | switch(masklen) { | |||
8590 | case 8: | |||
8591 | ktestbl(src1, src2); | |||
8592 | break; | |||
8593 | case 16: | |||
8594 | ktestwl(src1, src2); | |||
8595 | break; | |||
8596 | case 32: | |||
8597 | ktestdl(src1, src2); | |||
8598 | break; | |||
8599 | case 64: | |||
8600 | ktestql(src1, src2); | |||
8601 | break; | |||
8602 | default: | |||
8603 | fatal("Unexpected mask length %d", masklen)do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8603, "Unexpected mask length %d", masklen); ::breakpoint() ; } while (0); | |||
8604 | break; | |||
8605 | } | |||
8606 | } | |||
8607 | ||||
8608 | void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) { | |||
8609 | switch(type) { | |||
8610 | case T_INT: | |||
8611 | evprold(dst, mask, src, shift, merge, vlen_enc); break; | |||
8612 | case T_LONG: | |||
8613 | evprolq(dst, mask, src, shift, merge, vlen_enc); break; | |||
8614 | default: | |||
8615 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8615, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8616 | break; | |||
8617 | } | |||
8618 | } | |||
8619 | ||||
8620 | void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc) { | |||
8621 | switch(type) { | |||
8622 | case T_INT: | |||
8623 | evprord(dst, mask, src, shift, merge, vlen_enc); break; | |||
8624 | case T_LONG: | |||
8625 | evprorq(dst, mask, src, shift, merge, vlen_enc); break; | |||
8626 | default: | |||
8627 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8627, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8628 | } | |||
8629 | } | |||
8630 | ||||
8631 | void MacroAssembler::evrold(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) { | |||
8632 | switch(type) { | |||
8633 | case T_INT: | |||
8634 | evprolvd(dst, mask, src1, src2, merge, vlen_enc); break; | |||
8635 | case T_LONG: | |||
8636 | evprolvq(dst, mask, src1, src2, merge, vlen_enc); break; | |||
8637 | default: | |||
8638 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8638, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8639 | } | |||
8640 | } | |||
8641 | ||||
8642 | void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc) { | |||
8643 | switch(type) { | |||
8644 | case T_INT: | |||
8645 | evprorvd(dst, mask, src1, src2, merge, vlen_enc); break; | |||
8646 | case T_LONG: | |||
8647 | evprorvq(dst, mask, src1, src2, merge, vlen_enc); break; | |||
8648 | default: | |||
8649 | fatal("Unexpected type argument %s", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8649, "Unexpected type argument %s", type2name(type)); ::breakpoint (); } while (0); break; | |||
8650 | } | |||
8651 | } | |||
8652 | #if COMPILER2_OR_JVMCI1 | |||
8653 | ||||
8654 | void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask, | |||
8655 | Register length, Register temp, int vec_enc) { | |||
8656 | // Computing mask for predicated vector store. | |||
8657 | movptr(temp, -1); | |||
8658 | bzhiq(temp, temp, length); | |||
8659 | kmov(mask, temp); | |||
8660 | evmovdqu(bt, mask, dst, xmm, vec_enc); | |||
8661 | } | |||
8662 | ||||
8663 | // Set memory operation for length "less than" 64 bytes. | |||
8664 | void MacroAssembler::fill64_masked(uint shift, Register dst, int disp, | |||
8665 | XMMRegister xmm, KRegister mask, Register length, | |||
8666 | Register temp, bool use64byteVector) { | |||
8667 | assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8667, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32" ); ::breakpoint(); } } while (0); | |||
8668 | BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG}; | |||
8669 | if (!use64byteVector) { | |||
8670 | fill32(dst, disp, xmm); | |||
8671 | subptr(length, 32 >> shift); | |||
8672 | fill32_masked(shift, dst, disp + 32, xmm, mask, length, temp); | |||
8673 | } else { | |||
8674 | assert(MaxVectorSize == 64, "vector length != 64")do { if (!(MaxVectorSize == 64)) { (*g_assert_poison) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8674, "assert(" "MaxVectorSize == 64" ") failed", "vector length != 64" ); ::breakpoint(); } } while (0); | |||
8675 | fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_512bit); | |||
8676 | } | |||
8677 | } | |||
8678 | ||||
8679 | ||||
8680 | void MacroAssembler::fill32_masked(uint shift, Register dst, int disp, | |||
8681 | XMMRegister xmm, KRegister mask, Register length, | |||
8682 | Register temp) { | |||
8683 | assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8683, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32" ); ::breakpoint(); } } while (0); | |||
8684 | BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG}; | |||
8685 | fill_masked(type[shift], Address(dst, disp), xmm, mask, length, temp, Assembler::AVX_256bit); | |||
8686 | } | |||
8687 | ||||
8688 | ||||
8689 | void MacroAssembler::fill32(Register dst, int disp, XMMRegister xmm) { | |||
8690 | assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8690, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32" ); ::breakpoint(); } } while (0); | |||
8691 | vmovdqu(Address(dst, disp), xmm); | |||
8692 | } | |||
8693 | ||||
8694 | void MacroAssembler::fill64(Register dst, int disp, XMMRegister xmm, bool use64byteVector) { | |||
8695 | assert(MaxVectorSize >= 32, "vector length should be >= 32")do { if (!(MaxVectorSize >= 32)) { (*g_assert_poison) = 'X' ;; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8695, "assert(" "MaxVectorSize >= 32" ") failed", "vector length should be >= 32" ); ::breakpoint(); } } while (0); | |||
8696 | BasicType type[] = {T_BYTE, T_SHORT, T_INT, T_LONG}; | |||
8697 | if (!use64byteVector) { | |||
8698 | fill32(dst, disp, xmm); | |||
8699 | fill32(dst, disp + 32, xmm); | |||
8700 | } else { | |||
8701 | evmovdquq(Address(dst, disp), xmm, Assembler::AVX_512bit); | |||
8702 | } | |||
8703 | } | |||
8704 | ||||
8705 | #ifdef _LP641 | |||
8706 | void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register value, | |||
8707 | Register count, Register rtmp, XMMRegister xtmp) { | |||
8708 | Label L_exit; | |||
8709 | Label L_fill_start; | |||
8710 | Label L_fill_64_bytes; | |||
8711 | Label L_fill_96_bytes; | |||
8712 | Label L_fill_128_bytes; | |||
8713 | Label L_fill_128_bytes_loop; | |||
8714 | Label L_fill_128_loop_header; | |||
8715 | Label L_fill_128_bytes_loop_header; | |||
8716 | Label L_fill_128_bytes_loop_pre_header; | |||
8717 | Label L_fill_zmm_sequence; | |||
8718 | ||||
8719 | int shift = -1; | |||
8720 | int avx3threshold = VM_Version::avx3_threshold(); | |||
8721 | switch(type) { | |||
8722 | case T_BYTE: shift = 0; | |||
8723 | break; | |||
8724 | case T_SHORT: shift = 1; | |||
8725 | break; | |||
8726 | case T_INT: shift = 2; | |||
8727 | break; | |||
8728 | /* Uncomment when LONG fill stubs are supported. | |||
8729 | case T_LONG: shift = 3; | |||
8730 | break; | |||
8731 | */ | |||
8732 | default: | |||
8733 | fatal("Unhandled type: %s\n", type2name(type))do { (*g_assert_poison) = 'X';; report_fatal(INTERNAL_ERROR, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8733, "Unhandled type: %s\n", type2name(type)); ::breakpoint (); } while (0); | |||
8734 | } | |||
8735 | ||||
8736 | if ((avx3threshold != 0) || (MaxVectorSize == 32)) { | |||
8737 | ||||
8738 | if (MaxVectorSize == 64) { | |||
8739 | cmpq(count, avx3threshold >> shift); | |||
8740 | jcc(Assembler::greater, L_fill_zmm_sequence); | |||
8741 | } | |||
8742 | ||||
8743 | evpbroadcast(type, xtmp, value, Assembler::AVX_256bit); | |||
8744 | ||||
8745 | bind(L_fill_start); | |||
8746 | ||||
8747 | cmpq(count, 32 >> shift); | |||
8748 | jccb(Assembler::greater, L_fill_64_bytes)jccb_0(Assembler::greater, L_fill_64_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8748); | |||
8749 | fill32_masked(shift, to, 0, xtmp, k2, count, rtmp); | |||
8750 | jmp(L_exit); | |||
8751 | ||||
8752 | bind(L_fill_64_bytes); | |||
8753 | cmpq(count, 64 >> shift); | |||
8754 | jccb(Assembler::greater, L_fill_96_bytes)jccb_0(Assembler::greater, L_fill_96_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8754); | |||
8755 | fill64_masked(shift, to, 0, xtmp, k2, count, rtmp); | |||
8756 | jmp(L_exit); | |||
8757 | ||||
8758 | bind(L_fill_96_bytes); | |||
8759 | cmpq(count, 96 >> shift); | |||
8760 | jccb(Assembler::greater, L_fill_128_bytes)jccb_0(Assembler::greater, L_fill_128_bytes, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8760); | |||
8761 | fill64(to, 0, xtmp); | |||
8762 | subq(count, 64 >> shift); | |||
8763 | fill32_masked(shift, to, 64, xtmp, k2, count, rtmp); | |||
8764 | jmp(L_exit); | |||
8765 | ||||
8766 | bind(L_fill_128_bytes); | |||
8767 | cmpq(count, 128 >> shift); | |||
8768 | jccb(Assembler::greater, L_fill_128_bytes_loop_pre_header)jccb_0(Assembler::greater, L_fill_128_bytes_loop_pre_header, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8768); | |||
8769 | fill64(to, 0, xtmp); | |||
8770 | fill32(to, 64, xtmp); | |||
8771 | subq(count, 96 >> shift); | |||
8772 | fill32_masked(shift, to, 96, xtmp, k2, count, rtmp); | |||
8773 | jmp(L_exit); | |||
8774 | ||||
8775 | bind(L_fill_128_bytes_loop_pre_header); | |||
8776 | { | |||
8777 | mov(rtmp, to); | |||
8778 | andq(rtmp, 31); | |||
8779 | jccb(Assembler::zero, L_fill_128_bytes_loop_header)jccb_0(Assembler::zero, L_fill_128_bytes_loop_header, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8779); | |||
8780 | negq(rtmp); | |||
8781 | addq(rtmp, 32); | |||
8782 | mov64(r8, -1L); | |||
8783 | bzhiq(r8, r8, rtmp); | |||
8784 | kmovql(k2, r8); | |||
8785 | evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_256bit); | |||
8786 | addq(to, rtmp); | |||
8787 | shrq(rtmp, shift); | |||
8788 | subq(count, rtmp); | |||
8789 | } | |||
8790 | ||||
8791 | cmpq(count, 128 >> shift); | |||
8792 | jcc(Assembler::less, L_fill_start); | |||
8793 | ||||
8794 | bind(L_fill_128_bytes_loop_header); | |||
8795 | subq(count, 128 >> shift); | |||
8796 | ||||
8797 | align32(); | |||
8798 | bind(L_fill_128_bytes_loop); | |||
8799 | fill64(to, 0, xtmp); | |||
8800 | fill64(to, 64, xtmp); | |||
8801 | addq(to, 128); | |||
8802 | subq(count, 128 >> shift); | |||
8803 | jccb(Assembler::greaterEqual, L_fill_128_bytes_loop)jccb_0(Assembler::greaterEqual, L_fill_128_bytes_loop, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8803); | |||
8804 | ||||
8805 | addq(count, 128 >> shift); | |||
8806 | jcc(Assembler::zero, L_exit); | |||
8807 | jmp(L_fill_start); | |||
8808 | } | |||
8809 | ||||
8810 | if (MaxVectorSize == 64) { | |||
8811 | // Sequence using 64 byte ZMM register. | |||
8812 | Label L_fill_128_bytes_zmm; | |||
8813 | Label L_fill_192_bytes_zmm; | |||
8814 | Label L_fill_192_bytes_loop_zmm; | |||
8815 | Label L_fill_192_bytes_loop_header_zmm; | |||
8816 | Label L_fill_192_bytes_loop_pre_header_zmm; | |||
8817 | Label L_fill_start_zmm_sequence; | |||
8818 | ||||
8819 | bind(L_fill_zmm_sequence); | |||
8820 | evpbroadcast(type, xtmp, value, Assembler::AVX_512bit); | |||
8821 | ||||
8822 | bind(L_fill_start_zmm_sequence); | |||
8823 | cmpq(count, 64 >> shift); | |||
8824 | jccb(Assembler::greater, L_fill_128_bytes_zmm)jccb_0(Assembler::greater, L_fill_128_bytes_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8824); | |||
8825 | fill64_masked(shift, to, 0, xtmp, k2, count, rtmp, true); | |||
8826 | jmp(L_exit); | |||
8827 | ||||
8828 | bind(L_fill_128_bytes_zmm); | |||
8829 | cmpq(count, 128 >> shift); | |||
8830 | jccb(Assembler::greater, L_fill_192_bytes_zmm)jccb_0(Assembler::greater, L_fill_192_bytes_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8830); | |||
8831 | fill64(to, 0, xtmp, true); | |||
8832 | subq(count, 64 >> shift); | |||
8833 | fill64_masked(shift, to, 64, xtmp, k2, count, rtmp, true); | |||
8834 | jmp(L_exit); | |||
8835 | ||||
8836 | bind(L_fill_192_bytes_zmm); | |||
8837 | cmpq(count, 192 >> shift); | |||
8838 | jccb(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm)jccb_0(Assembler::greater, L_fill_192_bytes_loop_pre_header_zmm , "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8838); | |||
8839 | fill64(to, 0, xtmp, true); | |||
8840 | fill64(to, 64, xtmp, true); | |||
8841 | subq(count, 128 >> shift); | |||
8842 | fill64_masked(shift, to, 128, xtmp, k2, count, rtmp, true); | |||
8843 | jmp(L_exit); | |||
8844 | ||||
8845 | bind(L_fill_192_bytes_loop_pre_header_zmm); | |||
8846 | { | |||
8847 | movq(rtmp, to); | |||
8848 | andq(rtmp, 63); | |||
8849 | jccb(Assembler::zero, L_fill_192_bytes_loop_header_zmm)jccb_0(Assembler::zero, L_fill_192_bytes_loop_header_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8849); | |||
8850 | negq(rtmp); | |||
8851 | addq(rtmp, 64); | |||
8852 | mov64(r8, -1L); | |||
8853 | bzhiq(r8, r8, rtmp); | |||
8854 | kmovql(k2, r8); | |||
8855 | evmovdqu(T_BYTE, k2, Address(to, 0), xtmp, Assembler::AVX_512bit); | |||
8856 | addq(to, rtmp); | |||
8857 | shrq(rtmp, shift); | |||
8858 | subq(count, rtmp); | |||
8859 | } | |||
8860 | ||||
8861 | cmpq(count, 192 >> shift); | |||
8862 | jcc(Assembler::less, L_fill_start_zmm_sequence); | |||
8863 | ||||
8864 | bind(L_fill_192_bytes_loop_header_zmm); | |||
8865 | subq(count, 192 >> shift); | |||
8866 | ||||
8867 | align32(); | |||
8868 | bind(L_fill_192_bytes_loop_zmm); | |||
8869 | fill64(to, 0, xtmp, true); | |||
8870 | fill64(to, 64, xtmp, true); | |||
8871 | fill64(to, 128, xtmp, true); | |||
8872 | addq(to, 192); | |||
8873 | subq(count, 192 >> shift); | |||
8874 | jccb(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm)jccb_0(Assembler::greaterEqual, L_fill_192_bytes_loop_zmm, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8874); | |||
8875 | ||||
8876 | addq(count, 192 >> shift); | |||
8877 | jcc(Assembler::zero, L_exit); | |||
8878 | jmp(L_fill_start_zmm_sequence); | |||
8879 | } | |||
8880 | bind(L_exit); | |||
8881 | } | |||
8882 | #endif | |||
8883 | #endif //COMPILER2_OR_JVMCI | |||
8884 | ||||
8885 | ||||
8886 | #ifdef _LP641 | |||
// Emits code that converts the single-precision value in src to a 32-bit
// integer in dst.
//   dst - general purpose register receiving the result
//   src - XMM register holding the float to convert
// Fast path: a single cvttss2sil. Slow path: when the result equals
// 0x80000000 the value is spilled to the stack and the f2i_fixup stub is
// called to produce the final result.
8887 | void MacroAssembler::convert_f2i(Register dst, XMMRegister src) { | |||
8888 | Label done; | |||
8889 | cvttss2sil(dst, src); | |||
8890 | // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub | |||
// 0x80000000 is treated as the "needs fixup" marker; any other result is
// accepted as-is and the slow path is skipped.
8891 | cmpl(dst, 0x80000000); // float_sign_flip | |||
8892 | jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8892); | |||
// Slow path: reserve an 8-byte stack slot and pass the original float in it.
8893 | subptr(rsp, 8); | |||
8894 | movflt(Address(rsp, 0), src); | |||
8895 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())((address)((address_word)(StubRoutines::x86::f2i_fixup()))))); | |||
// The pop both releases the slot and loads dst from it.
// NOTE(review): presumably the stub overwrites the slot with the corrected
// result - confirm against the f2i_fixup stub.
8896 | pop(dst); | |||
8897 | bind(done); | |||
8898 | } | |||
8899 | ||||
// Emits code that converts the double-precision value in src to a 32-bit
// integer in dst.
//   dst - general purpose register receiving the result
//   src - XMM register holding the double to convert
// Fast path: a single cvttsd2sil. Slow path: when the result equals
// 0x80000000 the value is spilled to the stack and the d2i_fixup stub is
// called to produce the final result.
8900 | void MacroAssembler::convert_d2i(Register dst, XMMRegister src) { | |||
8901 | Label done; | |||
8902 | cvttsd2sil(dst, src); | |||
8903 | // Conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub | |||
// 0x80000000 is treated as the "needs fixup" marker; any other result is
// accepted as-is and the slow path is skipped.
8904 | cmpl(dst, 0x80000000); // float_sign_flip | |||
8905 | jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8905); | |||
// Slow path: reserve an 8-byte stack slot and pass the original double in it.
8906 | subptr(rsp, 8); | |||
8907 | movdbl(Address(rsp, 0), src); | |||
8908 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())((address)((address_word)(StubRoutines::x86::d2i_fixup()))))); | |||
// The pop both releases the slot and loads dst from it.
// NOTE(review): presumably the stub overwrites the slot with the corrected
// result - confirm against the d2i_fixup stub.
8909 | pop(dst); | |||
8910 | bind(done); | |||
8911 | } | |||
8912 | ||||
// Emits code that converts the single-precision value in src to a 64-bit
// integer in dst.
//   dst - general purpose register receiving the result
//   src - XMM register holding the float to convert
// Fast path: a single cvttss2siq. Slow path: when the result equals the
// 64-bit value stored at StubRoutines::x86::double_sign_flip(), the value is
// spilled to the stack and the f2l_fixup stub is called.
8913 | void MacroAssembler::convert_f2l(Register dst, XMMRegister src) { | |||
8914 | Label done; | |||
8915 | cvttss2siq(dst, src); | |||
// Compare against the in-memory marker value; a mismatch means the fast
// result is accepted and the slow path is skipped.
// NOTE(review): presumably the marker is 0x8000000000000000, the 64-bit
// counterpart of the 0x80000000 check in convert_f2i - confirm.
8916 | cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip())); | |||
8917 | jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8917); | |||
// Slow path: reserve an 8-byte stack slot and pass the original float in it.
8918 | subptr(rsp, 8); | |||
8919 | movflt(Address(rsp, 0), src); | |||
8920 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())((address)((address_word)(StubRoutines::x86::f2l_fixup()))))); | |||
// The pop both releases the slot and loads dst from it.
// NOTE(review): presumably the stub overwrites the slot with the corrected
// result - confirm against the f2l_fixup stub.
8921 | pop(dst); | |||
8922 | bind(done); | |||
8923 | } | |||
8924 | ||||
// Emits code that converts the double-precision value in src to a 64-bit
// integer in dst.
//   dst - general purpose register receiving the result
//   src - XMM register holding the double to convert
// Fast path: a single cvttsd2siq. Slow path: when the result equals the
// 64-bit value stored at StubRoutines::x86::double_sign_flip(), the value is
// spilled to the stack and the d2l_fixup stub is called.
8925 | void MacroAssembler::convert_d2l(Register dst, XMMRegister src) { | |||
8926 | Label done; | |||
8927 | cvttsd2siq(dst, src); | |||
// Compare against the in-memory marker value; a mismatch means the fast
// result is accepted and the slow path is skipped.
// NOTE(review): presumably the marker is 0x8000000000000000, the 64-bit
// counterpart of the 0x80000000 check in convert_d2i - confirm.
8928 | cmp64(dst, ExternalAddress((address) StubRoutines::x86::double_sign_flip())); | |||
8929 | jccb(Assembler::notEqual, done)jccb_0(Assembler::notEqual, done, "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8929); | |||
// Slow path: reserve an 8-byte stack slot and pass the original double in it.
8930 | subptr(rsp, 8); | |||
8931 | movdbl(Address(rsp, 0), src); | |||
8932 | call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())((address)((address_word)(StubRoutines::x86::d2l_fixup()))))); | |||
// The pop both releases the slot and loads dst from it.
// NOTE(review): presumably the stub overwrites the slot with the corrected
// result - confirm against the d2l_fixup stub.
8933 | pop(dst); | |||
8934 | bind(done); | |||
8935 | } | |||
8936 | ||||
// Emits a single cache-line writeback instruction for the given address.
//   line - address operand passed unchanged to the selected instruction
// The instruction is chosen at code-generation time from the CPU features
// reported by VM_Version:
//   clflushopt and clwb supported -> clwb
//   clflushopt only               -> clflushopt
//   otherwise                     -> clflush
// Note that clwb is only selected when clflushopt is also reported; a CPU
// reporting clwb without clflushopt falls back to clflush.
8937 | void MacroAssembler::cache_wb(Address line) | |||
8938 | { | |||
8939 | // 64 bit cpus always support clflush | |||
8940 | assert(VM_Version::supports_clflush(), "clflush should be available")do { if (!(VM_Version::supports_clflush())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8940, "assert(" "VM_Version::supports_clflush()" ") failed" , "clflush should be available"); ::breakpoint(); } } while ( 0); | |||
8941 | bool optimized = VM_Version::supports_clflushopt(); | |||
8942 | bool no_evict = VM_Version::supports_clwb(); | |||
8943 | ||||
8944 | // prefer clwb (writeback without evict) otherwise | |||
8945 | // prefer clflushopt (potentially parallel writeback with evict) | |||
8946 | // otherwise fallback on clflush (serial writeback with evict) | |||
8947 | ||||
8948 | if (optimized) { | |||
8949 | if (no_evict) { | |||
8950 | clwb(line); | |||
8951 | } else { | |||
8952 | clflushopt(line); | |||
8953 | } | |||
8954 | } else { | |||
8955 | // no need for fence when using CLFLUSH | |||
8956 | clflush(line); | |||
8957 | } | |||
8958 | } | |||
8959 | ||||
// Emits the synchronization needed around a sequence of cache-line
// writebacks.
//   is_pre - true when called before the writebacks, false when called after
// Nothing is emitted for the pre-sync case. For the post-sync case an sfence
// is emitted when the CPU reports clflushopt or clwb; otherwise nothing is
// emitted.
8960 | void MacroAssembler::cache_wbsync(bool is_pre) | |||
8961 | { | |||
8962 | assert(VM_Version::supports_clflush(), "clflush should be available")do { if (!(VM_Version::supports_clflush())) { (*g_assert_poison ) = 'X';; report_vm_error("/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8962, "assert(" "VM_Version::supports_clflush()" ") failed" , "clflush should be available"); ::breakpoint(); } } while ( 0); | |||
8963 | bool optimized = VM_Version::supports_clflushopt(); | |||
8964 | bool no_evict = VM_Version::supports_clwb(); | |||
8965 | ||||
8966 | // pick the correct implementation | |||
8967 | ||||
8968 | if (!is_pre && (optimized || no_evict)) { | |||
8969 | // need an sfence for post flush when using clflushopt or clwb | |||
8970 | // otherwise no need for any synchronization | |||
8971 | ||||
8972 | sfence(); | |||
8973 | } | |||
8974 | } | |||
8975 | ||||
8976 | #endif // _LP64 | |||
8977 | ||||
// Returns the condition code that is the logical opposite of cond.
//   cond - condition to invert
// Each listed condition maps to its complement (e.g. less <-> greaterEqual,
// below <-> aboveEqual). If cond matches none of the cases, control reaches
// ShouldNotReachHere(); the trailing return of Assembler::overflow only
// exists to give the function a value on that path.
8978 | Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { | |||
8979 | switch (cond) { | |||
8980 | // Note some conditions are synonyms for others | |||
8981 | case Assembler::zero: return Assembler::notZero; | |||
8982 | case Assembler::notZero: return Assembler::zero; | |||
8983 | case Assembler::less: return Assembler::greaterEqual; | |||
8984 | case Assembler::lessEqual: return Assembler::greater; | |||
8985 | case Assembler::greater: return Assembler::lessEqual; | |||
8986 | case Assembler::greaterEqual: return Assembler::less; | |||
8987 | case Assembler::below: return Assembler::aboveEqual; | |||
8988 | case Assembler::belowEqual: return Assembler::above; | |||
8989 | case Assembler::above: return Assembler::belowEqual; | |||
8990 | case Assembler::aboveEqual: return Assembler::below; | |||
8991 | case Assembler::overflow: return Assembler::noOverflow; | |||
8992 | case Assembler::noOverflow: return Assembler::overflow; | |||
8993 | case Assembler::negative: return Assembler::positive; | |||
8994 | case Assembler::positive: return Assembler::negative; | |||
8995 | case Assembler::parity: return Assembler::noParity; | |||
8996 | case Assembler::noParity: return Assembler::parity; | |||
8997 | } | |||
8998 | ShouldNotReachHere()do { (*g_assert_poison) = 'X';; report_should_not_reach_here( "/home/daniel/Projects/java/jdk/src/hotspot/cpu/x86/macroAssembler_x86.cpp" , 8998); ::breakpoint(); } while (0); return Assembler::overflow; | |||
8999 | } | |||
9000 | ||||
// Opens a conditionally skipped code region.
//   masm      - assembler that receives the emitted instructions
//   flag_addr - address of the bool flag that the generated code tests
//   value     - value that causes the region to be skipped
// Emits a byte compare of *flag_addr against value followed by a jump to
// _label when they are equal. The label is left unbound here; it is bound by
// the destructor, so code emitted while this object is alive is jumped over
// when the flag equals value at run time.
9001 | SkipIfEqual::SkipIfEqual( | |||
9002 | MacroAssembler* masm, const bool* flag_addr, bool value) { | |||
9003 | _masm = masm; | |||
9004 | _masm->cmp8(ExternalAddress((address)flag_addr), value); | |||
9005 | _masm->jcc(Assembler::equal, _label); | |||
9006 | } | |||
9007 | ||||
// Closes the skipped region: binds _label at the current code position, which
// is the target of the conditional jump emitted by the constructor.
9008 | SkipIfEqual::~SkipIfEqual() { | |||
9009 | _masm->bind(_label); | |||
9010 | } | |||
9011 | ||||
9012 | // 32-bit Windows has its own fast-path implementation | |||
9013 | // of get_thread | |||
9014 | #if !defined(WIN32) || defined(_LP641) | |||
9015 | ||||
9016 | // This is simply a call to Thread::current() | |||
// Emits code that loads the current Thread* into the given register by
// calling Thread::current() as a leaf call with no arguments.
//   thread - register that receives the result
// The registers pushed below are saved before the call and restored
// afterwards in reverse order. rax carries the call result, so it is only
// saved/restored when the caller asked for a different destination register;
// when thread == rax the result is simply left in rax.
9017 | void MacroAssembler::get_thread(Register thread) { | |||
9018 | if (thread != rax) { | |||
9019 | push(rax); | |||
9020 | } | |||
9021 | LP64_ONLY(push(rdi);)push(rdi); | |||
9022 | LP64_ONLY(push(rsi);)push(rsi); | |||
9023 | push(rdx); | |||
9024 | push(rcx); | |||
9025 | #ifdef _LP641 | |||
9026 | push(r8); | |||
9027 | push(r9); | |||
9028 | push(r10); | |||
9029 | push(r11); | |||
9030 | #endif | |||
9031 | ||||
// Leaf call with zero arguments; the result is read from rax below.
9032 | MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current)((address)((address_word)(Thread::current))), 0); | |||
9033 | ||||
// Restore in exact reverse order of the pushes above.
9034 | #ifdef _LP641 | |||
9035 | pop(r11); | |||
9036 | pop(r10); | |||
9037 | pop(r9); | |||
9038 | pop(r8); | |||
9039 | #endif | |||
9040 | pop(rcx); | |||
9041 | pop(rdx); | |||
9042 | LP64_ONLY(pop(rsi);)pop(rsi); | |||
9043 | LP64_ONLY(pop(rdi);)pop(rdi); | |||
// Copy the result out of rax before restoring the caller's rax.
9044 | if (thread != rax) { | |||
9045 | mov(thread, rax); | |||
9046 | pop(rax); | |||
9047 | } | |||
9048 | } | |||
9049 | ||||
9050 | ||||
9051 | #endif // !WIN32 || _LP64 |